superb
/

superb-submission

Model card Files Files and versions

xet

Community

lewtun HF Staff committed on Sep 26, 2021

Commit

17c79b3

1 Parent(s): 84cea97

Use example expert from s3prl

Browse files

Files changed (1) hide show

{{cookiecutter.repo_name}}/expert.py +64 -43

{{cookiecutter.repo_name}}/expert.py CHANGED Viewed

@@ -1,56 +1,77 @@
-from packaging import version
-import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from torch.nn.utils.rnn import pad_sequence
-import fairseq
-from s3prl.upstream.interfaces import UpstreamBase
-SAMPLE_RATE = 16000
-EXAMPLE_SEC = 5
-class UpstreamExpert(UpstreamBase):
-    def __init__(self, ckpt, **kwargs):
-        super().__init__(**kwargs)
-        assert version.parse(fairseq.__version__) > version.parse(
-            "0.10.2"
-        ), "Please install the fairseq master branch."
-        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
-            [ckpt]
         )
-        self.model = model[0]
-        self.task = task
-        if len(self.hooks) == 0:
-            module_name = "self.model.encoder.layers"
-            for module_id in range(len(eval(module_name))):
-                self.add_hook(
-                    f"{module_name}[{module_id}]",
-                    lambda input, output: input[0].transpose(0, 1),
-                )
-            self.add_hook("self.model.encoder", lambda input, output: output[0])
-    def forward(self, wavs):
-        if self.task.cfg.normalize:
-            wavs = [F.layer_norm(wav, wav.shape) for wav in wavs]
-        device = wavs[0].device
-        wav_lengths = torch.LongTensor([len(wav) for wav in wavs]).to(device)
-        wav_padding_mask = ~torch.lt(
-            torch.arange(max(wav_lengths)).unsqueeze(0).to(device),
-            wav_lengths.unsqueeze(1),
         )
-        padded_wav = pad_sequence(wavs, batch_first=True)
-        features, feat_padding_mask = self.model.extract_features(
-            padded_wav,
-            padding_mask=wav_padding_mask,
-            mask=None,
-        )
         return {
-            "default": features,
         }

+from collections import OrderedDict
+from typing import List, Union, Dict
 import torch.nn as nn
+from torch import Tensor
 from torch.nn.utils.rnn import pad_sequence
+HIDDEN_DIM = 8
+class UpstreamExpert(nn.Module):
+    def __init__(self, ckpt: str = None, model_config: str = None, **kwargs):
+        """
+        Args:
+            ckpt:
+                The checkpoint path for loading your pretrained weights.
+                Can be assigned by the -k option in run_downstream.py
+            model_config:
+                The config path for constructing your model.
+                Might not be needed if you also save that in your checkpoint file.
+                Can be assigned by the -g option in run_downstream.py
+        """
+        super().__init__()
+        self.name = "[Example UpstreamExpert]"
+        print(
+            f"{self.name} - You can use model_config to construct your customized model: {model_config}"
         )
+        print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
+        print(
+            f"{self.name} - If you store the pretrained weights and model config in a single file, "
+            "you can just choose one argument (ckpt or model_config) to pass. It's up to you!"
         )
+        # The model needs to be a nn.Module for finetuning, not required for representation extraction
+        self.model1 = nn.Linear(1, HIDDEN_DIM)
+        self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
+    def get_downsample_rates(self, key: str) -> int:
+        """
+        Since we do not do any downsampling in this example upstream
+        All keys' corresponding representations have downsample rate of 1
+        """
+        return 1
+    def forward(self, wavs: List[Tensor]) -> Dict[str, Union[Tensor, List[Tensor]]]:
+        """
+        When the returned Dict contains a List with more than one Tensor,
+        those Tensors should have the same shape so a weighted sum can be trained on them.
+        """
+        wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
+        # wavs: (batch_size, max_len, 1)
+        hidden = self.model1(wavs)
+        # hidden: (batch_size, max_len, hidden_dim)
+        feature = self.model2(hidden)
+        # feature: (batch_size, max_len, hidden_dim)
+        # The "hidden_states" key will be used as default in many cases
+        # Other keys in this example are presented for the SUPERB Challenge
         return {
+            "hidden_states": [hidden, feature],
+            "PR": [hidden, feature],
+            "ASR": [hidden, feature],
+            "QbE": [hidden, feature],
+            "SID": [hidden, feature],
+            "ASV": [hidden, feature],
+            "SD": [hidden, feature],
+            "ER": [hidden, feature],
+            "SF": [hidden, feature],
+            "SE": [hidden, feature],
+            "SS": [hidden, feature],
+            "secret": [hidden, feature],
         }