Fix decoder output class
#3 — opened by rom7

torch-ext/mamba_ssm/utils/generation.py
CHANGED
    
    | @@ -11,7 +11,7 @@ import torch.nn.functional as F | |
| 11 | 
             
            from einops import rearrange, repeat
         | 
| 12 | 
             
            from torch import Tensor
         | 
| 13 | 
             
            from torch.profiler import ProfilerActivity, profile, record_function
         | 
| 14 | 
            -
            from transformers.generation import  | 
| 15 |  | 
| 16 |  | 
| 17 | 
             
            @dataclass
         | 
| @@ -146,7 +146,7 @@ def decode( | |
| 146 | 
             
                    max_length: int
         | 
| 147 | 
             
                    teacher_outputs (optional): (batch, seq_len). If provided, instead of sampling from the
         | 
| 148 | 
             
                        logits, the next token is taken from the teacher_outputs. Useful for testing.
         | 
| 149 | 
            -
                Returns:  | 
| 150 | 
             
                    sequences: (batch, max_length)
         | 
| 151 | 
             
                    scores: tuples of (batch, vocab_size)
         | 
| 152 | 
             
                """
         | 
| @@ -240,7 +240,7 @@ def decode( | |
| 240 | 
             
                    end.record()
         | 
| 241 | 
             
                    torch.cuda.synchronize()
         | 
| 242 | 
             
                    print(f"Prompt processing + decoding time: {(start.elapsed_time(end)):.0f}ms")
         | 
| 243 | 
            -
                output_cls =  | 
| 244 | 
             
                return output_cls(sequences=torch.cat(sequences, dim=1), scores=tuple(scores))
         | 
| 245 |  | 
| 246 |  | 
|  | |
| 11 | 
             
            from einops import rearrange, repeat
         | 
| 12 | 
             
            from torch import Tensor
         | 
| 13 | 
             
            from torch.profiler import ProfilerActivity, profile, record_function
         | 
| 14 | 
            +
            from transformers.generation import GenerateDecoderOnlyOutput, TextStreamer
         | 
| 15 |  | 
| 16 |  | 
| 17 | 
             
            @dataclass
         | 
|  | |
| 146 | 
             
                    max_length: int
         | 
| 147 | 
             
                    teacher_outputs (optional): (batch, seq_len). If provided, instead of sampling from the
         | 
| 148 | 
             
                        logits, the next token is taken from the teacher_outputs. Useful for testing.
         | 
| 149 | 
            +
                Returns: GenerateDecoderOnlyOutput, with the following fields:
         | 
| 150 | 
             
                    sequences: (batch, max_length)
         | 
| 151 | 
             
                    scores: tuples of (batch, vocab_size)
         | 
| 152 | 
             
                """
         | 
|  | |
| 240 | 
             
                    end.record()
         | 
| 241 | 
             
                    torch.cuda.synchronize()
         | 
| 242 | 
             
                    print(f"Prompt processing + decoding time: {(start.elapsed_time(end)):.0f}ms")
         | 
| 243 | 
            +
                output_cls = GenerateDecoderOnlyOutput
         | 
| 244 | 
             
                return output_cls(sequences=torch.cat(sequences, dim=1), scores=tuple(scores))
         | 
| 245 |  | 
| 246 |  | 
