# Spaces:
# Runtime error
# Runtime error
import re
import string
from string import digits

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tqdm import tqdm
class Dataset:
    """Indexable view over a parallel Assamese -> English dataframe.

    Each item is tokenized with the fitted Keras tokenizers and padded to
    ``max_len`` so a downstream ``Sequence``-based dataloader can stack
    fixed-size arrays for seq2seq training.
    """

    def __init__(self, data, tknizer_ass, tknizer_eng, max_len):
        # Raw text columns: encoder input plus the teacher-forced decoder
        # input/output pair. NOTE(review): assumes `data` has columns
        # 'ass', 'eng_inp', 'eng_out' — confirm against the caller.
        self.encoder_inps = data['ass'].values
        self.decoder_inps = data['eng_inp'].values
        self.decoder_outs = data['eng_out'].values
        self.tknizer_eng = tknizer_eng
        self.tknizer_ass = tknizer_ass
        self.max_len = max_len

    def __getitem__(self, i):
        """Return the i-th (encoder, decoder_in, decoder_out) triple.

        Each element is an int32 array of shape (1, max_len), padded
        (or truncated) at the end of the sequence.
        """
        # texts_to_sequences expects a list of texts, hence the [ ... ].
        encoder_seq = self.tknizer_ass.texts_to_sequences([self.encoder_inps[i]])
        decoder_inp_seq = self.tknizer_eng.texts_to_sequences([self.decoder_inps[i]])
        decoder_out_seq = self.tknizer_eng.texts_to_sequences([self.decoder_outs[i]])
        # Fix: the original stored every intermediate on `self`, making item
        # access mutate shared instance state for no benefit; locals suffice.
        encoder_seq = pad_sequences(encoder_seq, maxlen=self.max_len,
                                    dtype='int32', padding='post')
        decoder_inp_seq = pad_sequences(decoder_inp_seq, maxlen=self.max_len,
                                        dtype='int32', padding='post')
        decoder_out_seq = pad_sequences(decoder_out_seq, maxlen=self.max_len,
                                        dtype='int32', padding='post')
        return encoder_seq, decoder_inp_seq, decoder_out_seq

    def __len__(self):
        # Required by Keras' Sequence-style fitting (model.fit on a generator).
        return len(self.encoder_inps)
class Dataloder(tf.keras.utils.Sequence):
    """Batches Dataset items for ``model.fit``.

    Each batch is ``([encoder_batch, decoder_inp_batch], decoder_out_batch)``
    with every array shaped (batch_size, max_len).
    """

    def __init__(self, dataset, batch_size=1):
        self.dataset = dataset
        self.batch_size = batch_size
        # Index indirection so on_epoch_end() can reshuffle sample order.
        self.indexes = np.arange(len(self.dataset.encoder_inps))

    def __getitem__(self, i):
        """Return batch ``i`` as ([enc, dec_in], dec_out)."""
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        # BUG FIX: the original read self.dataset[j] with the raw position j,
        # ignoring self.indexes entirely — so the shuffle performed by
        # on_epoch_end() had no effect on the batches. Route every access
        # through the (possibly permuted) index array.
        data = [self.dataset[self.indexes[j]] for j in range(start, stop)]
        # Each per-sample array is (1, max_len); stacking along axis=1 gives
        # (1, batch, max_len), then squeezing axis 0 yields (batch, max_len).
        batch = [np.squeeze(np.stack(samples, axis=1), axis=0)
                 for samples in zip(*data)]
        # Shape the batch as ([encoder, decoder_inp], decoder_out) — the
        # sequences are already tokenized/padded by the Dataset.
        return tuple([[batch[0], batch[1]], batch[2]])

    def __len__(self):
        # Number of complete batches; a trailing partial batch is dropped.
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        # Reshuffle so each epoch visits samples in a new order.
        self.indexes = np.random.permutation(self.indexes)