Spaces:

MARI-posa
/

FindMyBook

Runtime error

MARI-posa committed on Jun 16, 2023

Commit

19d9a62

1 Parent(s): 3ce4e56

Update stri.py

Files changed (1) hide show

stri.py CHANGED Viewed

@@ -18,28 +18,28 @@ model = AutoModel.from_pretrained(model_name, output_hidden_states=True)
 # Загрузка датасета и аннотаций к книгам
 books = pd.read_csv('all+++.csv')
-books.dropna(inplace=True)
-books = books[books['annotation'].apply(lambda x: len(x.split()) >= 40)]
-books.drop_duplicates(subset='title', keep='first', inplace=True)
-books = books.reset_index(drop=True)
-def data_preprocessing(text: str) -> str:
-    text = re.sub(r'http\S+', " ", text)  # удаляем ссылки
-    text = re.sub(r'@\w+', ' ', text)  # удаляем упоминания пользователей
-    text = re.sub(r'#\w+', ' ', text)  # удаляем хэштеги
-    text = re.sub(r'<.*?>', ' ', text)  # html tags
-    return text
-for i in ['author', 'title', 'annotation']:
-    books[i] = books[i].apply(data_preprocessing)
 annot = books['annotation']
 # Получение эмбеддингов аннотаций каждой книги в датасете
-length = 512
 # Определение запроса пользователя
 query = st.text_input("Введите запрос")

 # Загрузка датасета и аннотаций к книгам
 books = pd.read_csv('all+++.csv')
+#books.dropna(inplace=True)
+#books = books[books['annotation'].apply(lambda x: len(x.split()) >= 40)]
+#books.drop_duplicates(subset='title', keep='first', inplace=True)
+#books = books.reset_index(drop=True)
+#def data_preprocessing(text: str) -> str:
+    #text = re.sub(r'http\S+', " ", text)  # удаляем ссылки
+    #text = re.sub(r'@\w+', ' ', text)  # удаляем упоминания пользователей
+    #text = re.sub(r'#\w+', ' ', text)  # удаляем хэштеги
+    #text = re.sub(r'<.*?>', ' ', text)  # html tags
+   # return text
+#for i in ['author', 'title', 'annotation']:
+    #books[i] = books[i].apply(data_preprocessing)
 annot = books['annotation']
 # Получение эмбеддингов аннотаций каждой книги в датасете
+length = 256
 # Определение запроса пользователя
 query = st.text_input("Введите запрос")