AliHaider0343 committed on
Commit
cc58df9
·
1 Parent(s): b3257f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -14
app.py CHANGED
@@ -1,14 +1,9 @@
1
  import torch
2
  import streamlit as st
3
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
4
- import nltk
5
- from nltk.corpus import stopwords
6
  import re
7
  import string
8
-
9
- nltk.download('stopwords')
10
- nltk.download('punkt')
11
- stop_words = set(stopwords.words('english'))
12
 
13
 
14
  def tokenize_sentences(sentence):
@@ -23,18 +18,11 @@ def tokenize_sentences(sentence):
23
  )
24
  return torch.cat([encoded_dict['input_ids']], dim=0), torch.cat([encoded_dict['attention_mask']], dim=0)
25
 
26
- def remove_stop_words(sentence):
27
- words = nltk.word_tokenize(sentence)
28
- custom_words = ['recommend', 'having', 'Hello', 'best', 'restaurant', 'top', 'want', 'need', 'well', 'most', 'should', 'be', 'good', 'also']
29
- stop_words.update(custom_words)
30
- words_without_stopwords = [word for word in words if word.lower() not in stop_words]
31
- sentence_without_stopwords = ' '.join(words_without_stopwords)
32
- return sentence_without_stopwords
33
 
34
  def preprocess_query(query):
35
  query = str(query).lower()
36
  query = query.strip()
37
- query = remove_stop_words(query)
38
  query=query.translate(str.maketrans("", "", string.punctuation))
39
  return query
40
 
 
1
  import torch
2
  import streamlit as st
3
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
 
 
4
  import re
5
  import string
6
+
 
 
 
7
 
8
 
9
  def tokenize_sentences(sentence):
 
18
  )
19
  return torch.cat([encoded_dict['input_ids']], dim=0), torch.cat([encoded_dict['attention_mask']], dim=0)
20
 
21
+
 
 
 
 
 
 
22
 
23
def preprocess_query(query):
    """Normalize a free-text query before tokenization/classification.

    The input is coerced to ``str``, lowercased, stripped of surrounding
    whitespace, and then has every ASCII punctuation character removed in
    a single ``str.translate`` pass.

    Args:
        query: Raw user input (any type; coerced with ``str``).

    Returns:
        The normalized query string.
    """
    normalized = str(query).lower().strip()
    # One C-level translate pass deletes every char in string.punctuation.
    return normalized.translate(str.maketrans("", "", string.punctuation))
28