Spaces:
Sleeping
Sleeping
Commit
·
8f0525e
1
Parent(s):
236bdc5
fix: prepare next build for nltk fix
Browse files
core-model-prediction/Dockerfile
CHANGED
|
@@ -10,6 +10,8 @@ COPY . /app
|
|
| 10 |
# Install any needed packages specified in requirements.txt
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
|
|
|
|
|
|
| 13 |
# Make port 8080 available to the world outside this container
|
| 14 |
EXPOSE 8080
|
| 15 |
|
|
|
|
| 10 |
# Install any needed packages specified in requirements.txt
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
+
RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
|
| 14 |
+
|
| 15 |
# Make port 8080 available to the world outside this container
|
| 16 |
EXPOSE 8080
|
| 17 |
|
core-model-prediction/hypothesis.py
CHANGED
|
@@ -14,7 +14,6 @@ import zipfile
|
|
| 14 |
|
| 15 |
class BaseModelHypothesis:
|
| 16 |
def __init__(self):
|
| 17 |
-
self.download_and_extract_nltk_data()
|
| 18 |
self.analyzer = SentimentIntensityAnalyzer()
|
| 19 |
self.lexicon_df = pd.read_csv(
|
| 20 |
"https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
|
|
@@ -49,18 +48,6 @@ class BaseModelHypothesis:
|
|
| 49 |
self.scaler_not_normalized = joblib.load(
|
| 50 |
"scalers/scaler-not-normalized.joblib")
|
| 51 |
|
| 52 |
-
def download_and_extract_nltk_data(self):
|
| 53 |
-
nltk.download('punkt')
|
| 54 |
-
nltk.download('wordnet')
|
| 55 |
-
nltk.download('averaged_perceptron_tagger')
|
| 56 |
-
|
| 57 |
-
wordnet_dir = nltk.data.find("corpora/wordnet").path
|
| 58 |
-
if not os.path.exists(wordnet_dir):
|
| 59 |
-
zip_path = os.path.join(
|
| 60 |
-
os.path.dirname(wordnet_dir), "wordnet.zip")
|
| 61 |
-
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
| 62 |
-
zip_ref.extractall(os.path.dirname(wordnet_dir))
|
| 63 |
-
|
| 64 |
def process_emotion_lexicon(self):
|
| 65 |
emotion_lexicon = {}
|
| 66 |
for _, row in self.lexicon_df.iterrows():
|
|
|
|
| 14 |
|
| 15 |
class BaseModelHypothesis:
|
| 16 |
def __init__(self):
|
|
|
|
| 17 |
self.analyzer = SentimentIntensityAnalyzer()
|
| 18 |
self.lexicon_df = pd.read_csv(
|
| 19 |
"https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
|
|
|
|
| 48 |
self.scaler_not_normalized = joblib.load(
|
| 49 |
"scalers/scaler-not-normalized.joblib")
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def process_emotion_lexicon(self):
|
| 52 |
emotion_lexicon = {}
|
| 53 |
for _, row in self.lexicon_df.iterrows():
|