Spaces:

panduwana
/

interview-ai-detector

Sleeping

bearking58 committed on May 3, 2024

Commit

8f0525e

1 Parent(s): 236bdc5

fix: prepare next build for nltk fix

Files changed (2) hide show

core-model-prediction/Dockerfile CHANGED Viewed

@@ -10,6 +10,8 @@ COPY . /app
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 # Make port 8080 available to the world outside this container
 EXPOSE 8080

 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
+RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
 # Make port 8080 available to the world outside this container
 EXPOSE 8080

core-model-prediction/hypothesis.py CHANGED Viewed

@@ -14,7 +14,6 @@ import zipfile
 class BaseModelHypothesis:
     def __init__(self):
-        self.download_and_extract_nltk_data()
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -49,18 +48,6 @@ class BaseModelHypothesis:
         self.scaler_not_normalized = joblib.load(
             "scalers/scaler-not-normalized.joblib")
-    def download_and_extract_nltk_data(self):
-        nltk.download('punkt')
-        nltk.download('wordnet')
-        nltk.download('averaged_perceptron_tagger')
-        wordnet_dir = nltk.data.find("corpora/wordnet").path
-        if not os.path.exists(wordnet_dir):
-            zip_path = os.path.join(
-                os.path.dirname(wordnet_dir), "wordnet.zip")
-            with zipfile.ZipFile(zip_path, "r") as zip_ref:
-                zip_ref.extractall(os.path.dirname(wordnet_dir))
     def process_emotion_lexicon(self):
         emotion_lexicon = {}
         for _, row in self.lexicon_df.iterrows():

 class BaseModelHypothesis:
     def __init__(self):
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
         self.scaler_not_normalized = joblib.load(
             "scalers/scaler-not-normalized.joblib")
     def process_emotion_lexicon(self):
         emotion_lexicon = {}
         for _, row in self.lexicon_df.iterrows():