Spaces:

pleonova
/

multi-label-summary-text

Running

App Files Files Community

pleonova committed on Sep 30, 2023

Commit

559f5c9

unverified ·

1 Parent(s): 1d7e9d0

Remove extra spaces

Browse files

Files changed (1) hide show

models.py +0 -7

models.py CHANGED Viewed

@@ -15,14 +15,12 @@ def create_nest_sentences(document:str, token_max_length = 1024):
   for sentence in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', document.replace("\n", ' ')):
     tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] # hugging face transformer tokenizer
     length += len(tokens_in_sentence)
     if length < token_max_length:
       sent.append(sentence)
     else:
       nested.append(sent)
       sent = [sentence]
       length = 0
   if sent:
     nested.append(sent)
   return nested
@@ -42,8 +40,6 @@ def keyword_gen(kw_model, sequence:str):
     top_n=10)
   return keywords
 # Reference: https://huggingface.co/facebook/bart-large-mnli
 @st.cache_resource
 def load_summary_model():
@@ -69,7 +65,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
     no_repeat_ngram_size=3)
 	return output[0].get('summary_text')
 # # Reference: https://www.datatrigger.org/post/nlp_hugging_face/
 # # Custom summarization pipeline (to handle long articles)
 # def summarize(text, minimum_length_of_summary = 100):
@@ -80,7 +75,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
 #     # Untokenize
 #     return([tokenizer_bart.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0])
 # Reference: https://huggingface.co/spaces/team-zero-shot-nli/zero-shot-nli/blob/main/utils.py
 @st.cache_resource
 def load_model():
@@ -93,4 +87,3 @@ def load_model():
 def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool):
     """Run ``classifier`` on ``sequence`` and return its labels and scores.

     Args:
         classifier: Callable invoked as
             ``classifier(sequence, labels, multi_label=...)``; presumably a
             Hugging Face zero-shot-classification pipeline (confirm against
             ``load_model``).
         sequence: Text to classify.
         labels: Candidate labels forwarded to the classifier unchanged.
         multi_class: Forwarded to the classifier as ``multi_label``.

     Returns:
         A ``(labels, scores)`` tuple taken from the ``'labels'`` and
         ``'scores'`` entries of the classifier's result.
     """
     result = classifier(sequence, labels, multi_label=multi_class)
     predicted_labels = result['labels']
     predicted_scores = result['scores']
     return predicted_labels, predicted_scores

   for sentence in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', document.replace("\n", ' ')):
     tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] # hugging face transformer tokenizer
     length += len(tokens_in_sentence)
     if length < token_max_length:
       sent.append(sentence)
     else:
       nested.append(sent)
       sent = [sentence]
       length = 0
   if sent:
     nested.append(sent)
   return nested
     top_n=10)
   return keywords
 # Reference: https://huggingface.co/facebook/bart-large-mnli
 @st.cache_resource
 def load_summary_model():
     no_repeat_ngram_size=3)
 	return output[0].get('summary_text')
 # # Reference: https://www.datatrigger.org/post/nlp_hugging_face/
 # # Custom summarization pipeline (to handle long articles)
 # def summarize(text, minimum_length_of_summary = 100):
 #     # Untokenize
 #     return([tokenizer_bart.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0])
 # Reference: https://huggingface.co/spaces/team-zero-shot-nli/zero-shot-nli/blob/main/utils.py
 @st.cache_resource
 def load_model():
 def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool):
     """Run ``classifier`` on ``sequence`` and return its labels and scores.

     Args:
         classifier: Callable invoked as
             ``classifier(sequence, labels, multi_label=...)``; presumably a
             Hugging Face zero-shot-classification pipeline (confirm against
             ``load_model``).
         sequence: Text to classify.
         labels: Candidate labels forwarded to the classifier unchanged.
         multi_class: Forwarded to the classifier as ``multi_label``.

     Returns:
         A ``(labels, scores)`` tuple taken from the ``'labels'`` and
         ``'scores'`` entries of the classifier's result.
     """
     result = classifier(sequence, labels, multi_label=multi_class)
     predicted_labels = result['labels']
     predicted_scores = result['scores']
     return predicted_labels, predicted_scores