Spaces:
Build error
Build error
Create utils.py
Browse files- app.py +1 -47
- rhyme_with_ai/utils.py +49 -0
app.py
CHANGED
|
@@ -12,6 +12,7 @@ import tensorflow as tf
|
|
| 12 |
import streamlit as st
|
| 13 |
from gazpacho import Soup, get
|
| 14 |
from transformers import BertTokenizer, TFAutoModelForMaskedLM
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
DEFAULT_QUERY = "Machines will take over the world soon"
|
|
@@ -349,53 +350,6 @@ def mick_rijmwoordenboek(word: str, n_words: int):
|
|
| 349 |
return random.sample(results, min(len(results), n_words))
|
| 350 |
|
| 351 |
|
| 352 |
-
def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
|
| 353 |
-
"""Color new words in strings with a span."""
|
| 354 |
-
|
| 355 |
-
def find_diff(new_, old_):
|
| 356 |
-
return [ii for ii, (n, o) in enumerate(zip(new_, old_)) if n != o]
|
| 357 |
-
|
| 358 |
-
new_words = new.split()
|
| 359 |
-
old_words = old.split()
|
| 360 |
-
forward = find_diff(new_words, old_words)
|
| 361 |
-
backward = find_diff(new_words[::-1], old_words[::-1])
|
| 362 |
-
|
| 363 |
-
if not forward or not backward:
|
| 364 |
-
# No difference
|
| 365 |
-
return new
|
| 366 |
-
|
| 367 |
-
start, end = forward[0], len(new_words) - backward[0]
|
| 368 |
-
return (
|
| 369 |
-
" ".join(new_words[:start])
|
| 370 |
-
+ " "
|
| 371 |
-
+ f'<span style="background-color: {color}">'
|
| 372 |
-
+ " ".join(new_words[start:end])
|
| 373 |
-
+ "</span>"
|
| 374 |
-
+ " "
|
| 375 |
-
+ " ".join(new_words[end:])
|
| 376 |
-
)
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
def find_last_word(s):
|
| 380 |
-
"""Find the last word in a string."""
|
| 381 |
-
# Note: will break on \n, \r, etc.
|
| 382 |
-
alpha_only_sentence = "".join([c for c in s if (c.isalpha() or (c == " "))]).strip()
|
| 383 |
-
return alpha_only_sentence.split()[-1]
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
def pairwise(iterable):
|
| 387 |
-
"""s -> (s0,s1), (s1,s2), (s2, s3), ..."""
|
| 388 |
-
# https://stackoverflow.com/questions/5434891/iterate-a-list-as-pair-current-next-in-python
|
| 389 |
-
a, b = itertools.tee(iterable)
|
| 390 |
-
next(b, None)
|
| 391 |
-
return zip(a, b)
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
def sanitize(s):
|
| 395 |
-
"""Remove punctuation from a string."""
|
| 396 |
-
return s.translate(str.maketrans("", "", string.punctuation))
|
| 397 |
-
|
| 398 |
-
|
| 399 |
if __name__ == "__main__":
|
| 400 |
logging.basicConfig(level=logging.INFO)
|
| 401 |
main()
|
|
|
|
| 12 |
import streamlit as st
|
| 13 |
from gazpacho import Soup, get
|
| 14 |
from transformers import BertTokenizer, TFAutoModelForMaskedLM
|
| 15 |
+
from rhyme_with_ai.utils import color_new_words, pairwise, find_last_word, sanitize
|
| 16 |
|
| 17 |
|
| 18 |
DEFAULT_QUERY = "Machines will take over the world soon"
|
|
|
|
| 350 |
return random.sample(results, min(len(results), n_words))
|
| 351 |
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
if __name__ == "__main__":
|
| 354 |
logging.basicConfig(level=logging.INFO)
|
| 355 |
main()
|
rhyme_with_ai/utils.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import itertools
|
| 2 |
+
import string
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
    """Wrap the words of ``new`` that differ from ``old`` in a coloured span.

    Both strings are split on whitespace. Words shared at the start and at
    the end of the two sentences are left untouched; the run of words of
    ``new`` in between is wrapped in a single
    ``<span style="background-color: ...">`` element.

    Args:
        new: The updated sentence; its words make up the returned string.
        old: The sentence to compare against.
        color: CSS colour used for the span background.

    Returns:
        ``new`` unchanged when there is nothing to highlight (the word lists
        are equal, or ``new`` only lost words relative to ``old``). Otherwise
        the words of ``new`` joined by single spaces, with the changed run
        wrapped in the span. The words are inserted into the markup as-is,
        without HTML escaping.
    """
    new_words = new.split()
    old_words = old.split()
    shared = min(len(new_words), len(old_words))

    # Number of leading words the two sentences have in common.
    start = 0
    while start < shared and new_words[start] == old_words[start]:
        start += 1

    # Number of trailing words in common. The bound keeps the suffix from
    # reaching back into the prefix when the sentences differ in length.
    tail = 0
    while tail < shared - start and new_words[-1 - tail] == old_words[-1 - tail]:
        tail += 1
    end = len(new_words) - tail

    if start >= end:
        # No word of `new` is new: avoid emitting an empty span.
        return new

    highlighted = (
        f'<span style="background-color: {color}">'
        + " ".join(new_words[start:end])
        + "</span>"
    )
    parts = [" ".join(new_words[:start]), highlighted, " ".join(new_words[end:])]
    # Skip empty head/tail so the result has no stray leading/trailing space.
    return " ".join(part for part in parts if part)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def find_last_word(s):
    """Return the last word of ``s`` with all non-letter characters removed.

    Characters that are neither alphabetic nor whitespace (digits,
    punctuation, ...) are dropped before the string is split into words, so
    ``"the end!"`` yields ``"end"`` and ``"don't"`` becomes ``"dont"``.

    Args:
        s: The text to inspect.

    Returns:
        The final whitespace-separated word, made of letters only.

    Raises:
        IndexError: If ``s`` contains no alphabetic characters at all.
    """
    # Keep every kind of whitespace (not just " ") so that newlines and tabs
    # still separate words instead of gluing the neighbouring words together.
    letters_and_gaps = "".join(c for c in s if c.isalpha() or c.isspace())
    words = letters_and_gaps.split()
    if not words:
        raise IndexError("no alphabetic word found in input")
    return words[-1]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs of ``iterable``.

    For items ``s0, s1, s2, ...`` the result yields ``(s0, s1)``,
    ``(s1, s2)``, ``(s2, s3)`` and so on. Inputs with fewer than two items
    yield nothing.
    """
    # https://stackoverflow.com/questions/5434891/iterate-a-list-as-pair-current-next-in-python
    current_items, next_items = itertools.tee(iterable, 2)
    # Shift the second copy one step ahead; the default keeps an empty
    # input from raising StopIteration here.
    next(next_items, None)
    return zip(current_items, next_items)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def sanitize(s):
    """Return ``s`` with every character of ``string.punctuation`` removed."""
    # Mapping a code point to None tells str.translate to delete it.
    deletions = dict.fromkeys(map(ord, string.punctuation))
    return s.translate(deletions)
|