Spaces:

hgrif
/

rhyme-with-ai

Build error

App Files Files Community

hgrif committed on Dec 21, 2021

Commit

5ddb621

1 Parent(s): b57caec

Add rhyme.py

Browse files

Files changed (2) hide show

app.py +3 -82
rhyme_with_ai/rhyme.py +69 -0

app.py CHANGED Viewed

@@ -1,18 +1,13 @@
 import copy
-import functools
-import itertools
 import logging
-import random
-import string
-from typing import List, Optional
-import requests
 import numpy as np
 import tensorflow as tf
 import streamlit as st
-from gazpacho import Soup, get
 from transformers import BertTokenizer, TFAutoModelForMaskedLM
-from rhyme_with_ai.utils import color_new_words, pairwise, find_last_word, sanitize
 DEFAULT_QUERY = "Machines will take over the world soon"
@@ -102,21 +97,6 @@ def display_output(status_text, query, current_sentences, previous_sentences):
         query + ",<br>" + "".join(print_sentences), unsafe_allow_html=True
     )
-class TokenWeighter:
-    def __init__(self, tokenizer):
-        self.tokenizer_ = tokenizer
-        self.proba = self.get_token_proba()
-    def get_token_proba(self):
-        valid_token_mask = self._filter_short_partial(self.tokenizer_.vocab)
-        return valid_token_mask
-    def _filter_short_partial(self, vocab):
-        valid_token_ids = [v for k, v in vocab.items() if len(k) > 1 and "#" not in k]
-        is_valid = np.zeros(len(vocab.keys()))
-        is_valid[valid_token_ids] = 1
-        return is_valid
 class RhymeGenerator:
     def __init__(
@@ -291,65 +271,6 @@ class RhymeGenerator:
-def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
-    """Returns a list of rhyme words for a sentence.
-    Parameters
-    ----------
-    sentence : Sentence that may end with punctuation
-    n_rhymes : Maximum number of rhymes to return
-    Returns
-    -------
-        List[str] -- List of words that rhyme with the final word
-    """
-    last_word = find_last_word(sentence)
-    if language == "english":
-       return query_datamuse_api(last_word, n_rhymes)
-    elif language == "dutch":
-        return mick_rijmwoordenboek(last_word, n_rhymes)
-    else:
-        raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")
-def query_datamuse_api(word: str, n_rhymes: Optional[int] = None) -> List[str]:
-    """Query the DataMuse API.
-    Parameters
-    ----------
-    word : Word to rhyme with
-    n_rhymes : Max rhymes to return
-    Returns
-    -------
-    Rhyme words
-    """
-    out = requests.get(
-        "https://api.datamuse.com/words", params={"rel_rhy": word}
-    ).json()
-    words = [_["word"] for _ in out]
-    if n_rhymes is None:
-        return words
-    return words[:n_rhymes]
-@functools.lru_cache(maxsize=128, typed=False)
-def mick_rijmwoordenboek(word: str, n_words: int):
-    url = f"https://rijmwoordenboek.nl/rijm/{word}"
-    html = get(url)
-    soup = Soup(html)
-    results = soup.find("div", {"id": "rhymeResultsWords"}).html.split("<br>")
-    # clean up
-    results = [r.replace("\n", "").replace(" ", "") for r in results]
-    # filter html and empty strings
-    results = [r for r in results if ("<" not in r) and (len(r) > 0)]
-    return random.sample(results, min(len(results), n_words))
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     main()

 import copy
 import logging
+from typing import List
 import numpy as np
 import tensorflow as tf
 import streamlit as st
 from transformers import BertTokenizer, TFAutoModelForMaskedLM
+from rhyme_with_ai.utils import color_new_words, pairwise, sanitize
+from rhyme_with_ai.token_weighter import TokenWeighter
 DEFAULT_QUERY = "Machines will take over the world soon"
         query + ",<br>" + "".join(print_sentences), unsafe_allow_html=True
     )
 class RhymeGenerator:
     def __init__(
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     main()

rhyme_with_ai/rhyme.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import functools
+import random
+from typing import List, Optional
+import requests
+from gazpacho import Soup, get
+from rhyme_with_ai.utils import find_last_word
+def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
+    """Look up rhymes for the final word of a sentence.
+    Parameters
+    ----------
+    sentence : Sentence that may end with punctuation
+    n_rhymes : Maximum number of rhymes to return
+    language : Which rhyme source to use, "english" or "dutch"
+    Returns
+    -------
+        List[str] -- Words that rhyme with the final word of the sentence
+    Raises
+    ------
+        NotImplementedError -- If language is neither "english" nor "dutch"
+    """
+    target = find_last_word(sentence)
+    # Dutch rhymes come from rijmwoordenboek.nl, English ones from DataMuse.
+    if language == "dutch":
+        return mick_rijmwoordenboek(target, n_rhymes)
+    if language != "english":
+        raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")
+    return query_datamuse_api(target, n_rhymes)
+def query_datamuse_api(word: str, n_rhymes: Optional[int] = None, timeout: float = 10.0) -> List[str]:
+    """Query the DataMuse API for words that rhyme with a word.
+    Parameters
+    ----------
+    word : Word to rhyme with
+    n_rhymes : Max rhymes to return, or None to return every result
+    timeout : Seconds to wait for the API before giving up
+    Returns
+    -------
+    Rhyme words, in the order the API returned them
+    Raises
+    ------
+    requests.RequestException : If the request times out or the API
+        answers with an HTTP error status
+    """
+    # Without a timeout a stalled connection would block the caller forever.
+    response = requests.get(
+        "https://api.datamuse.com/words", params={"rel_rhy": word}, timeout=timeout
+    )
+    # Report the HTTP status rather than trying to parse an error body as JSON.
+    response.raise_for_status()
+    words = [entry["word"] for entry in response.json()]
+    # Slicing with None keeps the whole list, so no separate branch is needed.
+    return words[:n_rhymes]
+@functools.lru_cache(maxsize=128, typed=False)
+def _fetch_rijmwoordenboek_rhymes(word: str) -> tuple:
+    """Download and parse all rhymes rijmwoordenboek.nl lists for a word.
+    Only this network lookup is cached; the result is a tuple so that no
+    caller can modify the cached value.
+    """
+    html = get(f"https://rijmwoordenboek.nl/rijm/{word}")
+    container = Soup(html).find("div", {"id": "rhymeResultsWords"})
+    # find() may give back several elements or nothing at all; use the first
+    # match and treat a missing results block as "no rhymes".
+    if isinstance(container, list):
+        container = container[0] if container else None
+    if container is None:
+        return ()
+    # clean up
+    cleaned = (r.replace("\n", "").replace(" ", "") for r in container.html.split("<br>"))
+    # filter html and empty strings
+    return tuple(r for r in cleaned if r and "<" not in r)
+def mick_rijmwoordenboek(word: str, n_words: int) -> List[str]:
+    """Return a random selection of Dutch rhymes from rijmwoordenboek.nl.
+    Parameters
+    ----------
+    word : Word to rhyme with
+    n_words : Maximum number of rhymes to return
+    Returns
+    -------
+    Up to n_words rhyme words, drawn at random without replacement; an
+    empty list when the page holds no results
+    """
+    candidates = _fetch_rijmwoordenboek_rhymes(word)
+    # Sampling happens outside the cache so every call gets a fresh draw
+    # and its own list.
+    return random.sample(candidates, min(len(candidates), n_words))