Spaces:

Norod78
/

Fakypedia

Sleeping

App Files Files Community

Norod78 committed on Aug 1, 2024

Commit

72a11a5

verified ·

1 Parent(s): 71821ba

Fakypedia

Browse files

Files changed (2) hide show

app.py +59 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import spaces
+title = "Fakypedia"
+DESCRIPTION = """\
+# Genarate a silly article
+A bilingual (English and Hebrew) [nonsense generation model](https://huggingface.co/Norod78/SmolLM-135M-FakyPedia-EngHeb) which produces silly Wikipedia-like abstract text.
+Tap on the \"Submit\" button to generate a silly and/or fake \"Wikipedia-Like\" article based on the input title
+"""
+article = "<p>This model extended the tokenizer of and is a fine-tuned of [SmolLM-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct)</p>"
+CUDA_AVAILABLE = torch.cuda.is_available()  # True when torch reports a usable CUDA device
+device = torch.device("cuda" if CUDA_AVAILABLE else "cpu")  # run on the GPU when present, otherwise on the CPU
+model_id = "Norod78/SmolLM-135M-FakyPedia-EngHeb"  # identifier passed to both from_pretrained() calls below
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer.pad_token_id = tokenizer.eos_token_id  # reuse the end-of-sequence token id as the padding id
+bos_token = tokenizer.bos_token  # kept at module level; generate_fakypedia prepends it to every prompt
+model = AutoModelForCausalLM.from_pretrained(model_id).to(device)  # load the weights and move them to the chosen device
+model.generation_config.pad_token_id = tokenizer.pad_token_id  # keep the generation config's pad id in sync with the tokenizer
+torch.manual_seed(1234)  # fixed seed for torch's RNG, set once at import time
+@spaces.GPU
+def generate_fakypedia(article_title: str):
+    with torch.no_grad():
+        result = ""
+        string_to_tokenize= f"{bos_token}\\%{article_title}"
+        input_ids = tokenizer( string_to_tokenize, return_tensors="pt").input_ids.to(device)
+        sample_outputs = model.generate(input_ids, do_sample=True,repetition_penalty=1.2, temperature=0.5, max_length=96, num_return_sequences=3)
+        if article_title == None or len(article_title) == 0:
+            result += f"# Fakypedia results with random titles  \n"
+            article_title = ""
+        else:
+            result += f"# Fakypedia results for \"{article_title}\"  \n"
+        for i, sample_output in enumerate(sample_outputs):
+            decoded_output = tokenizer.decode(sample_output, skip_special_tokens=True)
+            decoded_output = decoded_output.replace(f"\%{article_title}", f"## {article_title}").replace("\%", " ").replace("\\n", "  \n")
+            decoded_output = decoded_output.replace("##   \n", "\n")
+            result += "{}\n".format(decoded_output)
+        return result
+demo = gr.Interface(
+    generate_fakypedia,
+    inputs=gr.Textbox(lines=1, label="Enter a title for the article (or leave blank for a random one)"),
+    outputs=gr.Markdown(label="Generated fakypedia article"),
+    title=title,
+    description=DESCRIPTION,
+    article=article,
+    examples=["Hugging face", "A socially awkward potato", "דורון אדלר", ""],
+    allow_flagging="never",
+)
+demo.queue()
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+accelerate
+torch
+transformers
+tokenizers
+spaces
+numpy