import torch
import gradio as gr
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

model_name_or_path = "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ"
model_basename = "gptq_model-4bit-128g"

use_triton = False

device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

quantize_config = BaseQuantizeConfig(
    bits=4,          # quantize the model to 4-bit
    group_size=128,  # 128 is the recommended group size
    desc_act=False,  # False significantly speeds up inference but may slightly hurt perplexity
)

model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=False,
    device=device,
    use_triton=use_triton,
    quantize_config=quantize_config,
    cache_dir="models/",
)
| """ | |
| To download from a specific branch, use the revision parameter, as in this example: | |
| model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, | |
| revision="gptq-4bit-32g-actorder_True", | |
| model_basename=model_basename, | |
| use_safetensors=True, | |
| trust_remote_code=False, | |
| device="cuda:0", | |
| quantize_config=None) | |
| """ | |

def code_gen(text):
    logging.set_verbosity(logging.CRITICAL)

    print("*** Pipeline:")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=124,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
    )
    response = pipe(text)
    print(response)
    return response[0]['generated_text']

iface = gr.Interface(
    fn=code_gen,
    inputs=gr.Textbox(label="Input Source Code"),
    outputs="text",
    title="Code Generation",
)
iface.launch()
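
# --- Notes (a sketch, not part of the original Space code) ---
# Rough dependency list for running this app; the exact packages and pins are
# an assumption, not copied from the Space's requirements.txt:
#   torch, gradio, transformers, auto-gptq
# To exercise code_gen() without the Gradio UI, call it before iface.launch(),
# for example:
#   print(code_gen("Write a Python function that reverses a string."))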