|
|
import torch |
|
|
import spaces |
|
|
import os |
|
|
# Hugging Face access token used to download the gated Llama-3 base model.
# Read eagerly at import time; raises KeyError immediately if the Space
# secret is not configured (fail-fast is intentional here).
HF_TOKEN = os.environ["HF_TOKEN"]
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
|
|
from peft import LoraConfig, PeftModel, get_peft_model |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
# Tokenizer loaded from the adapter repo rather than the base-model repo.
# NOTE(review): presumably this is the tokenizer saved alongside the LoRA
# fine-tune — confirm its vocab matches meta-llama/Meta-Llama-3-8B-Instruct.
tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
|
|
|
|
|
|
|
|
# 4-bit NF4 quantization settings so the 8B base model fits in Space GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize weights to 4-bit on load
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization data type
    bnb_4bit_compute_dtype=torch.bfloat16  # computation (matmul) dtype
)
|
|
|
|
|
|
|
|
# Load the Llama-3 8B Instruct base model with 4-bit quantization applied.
# Fix: `use_auth_token` is deprecated in recent transformers releases; the
# supported keyword for passing an access token is `token`.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    quantization_config=bnb_config,
    token=HF_TOKEN,
)
|
|
|
|
|
|
|
|
# LoRA configuration describing the adapter layout (all attention and MLP
# projections of the Llama architecture).
# NOTE(review): PeftModel.from_pretrained below also ships its own saved
# adapter_config.json; passing this config overrides it — verify the two
# actually agree with the training setup.
peft_config = LoraConfig(
    r=16,           # LoRA rank
    lora_alpha=16,  # scaling factor (alpha / r = 1.0)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0,  # no dropout (inference-only usage)
    bias="none",     # bias terms are neither adapted nor applied
    task_type="CAUSAL_LM"
)
|
|
|
|
|
# Attach the fine-tuned LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=peft_config)
|
|
|
|
|
# NOTE(review): `condition` is defined but never referenced — the identical
# sentence is duplicated inline inside chunk_it's prompt. Kept as-is so any
# external importer keeps working; consider reusing it in the prompt instead.
condition = '''
ALWAYS provide output in a JSON format.
'''
|
|
# Standard Alpaca instruction template. Filled via
# alpaca_prompt.format(instruction, input, response); at inference time the
# response slot is passed "" so the model completes it.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
|
|
|
|
|
|
|
|
@spaces.GPU(duration=300)
def chunk_it(inventory_list, user_input_text):
    """Turn a free-form inventory request into a JSON tool call.

    Builds an Alpaca-style prompt listing the supported inventory tasks and
    their required parameters, splices in ``inventory_list`` and the user's
    request, and runs the LoRA-adapted Llama-3 model on it.

    Args:
        inventory_list: Text of known item names; the prompt instructs the
            model to match ItemName against this list (except for the
            "new items" function).
        user_input_text: Raw user request, placed in the "### Input" slot.

    Returns:
        str: Only the newly generated portion of the model output, which the
        prompt instructs to be JSON.
    """
    # NOTE(review): on a 4-bit bitsandbytes model this .to() is effectively a
    # no-op (quantized weights are already device-placed at load time) and
    # recent transformers versions may warn/raise here — confirm against the
    # pinned transformers version before removing.
    model.to('cuda')
    inputs = tokenizer(
        [
            alpaca_prompt.format(
'''
You will receive text input that you need to analyze to perform the following tasks:

transaction: Record the details of an item transaction.
last n days transactions: Retrieve transaction records for a specified time period.
view risk inventory: View inventory items based on a risk category.
view inventory: View inventory details.
new items: Add new items to the inventory.
report generation: Generate various inventory reports.
delete item: Delete an existing Item.

Required Parameters:
Each task requires specific parameters to execute correctly:

transaction:
ItemName (string)
ItemQt (quantity - integer)
Type (string: "sale" or "purchase" or "return")
ReorderPoint (integer)
last n days transactions:
ItemName (string)
Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
view risk inventory:
RiskType (string: "overstock", "understock", or "Null" for all risk types)
view inventory:
ItemName (string)
new items:
ItemName (string)
SellingPrice (number)
CostPrice (number)
report generation:
ItemName (string)
Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)

The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
''' + inventory_list +
'''
ALWAYS provide output in a JSON format.
''',
                user_input_text,
                "",  # response slot left empty for the model to complete
            )
        ], return_tensors="pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)

    # Fix: decode only the tokens generated beyond the prompt. Previously the
    # full sequence was decoded, so the returned "JSON" string began with the
    # entire multi-hundred-token instruction prompt.
    prompt_length = inputs["input_ids"].shape[1]
    content = tokenizer.batch_decode(
        outputs[:, prompt_length:], skip_special_tokens=True
    )
    return content[0]
|
|
|
|
|
|
|
|
# Gradio UI. Fix: gr.Interface passes input components to `fn` POSITIONALLY,
# so component order must match chunk_it(inventory_list, user_input_text).
# The original listed the user_input_text box first, which silently swapped
# the two arguments (the user's request was spliced in as the inventory list
# and vice versa). Labels are unchanged; only the order is corrected.
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs="text",
    title="Formatter Pro",
)

# inline=False: return control instead of embedding in a notebook cell.
iface.launch(inline=False)
|
|
|