# h2oGPT API call example

Documentation: https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md

A good summary of many of the parameters can be found in [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py).


One can interact with Gradio Client by using either native client or h2oGPT wrapper: 

- Using Gradio's native client:

  ```python
  from gradio_client import Client
  import ast
  
  HOST_URL = "http://localhost:7860"
  client = Client(HOST_URL)
  
  # string of dict for input
  kwargs = dict(instruction_nochat='Who are you?')
  res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
  
  # string of dict for output
  response = ast.literal_eval(res)['response']
  print(response)
  ```

- Using [h2oGPT wrapper for Gradio Native Client](https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md#h2ogpt-gradio-wrapper)

In [6]:
from gradio_client import Client
import ast
from pprint import pprint
import json
from tqdm import tqdm
from enum import Enum

class LangChainAction(Enum):
    """Values sent as the ``langchain_action`` request parameter."""

    # Used by the question-answering example over the whole collection.
    QUERY = "Query"
    # Used by the summarization examples; the wire value is plain "Summarize".
    SUMMARIZE_MAP = "Summarize"
    

# Host URL and API key are kept in a local file outside the notebook;
# despite the .txt extension it is parsed as JSON.
with open('../tokens/h2oGPT_details.txt') as details_file:
    gpt_details = json.load(details_file)
    print("Loaded h2oGPT details")

# HOST_URL = "http://localhost:7860"
HOST_URL = gpt_details["gpt_host_url"]
H2OGPT_KEY = gpt_details["h2ogpt_key"]
# Collection name used by the document calls below; both spellings are bound.
langchain_mode = 'UserData4'
LANGCHAIN_MODE = langchain_mode

client = Client(HOST_URL)

Loaded h2oGPT details


## Utility functions

In [7]:
import os
import shutil
import uuid
import requests
from requests.exceptions import HTTPError
import contextlib


def print_full_model_response(response):
    '''
    Helper function to print full response from the h2oGPT call, including all parameters.

    :param response: string returned by the API call; it is the `str()` of a dict
        and is parsed with `ast.literal_eval`. It must contain a `save_dict` key;
        a `sources` key is optional.

    Important keys/parameters inside `save_dict`:
    - `base_model` - model that was used to answer the API call
    - `extra_dict` - model parameters that were used to answer the API call
    - `prompt` - actual prompt sent to LLM
    - `where_from` - how hosted model is running: vLLM , tensor, ....

    The `h2ogpt_key` entry is removed from `save_dict` before printing so the
    API key does not end up in the printed output.
    '''
    print("Model Response with Parameters:\n")
    # Parse the argument itself. The previous version parsed the global `res`,
    # which only worked when the caller's variable happened to have that name.
    parsed = ast.literal_eval(response)
    save_dict = parsed['save_dict']
    # Remove the API key from save_dict
    save_dict.pop('h2ogpt_key', None)
    pprint(save_dict)
    print("\n")

    # An explicit key check replaces the bare `except:`, so only a missing
    # 'sources' entry leads to the "No sources" message.
    if 'sources' in parsed:
        print("Sources:\n")
        pprint(parsed['sources'])
        print("\n")
    else:
        print("No sources\n")


def makedirs(path, exist_ok=True, tmp_ok=False, use_base=False):
    """
    Create directory `path` (including parents) and return the path actually used.

    Avoid some inefficiency in os.makedirs()
    :param path: directory to create; None is returned unchanged
    :param exist_ok: if False, an already existing directory trips an assertion
    :param tmp_ok:  use /tmp if can't write locally
    :param use_base: if True and $H2OGPT_BASE_PATH is set, a relative path is
        placed under that base directory
    :return: the directory path that exists after the call; it differs from the
        input when the base path or the /tmp fallback was applied
    """
    if path is None:
        return path
    # if base path set, make relative to that, unless user_path absolute path
    if use_base:
        is_absolute = os.path.normpath(path) == os.path.normpath(os.path.abspath(path))
        base_env = os.getenv('H2OGPT_BASE_PATH')
        if not is_absolute and base_env is not None:
            base_dir = os.path.normpath(base_env)
            path = os.path.normpath(path)
            if not path.startswith(base_dir):
                path = os.path.normpath(os.path.join(base_env, path))

    # isdir() already implies the path exists
    if os.path.isdir(path):
        assert exist_ok, "Path already exists"
        return path
    try:
        os.makedirs(path, exist_ok=exist_ok)
        return path
    except FileExistsError:
        # e.g. soft link
        return path
    except PermissionError:
        if not tmp_ok:
            raise
        # os.path.join() discards '/tmp/' when the second part is absolute, so
        # strip leading separators; otherwise the retry would target the very
        # same unwritable location.
        fallback = os.path.join('/tmp/', os.fspath(path).lstrip(os.sep))
        print("Permission denied to %s, using %s instead" % (path, fallback), flush=True)
        os.makedirs(fallback, exist_ok=exist_ok)
        return fallback

        
def shutil_rmtree(*args, **kwargs):
    """Forward all arguments to `shutil.rmtree` and hand back its result."""
    outcome = shutil.rmtree(*args, **kwargs)
    return outcome


def remove(path: str):
    """
    Best-effort delete of a file or a directory tree.

    :param path: file or directory to delete; None or a non-existing path is a no-op
    :return: None. Ordinary errors raised while deleting are swallowed on purpose.
    """
    if path is None:
        return
    try:
        if not os.path.exists(path):
            return
        if os.path.isdir(path):
            shutil_rmtree(path, ignore_errors=True)
        else:
            # the file may vanish between the exists() check and the delete
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
    except Exception:
        # Deliberately best-effort: cleanup failures must not break the caller.
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate.
        pass


def atomic_move_simple(src, dst):
    """
    Move `src` to `dst`, then make sure nothing is left behind at `src`.

    A `shutil.Error` or `FileExistsError` raised by the move is ignored; in
    that case, as after a successful move, `src` is handed to `remove`.
    Any other exception from the move propagates.
    """
    with contextlib.suppress(shutil.Error, FileExistsError):
        shutil.move(src, dst)
    remove(src)


def download_simple(url, dest=None, overwrite=False, verbose=False):
    """
    Download `url` to a local file and return the path of that file.

    :param url: URL to fetch; `file://` URLs go through requests_file's FileAdapter
    :param dest: target file path, defaults to the last path component of `url`.
        A directory part is created through makedirs(tmp_ok=True, use_base=True),
        so the returned path can differ from the one passed in.
    :param overwrite: if False and the target file already exists, nothing is
        downloaded; if True the existing file is removed first
    :param verbose: print progress messages
    :return: path of the downloaded (or already present) file
    :raises requests.exceptions.RequestException: if the response status is not OK
    """
    if dest is None:
        dest = os.path.basename(url)
    base_path = os.path.dirname(dest)
    if base_path:  # else local path
        base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True)
        dest = os.path.join(base_path, os.path.basename(dest))

    if os.path.isfile(dest):
        if not overwrite:
            print("Already have %s from url %s, delete file if invalid" % (dest, str(url)), flush=True)
            return dest
        remove(dest)

    if verbose:
        print("BEGIN get url %s" % str(url), flush=True)

    session = None
    url_data = None
    try:
        if url.startswith("file://"):
            from requests_file import FileAdapter
            session = requests.Session()
            session.mount('file://', FileAdapter())
            url_data = session.get(url, stream=True)
        else:
            url_data = requests.get(url, stream=True)
        if verbose:
            print("GOT url %s" % str(url), flush=True)

        if url_data.status_code != requests.codes.ok:
            msg = "Cannot get url %s, code: %s, reason: %s" % (
                str(url),
                str(url_data.status_code),
                str(url_data.reason),
            )
            raise requests.exceptions.RequestException(msg)
        url_data.raw.decode_content = True

        # Write to a uniquely named temp file first; it is moved onto `dest`
        # only after the whole body has been copied.
        uuid_tmp = str(uuid.uuid4())[:6]
        dest_tmp = dest + "_dl_" + uuid_tmp + ".tmp"
        with open(dest_tmp, "wb") as f:
            shutil.copyfileobj(url_data.raw, f)
    finally:
        # With stream=True the connection stays open until the response is
        # closed, so release it (and the file:// session) on every path.
        if url_data is not None:
            url_data.close()
        if session is not None:
            session.close()

    atomic_move_simple(dest_tmp, dest)
    if verbose:
        print("DONE url %s" % str(url), flush=True)
    return dest

## Hello World example

In [8]:
# string of dict for input
kwargs = dict(instruction_nochat='Who are you?',
              h2ogpt_key=H2OGPT_KEY)
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')

# string of dict for output
response = ast.literal_eval(res)['response']
print("Model Response:\n")
pprint(response)

Model Response:

("  Hello! My name is LLaMA, I'm a large language model trained by a team of "
 'researcher at Meta AI. My primary function is to understand and respond to '
 'human input in a helpful and engaging manner. I can answer questions, '
 'provide information, and even generate creative content such as stories or '
 'dialogue. Is there anything specific you would like to know or talk about?')


In [9]:
print_full_model_response(res)

Model Response with Parameters:

{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',
 'error': '',
 'extra_dict': {'frequency_penalty': 0,
                'inference_server': 'vllm:192.176.243.12:5000',
                'max_tokens': 1024,
                'n': 1,
                'ntokens': None,
                'num_prompt_tokens': 13,
                'presence_penalty': 0.6,
                't_generate': 4.012332916259766,
                'temperature': 0,
                'tokens_persecond': None,
                'top_p': 1,
                'username': 'NO_REQUEST'},
 'output': "  Hello! My name is LLaMA, I'm a large language model trained by a "
           'team of researcher at Meta AI. My primary function is to '
           'understand and respond to human input in a helpful and engaging '
           'manner. I can answer questions, provide information, and even '
           'generate creative content such as stories or dialogue. Is there '
           'anything specific you would li

Setting the `temperature` parameter requires setting `do_sample` to `True`. For best reproducibility, set `do_sample` to `False`.


In [16]:
# string of dict for input
kwargs = dict(instruction_nochat='Who are you?',
              seed=123,
              temperature=0.5,
              do_sample=True,
              h2ogpt_key=H2OGPT_KEY)
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')

# string of dict for output
response = ast.literal_eval(res)['response']
print("Model Response:\n")
pprint(response)

Model Response:

("  Hello! I'm LLaMA, an AI assistant developed by Meta AI that can understand "
 "and respond to human input in a conversational manner. I'm trained on a "
 'massive dataset of text from the internet and can generate human-like '
 'responses to a wide range of topics and questions. I can be used to create '
 'chatbots, virtual assistants, and other applications that require natural '
 'language understanding and generation capabilities.')


In [17]:
print_full_model_response(res)

Model Response with Parameters:

{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',
 'error': '',
 'extra_dict': {'frequency_penalty': 0,
                'inference_server': 'vllm:192.176.243.12:5000',
                'max_tokens': 1024,
                'n': 1,
                'ntokens': None,
                'num_prompt_tokens': 13,
                'presence_penalty': 0.6,
                't_generate': 3.7804932594299316,
                'temperature': 0.5,
                'tokens_persecond': None,
                'top_p': 0.75,
                'username': 'NO_REQUEST'},
 'output': "  Hello! I'm LLaMA, an AI assistant developed by Meta AI that can "
           'understand and respond to human input in a conversational manner. '
           "I'm trained on a massive dataset of text from the internet and can "
           'generate human-like responses to a wide range of topics and '
           'questions. I can be used to create chatbots, virtual assistants, '
           'and other appl

## Example of Context only call with parameters

A good summary of many of the parameters can be found in [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py).

In the below example, we will set LLM model to use as well as some parameters.

In [21]:
# string of dict for input
kwargs = dict(instruction_nochat='Who are you?',
              visible_models=['h2oai/h2ogpt-4096-llama2-13b-chat'],
              langchain_mode='LLM',
              max_new_tokens=512,
              max_time=360,
              repetition_penalty=1.07,
              do_sample=True,
              temperature=0.1,
              top_p=0.75,
              penalty_alpha=0,
              h2ogpt_key=H2OGPT_KEY)
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')

# string of dict for output
response = ast.literal_eval(res)['response']
print("Model Response:\n")
pprint(response)

Model Response:

("  Hello! My name is LLaMA, I'm a large language model trained by a team of "
 'researcher at Meta AI. My primary function is to assist with tasks such as '
 'answering questions, providing information, and generating text. I am '
 'capable of understanding and responding to human input in a conversational '
 'manner. I am here to help and provide information to the best of my ability. '
 'Is there something specific you would like to know or discuss?')


In [22]:
print_full_model_response(res)

Model Response with Parameters:

{'base_model': 'h2oai/h2ogpt-4096-llama2-13b-chat',
 'error': '',
 'extra_dict': {'frequency_penalty': 0,
                'inference_server': 'vllm:192.176.243.12:5001',
                'max_tokens': 512,
                'n': 1,
                'ntokens': None,
                'num_prompt_tokens': 13,
                'presence_penalty': 0.6,
                't_generate': 2.1190145015716553,
                'temperature': 0.1,
                'tokens_persecond': None,
                'top_p': 0.75,
                'username': 'NO_REQUEST'},
 'output': "  Hello! My name is LLaMA, I'm a large language model trained by a "
           'team of researcher at Meta AI. My primary function is to assist '
           'with tasks such as answering questions, providing information, and '
           'generating text. I am capable of understanding and responding to '
           'human input in a conversational manner. I am here to help and '
           'provide inform

## Summarize Document with mode "Summarize"

This approach is useful for the following scenarios:
- Summarizing a given document
- Asking a question about a given document

This is different from asking a question against (searching) the full collection of documents.

### Step 1 - create shared Collection and upload documents

Currently there is no way to authenticate with the Gradio Client, so we will use a shared collection.

Additional examples of Client use can be found in the `test_client_chat_stream_langchain_steps3` function located in the `test_client_calls.py` file.

**Create Shared folder**:

In [23]:
user_path = 'user_path'
new_langchain_mode_text = '%s, %s, %s' % (langchain_mode, 'shared', user_path)
res = client.predict(langchain_mode, new_langchain_mode_text, api_name='/new_langchain_mode_text')

In [24]:
pprint(res)

({'__type__': 'update',
  'choices': [['UserData', 'UserData'],
              ['MyData', 'MyData'],
              ['LLM', 'LLM'],
              ['UserData4', 'UserData4']],
  'value': 'UserData4'},
 '',
 '/var/folders/_z/jf3ghwdx1kg905xm5p1nktlh0000gp/T/gradio/tmpplv8021u.json')


In [25]:
# Add a short piece of pasted text to the collection named by langchain_mode.
text = "Yufuu is a wonderful place and you should really visit because there is lots of sun."
# Four loader arguments, all left as None in this example
loaders = tuple([None, None, None, None])
# Positional values after the collection name are presumably chunk=True,
# chunk_size=512, embed=True, in the same order as the /add_file_api call
# further down - confirm against the server API.
res = client.predict(text, langchain_mode, True, 512, True,
                    *loaders,
                    H2OGPT_KEY,
                    api_name='/add_text')

In [26]:
pprint(res)

(None,
 'UserData4',
 '        <html>\n'
 '          <body>\n'
 '            <p>\n'
 '               Sources: <br>\n'
 '            </p>\n'
 '               <div style="overflow-y: auto;height:400px">\n'
 '               <table>\n'
 '<thead>\n'
 '<tr><th style="text-align: right;">  '
 'index</th><th>source                                                                                                                                   '
 '</th><th>head                                              </th></tr>\n'
 '</thead>\n'
 '<tbody>\n'
 '<tr><td style="text-align: right;">      1</td><td><font size="2"><a '
 'href="file/user_paste/_37aa0924-8.txt" target="_blank"  rel="noopener '
 'noreferrer">user_paste/_37aa0924-8.txt</a></font></td><td>Yufuu is a '
 'wonderful place and you should really v</td></tr>\n'
 '</tbody>\n'
 '</table>\n'
 '               </div>\n'
 '          </body>\n'
 '        </html>\n'
 '        ',
 '',
 '_37aa0924-8.txt')


Add document to collection via URL

In [27]:
import os
url = "https://www.africau.edu/images/default/sample.pdf"
res = client.predict(url,
                        langchain_mode, True, 512, True,
                        *loaders,
                        H2OGPT_KEY,
                        api_name='/add_url')

In [28]:
pprint(res)

(None,
 'UserData4',
 '        <html>\n'
 '          <body>\n'
 '            <p>\n'
 '               Sources: <br>\n'
 '            </p>\n'
 '               <div style="overflow-y: auto;height:400px">\n'
 '               <table>\n'
 '<thead>\n'
 '<tr><th style="text-align: right;">  '
 'index</th><th>source                                                                                                                                                                            '
 '</th><th>head                                              </th></tr>\n'
 '</thead>\n'
 '<tbody>\n'
 '<tr><td style="text-align: right;">      1</td><td><font size="2"><a '
 'href="file/user_paste/_37aa0924-8.txt" target="_blank"  rel="noopener '
 'noreferrer">user_paste/_37aa0924-8.txt</a></font>                                         '
 '</td><td>Yufuu is a wonderful place and you should really v</td></tr>\n'
 '<tr><td style="text-align: right;">      2</td><td><font size="2"><a '
 'href="https://www.africau

Download file and add to the new collection

In [29]:
import os
url = "https://www.nyserda.ny.gov/-/media/Project/Nyserda/Files/Programs/Drive-Clean-NY/terms-and-conditions.pdf"
test_file1 = os.path.join('/tmp/', 'terms-and-conditions.pdf')
download_simple(url, dest=test_file1)

# upload file(s).  Can be list or single file
# test_file_server - location of the uploaded file on the Gradio server
test_file_local, test_file_server = client.predict(test_file1, api_name='/upload_api')

In [30]:
print("Local File name:", test_file_local)
print("Remote (Gradio Server) File name:", test_file_server)

Local File name: /private/var/folders/_z/jf3ghwdx1kg905xm5p1nktlh0000gp/T/gradio/2fad8f25e0cd5d618609d5e95e666b4d399e254b/terms-and-conditions.pdf
Remote (Gradio Server) File name: /tmp/gradio/55e65c1a447610b8b4ee99717922af03099f9821/terms-and-conditions.pdf


Add the remote file to the h2oGPT collection

In [31]:
# Add the file that was uploaded to the Gradio server (test_file_server) to the collection.
chunk = True
chunk_size = 512
h2ogpt_key = H2OGPT_KEY
# The bare True after chunk_size is presumably the embed flag, and the four
# None values the loader arguments (compare the named doc_options/loaders
# tuples in the next /add_file_api call) - confirm against the server API.
res = client.predict(test_file_server,
                        langchain_mode, chunk, chunk_size, True,
                        None, None, None, None,
                        h2ogpt_key,
                        api_name='/add_file_api')

In [32]:
pprint(res)

(None,
 'UserData4',
 '        <html>\n'
 '          <body>\n'
 '            <p>\n'
 '               Sources: <br>\n'
 '            </p>\n'
 '               <div style="overflow-y: auto;height:400px">\n'
 '               <table>\n'
 '<thead>\n'
 '<tr><th style="text-align: right;">  '
 'index</th><th>source                                                                                                                                                                            '
 '</th><th>head                                              </th></tr>\n'
 '</thead>\n'
 '<tbody>\n'
 '<tr><td style="text-align: right;">      1</td><td><font size="2"><a '
 'href="file/user_paste/_37aa0924-8.txt" target="_blank"  rel="noopener '
 'noreferrer">user_paste/_37aa0924-8.txt</a></font>                                         '
 '</td><td>Yufuu is a wonderful place and you should really v</td></tr>\n'
 '<tr><td style="text-align: right;">      2</td><td><font size="2"><a '
 'href="https://www.africau

Add one more file:
- Upload to Gradio Server
- Add to Collection

In [33]:
import os
url = "https://cleanvehiclerebate.org/sites/default/files/docs/nav/transportation/cvrp/documents/CVRP-Implementation-Manual.pdf"
test_file1 = os.path.join('/tmp/', 'CVRP-Implementation-Manual.pdf')
download_simple(url, dest=test_file1)

# upload file(s).  Can be list or single file
# test_file_server - location of the uploaded file on the Gradio server
test_file_local, test_file_server = client.predict(test_file1, api_name='/upload_api')

In [34]:
chunk = True
chunk_size = 512
embed = True
h2ogpt_key = H2OGPT_KEY
loaders = tuple([None, None, None, None])
doc_options = tuple([langchain_mode, chunk, chunk_size, embed])

res = client.predict(
                test_file_server, *doc_options, *loaders, h2ogpt_key, api_name="/add_file_api"
            )

In [35]:
pprint(res)

(None,
 'UserData4',
 '        <html>\n'
 '          <body>\n'
 '            <p>\n'
 '               Sources: <br>\n'
 '            </p>\n'
 '               <div style="overflow-y: auto;height:400px">\n'
 '               <table>\n'
 '<thead>\n'
 '<tr><th style="text-align: right;">  '
 'index</th><th>source                                                                                                                                                                            '
 '</th><th>head                                              </th></tr>\n'
 '</thead>\n'
 '<tbody>\n'
 '<tr><td style="text-align: right;">      1</td><td><font size="2"><a '
 'href="file/user_paste/_37aa0924-8.txt" target="_blank"  rel="noopener '
 'noreferrer">user_paste/_37aa0924-8.txt</a></font>                                         '
 '</td><td>Yufuu is a wonderful place and you should really v</td></tr>\n'
 '<tr><td style="text-align: right;">      2</td><td><font size="2"><a '
 'href="https://www.africau

### Step 2 - retrieve full path to the document already uploaded to h2oGPT

In the example below, we retrieve the full paths of all documents loaded into the collection named by `langchain_mode` (`UserData4` in this notebook).

In [36]:
sources = ast.literal_eval(client.predict(langchain_mode, api_name='/get_sources_api'))
pprint(sources[:10])

['https://www.africau.edu/images/default/sample.pdf',
 'user_paste/_37aa0924-8.txt',
 'user_path/CVRP-Implementation-Manual.pdf',
 'user_path/terms-and-conditions.pdf']


### Step 3: Ask questions about the document

Parameters for the LLM input:
- `pre_prompt_summary` - prepended to the beginning of the LLM input
- Document content is sent in between `pre_prompt_summary` and `prompt_summary`
- `prompt_summary` - appended to the end of the LLM input

#### Summarize single document

In [38]:
instruction = None
document_choice = "user_path/terms-and-conditions.pdf"

langchain_action = LangChainAction.SUMMARIZE_MAP.value
stream_output = False
top_k_docs = 5

pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"

pre_prompt_query = None
prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # uses full document, not vectorDB chunks
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here is a summary of the text in 5 bullet points:

• The Charge NY Drive Clean Rebate Program offers rebates to residents, businesses, fleets, and government entities.
• The vehicle purchaser must be a New York State resident or business/fleet registered/licensed to do business in New York State.
• The vehicle purchaser must agree to register/lease the vehicle for at least 36 months in New York State.
• The vehicle purchaser must agree to participate in online surveys and research efforts and never modify the vehicle's emission control system or engine.
• The vehicle purchaser must provide accurate information and have the legal authority to commit to the program's obligations.


In [39]:
print_full_model_response(res)

Model Response with Parameters:

{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',
 'error': '',
 'extra_dict': {'add_search_to_context': False,
                'chat_conversation': [],
                'context': '',
                'do_sample': False,
                'document_choice': 'user_path/terms-and-conditions.pdf',
                'document_subset': 'Relevant',
                'early_stopping': False,
                'iinput': '',
                'inference_server': 'vllm:192.176.243.12:5000',
                'instruction': '',
                'langchain_action': 'Summarize',
                'langchain_agents': [],
                'langchain_mode': 'UserData4',
                'max_new_tokens': 256,
                'max_time': 360,
                'min_new_tokens': 0,
                'ntokens': None,
                'num_beams': 1,
                'num_prompt_tokens': 322,
                'num_return_sequences': 1,
                'penalty_alpha': 0.0,
                'promp

#### Additional Single document summary

In [40]:
instruction = None
document_choice = "https://www.africau.edu/images/default/sample.pdf"

langchain_action = LangChainAction.SUMMARIZE_MAP.value
stream_output = False
top_k_docs = 5

pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"

pre_prompt_query = None
prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # uses full document, not vectorDB chunks
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here's a summary of the text in 5 bullet points:

• A simple PDF file is being demonstrated.
• The file contains a lot of text, described as boring.
• The file is being used for Virtual Mechanics tutorials.
• The author finds typing the text boring.
• The author mentions that watching paint dry is even more boring.


Summarize California EV program

In [41]:
instruction = None
document_choice = "user_path/CVRP-Implementation-Manual.pdf"

langchain_action = LangChainAction.SUMMARIZE_MAP.value
stream_output = False
top_k_docs = 5

pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"

pre_prompt_query = None
prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # uses full document, not vectorDB chunks
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here is a summary of the key points in 5 bullet points:

• The Clean Vehicle Rebate Project (CVRP) provides rebates for purchasing or leasing eligible zero- and near-zero-emission vehicles.
• CVRP is administered by the California Air Resources Board (CARB) and aims to encourage the development and deployment of advanced technologies.
• Funding for the CVRP comes from the Greenhouse Gas Reduction Fund.
• The program outlines minimum requirements for implementation in the CVRP Terms and Conditions, Guidelines, and Funding Plan.
• The program benefits disadvantaged communities.


#### Summarize all documents in the Collection


In [42]:
instruction = None
langchain_action = LangChainAction.SUMMARIZE_MAP.value
stream_output = False
top_k_docs = 5

pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"

pre_prompt_query = None
prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # uses full document, not vectorDB chunks
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            #document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here is a summary of the key points in 5 bullet points:

• The Clean Vehicle Rebate Project (CVRP) provides rebates for purchasing or leasing eligible zero- and near-zero-emission vehicles.
• CVRP is administered by the California Air Resources Board (CARB) and aims to encourage the development and deployment of advanced technologies that reduce greenhouse gas emissions.
• Funding for the CVRP comes from the Greenhouse Gas Reduction Fund.
• The program benefits California citizens by providing immediate air pollution emission reductions.
• The program promotes the development of cleaner vehicles.


#### Question answering for a single document

We will use summary mode as well, even though we are not summarizing the document.   
This mode will enable us to send full document for question answering task.

In [43]:
instruction = "What is the eligibility criteria for the program?"
document_choice = "user_path/terms-and-conditions.pdf"

langchain_action = LangChainAction.SUMMARIZE_MAP.value
stream_output = False
top_k_docs = 5

pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"

# pre_prompt_query = """Pay attention and remember the information below, which will help to answer the question or imperative after the context ends.\n"""
# prompt_query = """According to only the information in the document sources provided within the context above, \n"""
pre_prompt_query = None
prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # uses full document, not vectorDB chunks
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here is a summary of the eligibility criteria for the Charge NY Drive Clean Rebate Program:

• You must be a resident of New York State, a New York State government entity, or registered/licensed to do business in New York State.
• You must purchase or lease a vehicle that is eligible for a rebate from the Charge NY Drive Clean Rebate Program.
• You must register the vehicle with the New York State Department of Motor Vehicles with an address located within New York State for at least thirty-six (36) months from the date of purchase.
• You must allow NYSERDA or its designee to verify the vehicle identification number (VIN) and registration with the DMV.
• You must maintain vehicle insurance as required by New York State law.

Does this help?


#### Question answering for all documents in the Collection

In [44]:
# Ask a question with the Query action instead of the Summarize action.
instruction = "What is the eligibility criteria for the program?"
# NOTE: document_choice is assigned but not sent - the matching kwarg below is
# commented out, so the request is not restricted to this document.
document_choice = "user_path/terms-and-conditions.pdf"

langchain_action = LangChainAction.QUERY.value
stream_output = False
top_k_docs = 5

# The summary prompts are not used for this request, so they are set to None.
#pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
#prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"
pre_prompt_summary = None
prompt_summary = None

pre_prompt_query = """Pay attention and remember the information below, which will help to answer the question or imperative after the context ends.\n"""
prompt_query = """According to only the information in the document sources provided within the context above, \n"""
#pre_prompt_query = None
#prompt_query = None

kwargs = dict(instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,  # "Query" here; NOTE(review): the "uses full document" remark on the Summarize cells presumably does not apply - confirm
            top_k_docs=top_k_docs,
            stream_output=stream_output,
            document_subset='Relevant',
            # document_choice=document_choice,
            max_new_tokens=256,
            max_time=360,
            do_sample=False,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            h2ogpt_key=H2OGPT_KEY
            )

# get result: the API takes the str() of the kwargs dict and returns the str() of a dict
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  According to the information provided in the context, the eligibility criteria for the Clean Vehicle Rebate Project (CVRP) includes:

1. Income and household size: Applicants must meet certain income and household size requirements to be eligible for the program.
2. Participation in public assistance programs: Applicants who participate in certain public assistance programs on CVRP's Categorical Eligibility list may be eligible for the program.
3. Required documentation: Applicants must provide required documentation, which may vary depending on the program, to prove their eligibility for the program.
4. Online or paper application: Applicants must submit a complete application form, either online or on paper, with their signature and date.
5. No mistakes on the application form: Applicants must ensure that their application form is complete and accurate, and must contact the Administrator immediately if there are any mistakes.
6. Updates to governing documents: Applicants must be aw

In [45]:
# Print the complete response held in `res` (not just its 'response' text),
# including the parameters reported back with the answer.
print_full_model_response(res)

Model Response with Parameters:

{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',
 'error': '',
 'extra_dict': {'add_search_to_context': False,
                'chat_conversation': [],
                'context': '',
                'do_sample': False,
                'document_choice': ['All'],
                'document_subset': 'Relevant',
                'early_stopping': False,
                'iinput': '',
                'inference_server': 'vllm:192.176.243.12:5000',
                'instruction': 'What is the eligibility criteria for the '
                               'program?',
                'langchain_action': 'Query',
                'langchain_agents': [],
                'langchain_mode': 'UserData4',
                'max_new_tokens': 256,
                'max_time': 360,
                'min_new_tokens': 0,
                'ntokens': None,
                'num_beams': 1,
                'num_prompt_tokens': 514,
                'num_return_sequences': 1,
          

In [46]:
# Summarization request limited to one document via document_choice.
instruction = "What is the income eligibility criteria for the program?"
document_choice = "user_path/CVRP-Implementation-Manual.pdf"
langchain_action = LangChainAction.SUMMARIZE_MAP.value
top_k_docs = 5
stream_output = False

# Summary prompts sent with the request.
# Alternative: pre_prompt_summary = prompt_summary = None
pre_prompt_summary = (
    "In order to write a concise single-paragraph or bulleted list summary, "
    "pay attention to the following text\n"
)
prompt_summary = (
    "Using only the text above, write a condensed and concise summary of key "
    "results as 5 bullet points:\n"
)

# Query prompts are not overridden in this request. Alternatives:
# pre_prompt_query = """Pay attention and remember the information below, which will help to answer the question or imperative after the context ends.\n"""
# prompt_query = """According to only the information in the document sources provided within the context above, \n"""
pre_prompt_query = prompt_query = None

kwargs = {
    'instruction': instruction,
    'langchain_mode': langchain_mode,
    'langchain_action': langchain_action,  # uses full document, not vectorDB chunks
    'top_k_docs': top_k_docs,
    'stream_output': stream_output,
    'document_subset': 'Relevant',
    'document_choice': document_choice,
    'max_new_tokens': 1026,
    'max_time': 360,
    'do_sample': False,
    'pre_prompt_query': pre_prompt_query,
    'prompt_query': prompt_query,
    'pre_prompt_summary': pre_prompt_summary,
    'prompt_summary': prompt_summary,
    'h2ogpt_key': H2OGPT_KEY,
}

# The endpoint takes the parameters as the string form of a dict and replies
# with a string of a dict, so the reply is parsed back before reading 'response'.
res = client.predict(str(kwargs), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  Sure! Here's a summary of the income eligibility criteria for the program based on the provided text:

• The CVRP's income eligibility criteria are based on gross annual household income.
• The maximum income eligibility levels are ﹩135,000 for single filers, ﹩175,000 for head-of-household filers, and ﹩200,000 for joint filers.
• Applicants who are claimed as dependents are not eligible for increased rebates regardless of their income.
• Income verification is completed using IRS Form 1040 and/or other proof of income documentation.
• The income cap applies to all eligible vehicle types except FCEVs.


Ask the collection a question and get an answer drawn from all documents in the collection

In [47]:
# Question put to the collection with the "Query" action; no document_choice is sent.
instruction = "What is the income eligibility criteria for the Clean Vehicle Rebate Project in the state of California?"
langchain_action = LangChainAction.QUERY.value
top_k_docs = 5
stream_output = False

# Summary prompts are not overridden in this request. Alternatives:
# pre_prompt_summary = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text\n"""
# prompt_summary = "Using only the text above, write a condensed and concise summary of key results as 5 bullet points:\n"
pre_prompt_summary = prompt_summary = None

# Query prompts sent with the request.
# Alternative: pre_prompt_query = prompt_query = None
pre_prompt_query = (
    "Pay attention and remember the information below, which will help to "
    "answer the question or imperative after the context ends.\n"
)
prompt_query = (
    "According to only the information in the document sources provided "
    "within the context above, \n"
)

kwargs = {
    'instruction': instruction,
    'langchain_mode': langchain_mode,
    'langchain_action': langchain_action,  # "Query"
    'top_k_docs': top_k_docs,
    'stream_output': stream_output,
    'document_subset': 'Relevant',
    # 'document_choice': document_choice,  # left out on purpose in this cell
    'max_new_tokens': 1026,
    'max_time': 360,
    'do_sample': False,
    'pre_prompt_query': pre_prompt_query,
    'prompt_query': prompt_query,
    'pre_prompt_summary': pre_prompt_summary,
    'prompt_summary': prompt_summary,
    'h2ogpt_key': H2OGPT_KEY,
}

# The endpoint takes the parameters as the string form of a dict and replies
# with a string of a dict, so the reply is parsed back before reading 'response'.
res = client.predict(str(kwargs), api_name='/submit_nochat_api')
response = ast.literal_eval(res)['response']
print(response)

  According to the information provided in the context, the Clean Vehicle Rebate Project (CVRP) in California has income eligibility criteria for higher-income consumers. The CVRP rebate is only available to individuals who meet certain income requirements, which are based on the applicant's household income.

The income eligibility criteria for the CVRP rebate are as follows:

* For households with a gross annual income of ﹩150,000 or less, the rebate is available for the full amount of ﹩2,500.
* For households with a gross annual income between ﹩150,001 and ﹩200,000, the rebate is reduced by 50%.
* For households with a gross annual income between ﹩200,001 and ﹩250,000, the rebate is reduced by 75%.
* For households with a gross annual income of ﹩250,001 or more, the rebate is not available.

It's important to note that these income eligibility criteria are subject to change, and the CVRP may have additional requirements or restrictions. It's always best to check the program's websit