Spaces:
Running
Running
Retrieval in RAG demo working.
Browse files- data/Graph RAG +41 -19
- server/executors/one_by_one.py +4 -3
- server/llm_ops.py +8 -8
- server/test_llm_ops.py +31 -6
- web/src/NodeWithTableView.svelte +13 -2
data/Graph RAG
CHANGED
|
@@ -56,10 +56,7 @@
|
|
| 56 |
],
|
| 57 |
"data": [
|
| 58 |
[
|
| 59 |
-
"
|
| 60 |
-
],
|
| 61 |
-
[
|
| 62 |
-
"---"
|
| 63 |
],
|
| 64 |
[
|
| 65 |
"### 1. **Overview**"
|
|
@@ -345,8 +342,8 @@
|
|
| 345 |
}
|
| 346 |
},
|
| 347 |
"position": {
|
| 348 |
-
"x": -
|
| 349 |
-
"y": -
|
| 350 |
},
|
| 351 |
"parentId": null
|
| 352 |
},
|
|
@@ -357,7 +354,7 @@
|
|
| 357 |
"title": "Add neighbors",
|
| 358 |
"params": {},
|
| 359 |
"display": null,
|
| 360 |
-
"error":
|
| 361 |
"meta": {
|
| 362 |
"name": "Add neighbors",
|
| 363 |
"params": {},
|
|
@@ -395,11 +392,12 @@
|
|
| 395 |
},
|
| 396 |
"type": "basic",
|
| 397 |
"sub_nodes": null
|
| 398 |
-
}
|
|
|
|
| 399 |
},
|
| 400 |
"position": {
|
| 401 |
-
"x": -
|
| 402 |
-
"y":
|
| 403 |
},
|
| 404 |
"parentId": null
|
| 405 |
},
|
|
@@ -454,7 +452,7 @@
|
|
| 454 |
"title": "Create prompt",
|
| 455 |
"params": {
|
| 456 |
"save_as": "prompt",
|
| 457 |
-
"template":
|
| 458 |
},
|
| 459 |
"display": null,
|
| 460 |
"error": null,
|
|
@@ -499,8 +497,8 @@
|
|
| 499 |
}
|
| 500 |
},
|
| 501 |
"position": {
|
| 502 |
-
"x":
|
| 503 |
-
"y":
|
| 504 |
},
|
| 505 |
"parentId": null
|
| 506 |
},
|
|
@@ -510,7 +508,28 @@
|
|
| 510 |
"data": {
|
| 511 |
"title": "View",
|
| 512 |
"params": {},
|
| 513 |
-
"display":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
"error": null,
|
| 515 |
"meta": {
|
| 516 |
"name": "View",
|
|
@@ -527,13 +546,16 @@
|
|
| 527 |
"outputs": {},
|
| 528 |
"type": "table_view",
|
| 529 |
"sub_nodes": null
|
| 530 |
-
}
|
|
|
|
| 531 |
},
|
| 532 |
"position": {
|
| 533 |
-
"x":
|
| 534 |
-
"y":
|
| 535 |
},
|
| 536 |
-
"parentId": null
|
|
|
|
|
|
|
| 537 |
},
|
| 538 |
{
|
| 539 |
"id": "RAG 1",
|
|
@@ -613,7 +635,7 @@
|
|
| 613 |
}
|
| 614 |
},
|
| 615 |
"position": {
|
| 616 |
-
"x": -
|
| 617 |
"y": 56.69054032617606
|
| 618 |
},
|
| 619 |
"parentId": null
|
|
|
|
| 56 |
],
|
| 57 |
"data": [
|
| 58 |
[
|
| 59 |
+
"hello"
|
|
|
|
|
|
|
|
|
|
| 60 |
],
|
| 61 |
[
|
| 62 |
"### 1. **Overview**"
|
|
|
|
| 342 |
}
|
| 343 |
},
|
| 344 |
"position": {
|
| 345 |
+
"x": -92.52685728742009,
|
| 346 |
+
"y": -177.9645433826994
|
| 347 |
},
|
| 348 |
"parentId": null
|
| 349 |
},
|
|
|
|
| 354 |
"title": "Add neighbors",
|
| 355 |
"params": {},
|
| 356 |
"display": null,
|
| 357 |
+
"error": null,
|
| 358 |
"meta": {
|
| 359 |
"name": "Add neighbors",
|
| 360 |
"params": {},
|
|
|
|
| 392 |
},
|
| 393 |
"type": "basic",
|
| 394 |
"sub_nodes": null
|
| 395 |
+
},
|
| 396 |
+
"collapsed": false
|
| 397 |
},
|
| 398 |
"position": {
|
| 399 |
+
"x": -113.7488497864376,
|
| 400 |
+
"y": 145.42569409180135
|
| 401 |
},
|
| 402 |
"parentId": null
|
| 403 |
},
|
|
|
|
| 452 |
"title": "Create prompt",
|
| 453 |
"params": {
|
| 454 |
"save_as": "prompt",
|
| 455 |
+
"template": "{text}"
|
| 456 |
},
|
| 457 |
"display": null,
|
| 458 |
"error": null,
|
|
|
|
| 497 |
}
|
| 498 |
},
|
| 499 |
"position": {
|
| 500 |
+
"x": 324.81988008998496,
|
| 501 |
+
"y": -9.071826950189632
|
| 502 |
},
|
| 503 |
"parentId": null
|
| 504 |
},
|
|
|
|
| 508 |
"data": {
|
| 509 |
"title": "View",
|
| 510 |
"params": {},
|
| 511 |
+
"display": {
|
| 512 |
+
"dataframes": {
|
| 513 |
+
"df": {
|
| 514 |
+
"columns": [
|
| 515 |
+
"text",
|
| 516 |
+
"rag",
|
| 517 |
+
"prompt"
|
| 518 |
+
],
|
| 519 |
+
"data": [
|
| 520 |
+
[
|
| 521 |
+
"What's your cheapest drink?",
|
| 522 |
+
[
|
| 523 |
+
"### 6. **Drinks**",
|
| 524 |
+
"| Size | Price |\n|--------------------|---------------|\n| 20 oz Bottle | $1.99 |\n| 2-Liter Bottle | $3.50 |",
|
| 525 |
+
"Available options: Coke, Diet Coke, Sprite, Root Beer, Lemonade."
|
| 526 |
+
],
|
| 527 |
+
"{text}"
|
| 528 |
+
]
|
| 529 |
+
]
|
| 530 |
+
}
|
| 531 |
+
}
|
| 532 |
+
},
|
| 533 |
"error": null,
|
| 534 |
"meta": {
|
| 535 |
"name": "View",
|
|
|
|
| 546 |
"outputs": {},
|
| 547 |
"type": "table_view",
|
| 548 |
"sub_nodes": null
|
| 549 |
+
},
|
| 550 |
+
"beingResized": false
|
| 551 |
},
|
| 552 |
"position": {
|
| 553 |
+
"x": 659.7852850905575,
|
| 554 |
+
"y": -41.48719521129472
|
| 555 |
},
|
| 556 |
+
"parentId": null,
|
| 557 |
+
"width": 492,
|
| 558 |
+
"height": 391
|
| 559 |
},
|
| 560 |
{
|
| 561 |
"id": "RAG 1",
|
|
|
|
| 635 |
}
|
| 636 |
},
|
| 637 |
"position": {
|
| 638 |
+
"x": -449.6099563104567,
|
| 639 |
"y": 56.69054032617606
|
| 640 |
},
|
| 641 |
"parentId": null
|
server/executors/one_by_one.py
CHANGED
|
@@ -76,12 +76,13 @@ def execute(ws, catalog, cache=None):
|
|
| 76 |
for node in ws.nodes:
|
| 77 |
node.data.error = None
|
| 78 |
op = catalog[node.data.title]
|
| 79 |
-
# Start tasks for nodes that have no inputs.
|
| 80 |
-
if
|
| 81 |
tasks[node.id] = [NO_INPUT]
|
| 82 |
batch_inputs = {}
|
| 83 |
# Run the rest until we run out of tasks.
|
| 84 |
-
|
|
|
|
| 85 |
next_stage = {}
|
| 86 |
while tasks:
|
| 87 |
n, ts = tasks.popitem()
|
|
|
|
| 76 |
for node in ws.nodes:
|
| 77 |
node.data.error = None
|
| 78 |
op = catalog[node.data.title]
|
| 79 |
+
# Start tasks for nodes that have no non-batch inputs.
|
| 80 |
+
if all([i.position == 'top' for i in op.inputs.values()]):
|
| 81 |
tasks[node.id] = [NO_INPUT]
|
| 82 |
batch_inputs = {}
|
| 83 |
# Run the rest until we run out of tasks.
|
| 84 |
+
stages = get_stages(ws, catalog)
|
| 85 |
+
for stage in stages:
|
| 86 |
next_stage = {}
|
| 87 |
while tasks:
|
| 88 |
n, ts = tasks.popitem()
|
server/llm_ops.py
CHANGED
|
@@ -54,32 +54,32 @@ def split_document(input, *, delimiter: str = '\\n\\n'):
|
|
| 54 |
@ops.input_position(input="top")
|
| 55 |
@op("Build document graph")
|
| 56 |
def build_document_graph(input):
|
| 57 |
-
|
| 58 |
-
return pd.DataFrame([{'source': i, 'target': i+1} for i in range(len(chunks)-1)]),
|
| 59 |
|
| 60 |
@ops.input_position(nodes="top", edges="top")
|
| 61 |
@op("Predict links")
|
| 62 |
def predict_links(nodes, edges):
|
| 63 |
'''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
|
| 64 |
-
edges = edges.to_dict(orient='records')
|
| 65 |
edge_map = {} # Source -> [Targets]
|
| 66 |
for edge in edges:
|
| 67 |
edge_map.setdefault(edge['source'], [])
|
| 68 |
edge_map[edge['source']].append(edge['target'])
|
| 69 |
new_edges = []
|
| 70 |
-
for
|
| 71 |
-
for t in edge_map.get(target, []):
|
| 72 |
-
new_edges.append({'source': source, 'target': t})
|
| 73 |
-
return
|
| 74 |
|
| 75 |
@ops.input_position(nodes="top", edges="top")
|
| 76 |
@op("Add neighbors")
|
| 77 |
def add_neighbors(nodes, edges, item):
|
|
|
|
|
|
|
| 78 |
matches = item['rag']
|
| 79 |
additional_matches = []
|
| 80 |
for m in matches:
|
| 81 |
node = nodes[nodes['text'] == m].index[0]
|
| 82 |
-
neighbors = edges[edges['source'] == node]['target']
|
| 83 |
additional_matches.extend(nodes.loc[neighbors, 'text'])
|
| 84 |
return {**item, 'rag': matches + additional_matches}
|
| 85 |
|
|
|
|
| 54 |
@ops.input_position(input="top")
|
| 55 |
@op("Build document graph")
|
| 56 |
def build_document_graph(input):
|
| 57 |
+
return [{'source': i, 'target': i+1} for i in range(len(input)-1)]
|
|
|
|
| 58 |
|
| 59 |
@ops.input_position(nodes="top", edges="top")
|
| 60 |
@op("Predict links")
|
| 61 |
def predict_links(nodes, edges):
|
| 62 |
'''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
|
|
|
|
| 63 |
edge_map = {} # Source -> [Targets]
|
| 64 |
for edge in edges:
|
| 65 |
edge_map.setdefault(edge['source'], [])
|
| 66 |
edge_map[edge['source']].append(edge['target'])
|
| 67 |
new_edges = []
|
| 68 |
+
for edge in edges:
|
| 69 |
+
for t in edge_map.get(edge['target'], []):
|
| 70 |
+
new_edges.append({'source': edge['source'], 'target': t})
|
| 71 |
+
return edges + new_edges
|
| 72 |
|
| 73 |
@ops.input_position(nodes="top", edges="top")
|
| 74 |
@op("Add neighbors")
|
| 75 |
def add_neighbors(nodes, edges, item):
|
| 76 |
+
nodes = pd.DataFrame(nodes)
|
| 77 |
+
edges = pd.DataFrame(edges)
|
| 78 |
matches = item['rag']
|
| 79 |
additional_matches = []
|
| 80 |
for m in matches:
|
| 81 |
node = nodes[nodes['text'] == m].index[0]
|
| 82 |
+
neighbors = edges[edges['source'] == node]['target'].to_list()
|
| 83 |
additional_matches.extend(nodes.loc[neighbors, 'text'])
|
| 84 |
return {**item, 'rag': matches + additional_matches}
|
| 85 |
|
server/test_llm_ops.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import unittest
|
|
|
|
| 2 |
from . import llm_ops
|
|
|
|
| 3 |
from . import workspace
|
| 4 |
|
| 5 |
def make_node(id, op, type='basic', **params):
|
|
@@ -11,7 +13,7 @@ def make_node(id, op, type='basic', **params):
|
|
| 11 |
)
|
| 12 |
def make_input(id):
|
| 13 |
return make_node(
|
| 14 |
-
id, 'Input',
|
| 15 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
| 16 |
key='problem')
|
| 17 |
def make_edge(source, target, targetHandle='input'):
|
|
@@ -22,7 +24,7 @@ class LLMOpsTest(unittest.TestCase):
|
|
| 22 |
def testExecute(self):
|
| 23 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
| 24 |
make_node(
|
| 25 |
-
'0', 'Input',
|
| 26 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
| 27 |
key='problem'),
|
| 28 |
make_node(
|
|
@@ -30,8 +32,9 @@ class LLMOpsTest(unittest.TestCase):
|
|
| 30 |
], edges=[
|
| 31 |
make_edge('0', '1')
|
| 32 |
])
|
| 33 |
-
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
def testStages(self):
|
| 37 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
|
@@ -43,8 +46,30 @@ class LLMOpsTest(unittest.TestCase):
|
|
| 43 |
make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
|
| 44 |
make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
|
| 45 |
])
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
if __name__ == '__main__':
|
| 50 |
unittest.main()
|
|
|
|
| 1 |
import unittest
|
| 2 |
+
from . import ops
|
| 3 |
from . import llm_ops
|
| 4 |
+
from .executors import one_by_one
|
| 5 |
from . import workspace
|
| 6 |
|
| 7 |
def make_node(id, op, type='basic', **params):
|
|
|
|
| 13 |
)
|
| 14 |
def make_input(id):
|
| 15 |
return make_node(
|
| 16 |
+
id, 'Input CSV',
|
| 17 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
| 18 |
key='problem')
|
| 19 |
def make_edge(source, target, targetHandle='input'):
|
|
|
|
| 24 |
def testExecute(self):
|
| 25 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
| 26 |
make_node(
|
| 27 |
+
'0', 'Input CSV',
|
| 28 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
| 29 |
key='problem'),
|
| 30 |
make_node(
|
|
|
|
| 32 |
], edges=[
|
| 33 |
make_edge('0', '1')
|
| 34 |
])
|
| 35 |
+
catalog = ops.CATALOGS[ws.env]
|
| 36 |
+
one_by_one.execute(ws, catalog)
|
| 37 |
+
# self.assertEqual('', ws.nodes[1].data.display)
|
| 38 |
|
| 39 |
def testStages(self):
|
| 40 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
|
|
|
| 46 |
make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
|
| 47 |
make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
|
| 48 |
])
|
| 49 |
+
catalog = ops.CATALOGS[ws.env]
|
| 50 |
+
stages = one_by_one.get_stages(ws, catalog)
|
| 51 |
+
print(stages)
|
| 52 |
+
# self.assertEqual('', stages)
|
| 53 |
+
|
| 54 |
+
def testStagesMultiInput(self):
|
| 55 |
+
ws = workspace.Workspace(env='LLM logic', nodes=[
|
| 56 |
+
make_node('doc', 'Input document'),
|
| 57 |
+
make_node('split', 'Split document'),
|
| 58 |
+
make_node('graph', 'Build document graph'),
|
| 59 |
+
make_node('chat', 'Input chat'),
|
| 60 |
+
make_node('rag', 'RAG'),
|
| 61 |
+
make_node('neighbors', 'Add neighbors'),
|
| 62 |
+
], edges=[
|
| 63 |
+
make_edge('doc', 'split'), make_edge('split', 'graph'),
|
| 64 |
+
make_edge('split', 'rag', 'db'), make_edge('chat', 'rag', 'input'),
|
| 65 |
+
make_edge('split', 'neighbors', 'nodes'),
|
| 66 |
+
make_edge('graph', 'neighbors', 'edges'),
|
| 67 |
+
make_edge('rag', 'neighbors', 'item'),
|
| 68 |
+
])
|
| 69 |
+
catalog = ops.CATALOGS[ws.env]
|
| 70 |
+
stages = one_by_one.get_stages(ws, catalog)
|
| 71 |
+
print(stages)
|
| 72 |
+
# self.assertEqual('', stages)
|
| 73 |
|
| 74 |
if __name__ == '__main__':
|
| 75 |
unittest.main()
|
web/src/NodeWithTableView.svelte
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
<script lang="ts">
|
| 2 |
import { type NodeProps } from '@xyflow/svelte';
|
| 3 |
-
import { Tabulator } from 'tabulator-tables';
|
| 4 |
import LynxKiteNode from './LynxKiteNode.svelte';
|
| 5 |
import Table from './Table.svelte';
|
| 6 |
type $$Props = NodeProps;
|
|
@@ -14,7 +13,16 @@
|
|
| 14 |
{#each Object.entries(data.display.dataframes || {}) as [name, df]}
|
| 15 |
{#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
|
| 16 |
{#if single || open[name]}
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
{/if}
|
| 19 |
{/each}
|
| 20 |
{#each Object.entries(data.display.others || {}) as [name, o]}
|
|
@@ -35,4 +43,7 @@
|
|
| 35 |
table {
|
| 36 |
table-layout: fixed;
|
| 37 |
}
|
|
|
|
|
|
|
|
|
|
| 38 |
</style>
|
|
|
|
| 1 |
<script lang="ts">
|
| 2 |
import { type NodeProps } from '@xyflow/svelte';
|
|
|
|
| 3 |
import LynxKiteNode from './LynxKiteNode.svelte';
|
| 4 |
import Table from './Table.svelte';
|
| 5 |
type $$Props = NodeProps;
|
|
|
|
| 13 |
{#each Object.entries(data.display.dataframes || {}) as [name, df]}
|
| 14 |
{#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
|
| 15 |
{#if single || open[name]}
|
| 16 |
+
{#if df.data.length > 1}
|
| 17 |
+
<Table columns={df.columns} data={df.data} />
|
| 18 |
+
{:else}
|
| 19 |
+
<dl>
|
| 20 |
+
{#each df.columns as c, i}
|
| 21 |
+
<dt>{c}</dt>
|
| 22 |
+
<dd>{df.data[0][i]}</dd>
|
| 23 |
+
{/each}
|
| 24 |
+
</dl>
|
| 25 |
+
{/if}
|
| 26 |
{/if}
|
| 27 |
{/each}
|
| 28 |
{#each Object.entries(data.display.others || {}) as [name, o]}
|
|
|
|
| 43 |
table {
|
| 44 |
table-layout: fixed;
|
| 45 |
}
|
| 46 |
+
dl {
|
| 47 |
+
margin: 10px;
|
| 48 |
+
}
|
| 49 |
</style>
|