|
|
|
|
|
import os |
|
|
import shutil |
|
|
import sys |
|
|
from copy import deepcopy |
|
|
from glob import glob |
|
|
|
|
|
import func_argparse |
|
|
import jsonlines |
|
|
import tqdm |
|
|
|
|
|
from apis import CodeExecutor, HistoryMark |
|
|
from presentation import Presentation |
|
|
from utils import Config, pjoin, ppt_to_images |
|
|
|
|
|
# Shared Config instance, passed to Presentation.from_file when loading decks.
# NOTE(review): "/tmp" is presumably its working directory — confirm in utils.Config.
config = Config("/tmp")
# Module-wide executor that rebuild_pptx uses to replay recorded actions.
# NOTE(review): the meaning of the 0 argument is not visible here — confirm
# against apis.CodeExecutor.
code_executor = CodeExecutor(0)
|
|
|
|
|
|
|
|
def rebuild_pptx(agent_steps: str, prs: Presentation):
    """Replay the successful steps of an agent log onto copies of source slides.

    Args:
        agent_steps: Path to a JSON-lines file; every record is unpacked as
            ``(mark, slide_idx, actions)``.
        prs: Source presentation whose ``slides`` serve as templates.
            ``slide_idx`` is used 1-based (``prs.slides[slide_idx - 1]``).

    Returns:
        A list holding one edited deep copy of a source slide per record
        marked ``HistoryMark.API_CALL_CORRECT``, in log order.

    Raises:
        ValueError: If the log has no records (the log file is deleted
            first), or if its last record is not marked
            ``HistoryMark.API_CALL_CORRECT``.
        AssertionError: If replaying a recorded step makes the executor
            return feedback (anything other than ``None``).
    """
    # Read everything inside a context manager so the handle is closed
    # deterministically, and in particular before the file may be removed.
    with jsonlines.open(agent_steps) as reader:
        steps = list(reader)

    if not steps:
        os.remove(agent_steps)
        raise ValueError(f"Jump {agent_steps} as no steps")
    if steps[-1][0] != HistoryMark.API_CALL_CORRECT:
        raise ValueError(f"Jump {agent_steps} as last step is failed")

    slides = []
    for mark, slide_idx, actions in steps:
        # Only steps that succeeded during the original run are replayed.
        if mark != HistoryMark.API_CALL_CORRECT:
            continue
        slide = deepcopy(prs.slides[slide_idx - 1])
        slides.append(slide)
        feedback = code_executor.execute_actions(actions, slide)
        # Explicit raise instead of `assert`, which is stripped under
        # `python -O`; the exception type is kept for existing callers.
        if feedback is not None:
            raise AssertionError(feedback)
    return slides
|
|
|
|
|
|
|
|
def rebuild_all(
    setting: str = "*", topic: str = "*", out_filename: str = "rebuild.pptx"
):
    """Rebuild a presentation for every agent log found under ``data/``.

    For each folder matching ``data/{topic}/pptx/*`` the ``source.pptx`` in
    it is loaded once, then every
    ``{folder}/{setting}/*/agent_steps.jsonl`` log is replayed with
    ``rebuild_pptx`` and the result is saved next to the log.

    Args:
        setting: Glob pattern for the setting directory inside each folder.
        topic: Glob pattern for the topic directory under ``data/``.
        out_filename: Name of the file written beside each log. Logs whose
            output file already exists are skipped.

    Failures on an individual log are reported on stderr and do not stop
    the run.
    """
    for folder in tqdm.tqdm(glob(f"data/{topic}/pptx/*")):
        prs = Presentation.from_file(pjoin(folder, "source.pptx"), config)
        # One container per source deck; only its slides are swapped per log.
        pptx_container = deepcopy(prs)
        for agent_steps in glob(pjoin(folder, setting, "*", "agent_steps.jsonl")):
            dst = pjoin(os.path.dirname(agent_steps), out_filename)
            if os.path.exists(dst):
                continue
            try:
                pptx_container.slides = rebuild_pptx(agent_steps, prs)
                pptx_container.save(dst)
            except Exception as e:
                # Best-effort batch: keep going, but say what was skipped and
                # why. tqdm.write avoids corrupting the progress bar.
                tqdm.tqdm.write(f"Skip {agent_steps}: {e!r}", file=sys.stderr)
                continue
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Any command-line argument: expose rebuild_all as a CLI via func_argparse.
    if len(sys.argv) != 1:
        func_argparse.main(rebuild_all)

    else:
        # No arguments: rebuild one hard-coded sample.
        # NOTE(review): looks like a manual smoke test — confirm.
        # Start from a fresh, empty ./test directory.
        shutil.rmtree("./test", ignore_errors=True)
        os.makedirs("./test", exist_ok=True)

        # Hard-coded sample: source deck folder, setting directory, and the
        # per-document sub-directory that holds agent_steps.jsonl.
        source_folder = (
            "data/education/pptx/Open Science - PhD Human Rights - 2021 - module 3"
        )
        setting = "PPTCrew-Qwen2.5+Qwen2.5+Qwen2-VL"
        pdf = "37-105-1-PB (3)"

        prs = Presentation.from_file(pjoin(source_folder, "source.pptx"), config)
        # Copy of the source deck whose slides are replaced by the replayed ones.
        container = deepcopy(prs)
        container.slides = rebuild_pptx(
            pjoin(source_folder, setting, pdf, "agent_steps.jsonl"), prs
        )
|
|
|