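"""Post-process a chunked screenplay analysis with the Gemini API.

Reads an analysis file split into "### Title ###" sections, asks Gemini to
deduplicate and smooth each section, and writes the reassembled result.

Expected input shape (section titles here are illustrative):

    ### Character Development ###
    ...analysis text...

    ### Dialogue ###
    ...analysis text...
"""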
import logging
import os

import google.generativeai as genai

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class AnalysisPostProcessor:
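    """Cleans a chunk-generated screenplay analysis one section at a time."""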
    def __init__(self):
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

    def read_sections(self, filepath: str) -> dict:
        """Read and separate the analysis into sections"""
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        sections = {}
        current_section = None
        current_content = []

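        # A header line looks like "### Title ###"; any lines before the
        # first header are dropped.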
        for line in content.split('\n'):
            if line.startswith('### ') and line.endswith(' ###'):
                if current_section:
                    sections[current_section] = '\n'.join(current_content)
                current_section = line.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)

        if current_section:
            sections[current_section] = '\n'.join(current_content)

        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Clean individual section using Gemini"""
        prompt = f"""You are processing a section of screenplay analysis titled "{title}".
        The original analysis was generated by analyzing chunks of the screenplay, 
        which may have led to some redundancy and discontinuity.

        Your task:
        1. Remove any redundant observations
        2. Stitch together related insights that may be separated
        3. Ensure the analysis flows naturally from beginning to end
        4. Preserve ALL unique insights and specific examples
        5. Maintain the analytical depth while making it more coherent

        Original {title} section:
        {content}

        Provide the cleaned and coherent version maintaining the same analytical depth."""

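        # Fall back to the original text on API errors so a single failed
        # call never loses a section.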
        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception as e:
            logger.error(f"Error cleaning {title}: {e}")
            return content

    def process_analysis(self, input_path: str, output_path: str):
        """Process the entire analysis file"""
        try:
            # Read and separate sections
            sections = self.read_sections(input_path)

            # Process each section
            cleaned_sections = {}
            for title, content in sections.items():
                logger.info(f"Processing {title}")
                cleaned_sections[title] = self.clean_section(title, content)

            # Combine sections
            final_analysis = "SCREENPLAY CREATIVE ANALYSIS\n\n"
            for title, content in cleaned_sections.items():
                final_analysis += f"### {title} ###\n\n{content}\n\n"

            # Save result
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(final_analysis)

            logger.info(f"Cleaned analysis saved to: {output_path}")
            return True

        except Exception as e:
            logger.error(f"Error in post-processing: {e}")
            return False

def main():
    processor = AnalysisPostProcessor()
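    # Placeholder paths; replace with the real analysis file locations.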
    input_file = "path/to/creative_analysis.txt"
    output_file = "path/to/cleaned_creative_analysis.txt"
    processor.process_analysis(input_file, output_file)

if __name__ == "__main__":
    main()