File size: 3,547 Bytes
e2cc090 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# Module setup: stdlib imports, the Gemini SDK, and a module-level logger.
import os
import google.generativeai as genai  # Google Gemini SDK; configured in AnalysisPostProcessor.__init__
from pathlib import Path
import logging
# Configure root logging once at import time so INFO-level progress messages are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AnalysisPostProcessor:
    """Post-process a chunked screenplay analysis with the Gemini API.

    Reads an analysis file split into ``### Title ###`` sections, asks the
    model to de-duplicate and smooth each section, and writes the
    re-assembled result to a new file.
    """

    def __init__(self):
        """Configure the Gemini client.

        Raises:
            ValueError: if the GOOGLE_API_KEY environment variable is unset,
                so callers fail fast instead of hitting an opaque API error.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

    def read_sections(self, filepath: str) -> dict:
        """Read *filepath* and split it into ``{title: body}`` sections.

        A header line looks exactly like ``### Title ###``; everything up to
        the next header belongs to that section. Any text before the first
        header is discarded (matches the writer format in process_analysis).
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        content = Path(filepath).read_text(encoding="utf-8")
        sections = {}
        current_section = None
        current_content = []
        for line in content.split('\n'):
            if line.startswith('### ') and line.endswith(' ###'):
                if current_section:
                    sections[current_section] = '\n'.join(current_content)
                current_section = line.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)
        # Flush the final section — the loop above only flushes on a new header.
        if current_section:
            sections[current_section] = '\n'.join(current_content)
        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Clean one section via Gemini; return the original text on failure."""
        prompt = f"""You are processing a section of screenplay analysis titled "{title}".
The original analysis was generated by analyzing chunks of the screenplay,
which may have led to some redundancy and discontinuity.
Your task:
1. Remove any redundant observations
2. Stitch together related insights that may be separated
3. Ensure the analysis flows naturally from beginning to end
4. Preserve ALL unique insights and specific examples
5. Maintain the analytical depth while making it more coherent
Original {title} section:
{content}
Provide the cleaned and coherent version maintaining the same analytical depth."""
        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception:
            # Best-effort: keep the raw section rather than lose content.
            # logger.exception preserves the traceback (logger.error(f"...") did not).
            logger.exception("Error cleaning %s", title)
            return content

    def process_analysis(self, input_path: str, output_path: str) -> bool:
        """Clean every section of *input_path* and write the result to *output_path*.

        Returns:
            True on success, False on any failure (the error is logged).
        """
        try:
            # Read and separate sections.
            sections = self.read_sections(input_path)
            # Process each section through the model.
            cleaned_sections = {}
            for title, content in sections.items():
                logger.info("Processing %s", title)
                cleaned_sections[title] = self.clean_section(title, content)
            # Re-assemble with the same "### Title ###" header format that
            # read_sections parses; join once instead of quadratic +=.
            parts = ["SCREENPLAY CREATIVE ANALYSIS\n\n"]
            for title, content in cleaned_sections.items():
                parts.append(f"### {title} ###\n\n{content}\n\n")
            Path(output_path).write_text("".join(parts), encoding="utf-8")
            logger.info("Cleaned analysis saved to: %s", output_path)
            return True
        except Exception:
            logger.exception("Error in post-processing")
            return False
def main():
    """Entry point: clean a creative-analysis file and write the result."""
    input_file = "path/to/creative_analysis.txt"
    output_file = "path/to/cleaned_creative_analysis.txt"
    AnalysisPostProcessor().process_analysis(input_file, output_file)


if __name__ == "__main__":
    main()