Commit 8e753b3
Parent(s): 190ea81

Rm image as citation

Files changed:
- api/chatbot.py            +67 -32
- search/engines/image.py   +42 -13
api/chatbot.py
CHANGED
@@ -18,7 +18,7 @@ class GeminiClient:
             logger.warning("FlashAPI not set - Gemini client will use fallback responses")
             self.client = None
         else:
-
+            self.client = genai.Client(api_key=gemini_flash_api_key)
 
     def generate_content(self, prompt: str, model: str = "gemini-2.5-flash", temperature: float = 0.7) -> str:
         """Generate content using Gemini API"""
@@ -205,8 +205,8 @@ class CookingTutorChatbot:
             images = source_aggregation['images']
             if images:
                 logger.info(f"Found {len(images)} images from search")
-                # Create enhanced image data with better frontend integration
-                enhanced_images = self._enhance_images_for_frontend(images[:
+                # Create enhanced image data with better frontend integration - get more images
+                enhanced_images = self._enhance_images_for_frontend(images[:6], user_query)
                 response_data['images'] = enhanced_images
 
                 # Create structured content with image placement suggestions
@@ -446,7 +446,7 @@ class CookingTutorChatbot:
         return ''.join(enhanced_sections)
 
     def _create_structured_content(self, text: str, images: List[Dict]) -> List[Dict]:
-        """Create structured content blocks for optimal frontend rendering"""
+        """Create structured content blocks for optimal frontend rendering with inline image placement"""
         if not images:
             return [{'type': 'text', 'content': text}]
 
@@ -457,49 +457,84 @@ class CookingTutorChatbot:
         image_index = 0
 
         for section in sections:
-            #
-
-                'type': 'text',
-                'content': section['content'].strip(),
-                'section_type': section['type']
-            })
+            # Split section content into paragraphs for better inline placement
+            paragraphs = section['content'].strip().split('\n\n')
 
-
-
-
-                placement_context = image['placement_context']
-
-                should_add_image = (
-                    (section['type'] == 'ingredients' and placement_context == 'after_ingredients') or
-                    (section['type'] == 'instructions' and placement_context == 'after_instructions') or
-                    (section['type'] == 'tips' and placement_context == 'after_tips') or
-                    (section['type'] == 'intro' and placement_context == 'after_intro')
-                )
-
-                if should_add_image:
+            for i, paragraph in enumerate(paragraphs):
+                if paragraph.strip():
+                    # Add paragraph as text block
                     structured_blocks.append({
-                        'type': '
-                        '
-                        'placement': 'after_section',
+                        'type': 'text',
+                        'content': paragraph.strip(),
                         'section_type': section['type']
                     })
-
-
-
+
+                    # Check if we should add an image after this paragraph
+                    if image_index < len(images):
+                        image = images[image_index]
+                        placement_context = image['placement_context']
+
+                        # More aggressive inline placement
+                        should_add_image = (
+                            # Add images more frequently for better visual flow
+                            (section['type'] == 'ingredients' and placement_context == 'after_ingredients' and i == 0) or
+                            (section['type'] == 'instructions' and placement_context == 'after_instructions' and i == 0) or
+                            (section['type'] == 'tips' and placement_context == 'after_tips' and i == 0) or
+                            (section['type'] == 'intro' and placement_context == 'after_intro' and i == 0) or
+                            # Add images between paragraphs for better distribution
+                            (i == 1 and image_index < len(images) - 1) or  # Second paragraph gets an image
+                            (i == 2 and image_index < len(images) - 2)  # Third paragraph gets an image
+                        )
+
+                        if should_add_image:
+                            structured_blocks.append({
+                                'type': 'image',
+                                'image_data': image,
+                                'placement': 'inline',
+                                'section_type': section['type']
+                            })
+                            image_index += 1
+
+        # Add any remaining images at strategic points
         while image_index < len(images):
             image = images[image_index]
             structured_blocks.append({
                 'type': 'image',
                 'image_data': image,
-                'placement': '
+                'placement': 'inline'
             })
             image_index += 1
 
         return structured_blocks
 
+    def _remove_image_urls_from_text(self, text: str) -> str:
+        """Remove image URLs from text to prevent them from being processed as citations"""
+        import re
+
+        # Remove common image URL patterns that might appear in text
+        image_url_patterns = [
+            r'https?://[^\s]+\.(jpg|jpeg|png|gif|webp|svg)(\?[^\s]*)?',  # Direct image URLs
+            r'<img[^>]*src=["\']([^"\']+)["\'][^>]*>',  # HTML img tags
+            r'!\[[^\]]*\]\([^)]+\)',  # Markdown image syntax
+        ]
+
+        cleaned_text = text
+        for pattern in image_url_patterns:
+            cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
+
+        # Clean up any extra whitespace left behind
+        cleaned_text = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_text)
+        cleaned_text = cleaned_text.strip()
+
+        return cleaned_text
+
     def _process_citations(self, response: str, url_mapping: Dict[int, str]) -> str:
         """Replace citation tags with actual URLs, handling various citation formats flexibly"""
 
+        # First, remove any image URLs from the response to prevent them from being processed as citations
+        # This prevents image URLs from appearing as citations in the text
+        response = self._remove_image_urls_from_text(response)
+
         # More flexible pattern to match various citation formats
         citation_patterns = [
             r'<#([^>]+)>',  # Standard format: <#1>, <#1,2,3>
@@ -551,8 +586,8 @@ class CookingTutorChatbot:
                 doc_id = extract_numeric_id(citation_id)
 
                 if doc_id is not None and doc_id in url_mapping:
-
-
+                    url = url_mapping[doc_id]
+                    urls.append(f'<{url}>')
                     logger.info(f"[CITATION] Replacing <#{citation_id}> with {url}")
                 else:
                     if doc_id is None:
search/engines/image.py
CHANGED
@@ -88,37 +88,57 @@ class ImageSearchEngine:
             'query': final_dish_query,
             'context': 'final_dish',
             'type': 'final_dish',
-            'max_results': max(
+            'max_results': max(2, num_results // 4)  # More final dish images
         })
 
-        # 2. Ingredients query - more specific
+        # 2. Ingredients query - more specific and diverse
        if any(keyword in query_lower for keyword in ['pad thai', 'noodles', 'pasta']):
             ingredients_query = f"pad thai ingredients rice noodles shrimp"
+            ingredients_query2 = f"pad thai fresh vegetables herbs"
         elif any(keyword in query_lower for keyword in ['fusion', 'western']):
             ingredients_query = f"fusion cooking ingredients fresh"
+            ingredients_query2 = f"western cooking ingredients vegetables"
         else:
             ingredients_query = f"{clean_query} ingredients fresh"
+            ingredients_query2 = f"{clean_query} raw ingredients vegetables"
 
         queries.append({
             'query': ingredients_query,
             'context': 'ingredients',
             'type': 'ingredients',
-            'max_results': max(
+            'max_results': max(2, num_results // 4)  # More ingredient images
+        })
+
+        queries.append({
+            'query': ingredients_query2,
+            'context': 'ingredients',
+            'type': 'ingredients',
+            'max_results': max(1, num_results // 6)  # Additional ingredient variety
         })
 
         # 3. Cooking technique/process query - more specific
         if any(keyword in query_lower for keyword in ['pad thai', 'noodles', 'pasta']):
             technique_query = f"pad thai cooking technique wok stir fry"
+            technique_query2 = f"pad thai preparation cooking process"
         elif any(keyword in query_lower for keyword in ['fusion', 'western']):
             technique_query = f"fusion cooking technique western"
+            technique_query2 = f"fusion cooking preparation method"
         else:
             technique_query = f"{clean_query} cooking technique"
+            technique_query2 = f"{clean_query} preparation method"
 
         queries.append({
             'query': technique_query,
             'context': 'technique',
             'type': 'technique',
-            'max_results': max(
+            'max_results': max(2, num_results // 4)  # More technique images
+        })
+
+        queries.append({
+            'query': technique_query2,
+            'context': 'technique',
+            'type': 'technique',
+            'max_results': max(1, num_results // 6)  # Additional technique variety
         })
 
         return queries
@@ -140,24 +160,33 @@ class ImageSearchEngine:
             else:
                 type_groups['other'].append(result)
 
-        # Select diverse results
+        # Select diverse results with emphasis on ingredients and techniques
         diverse_results = []
 
-        # Prioritize:
-
-
-
-
-
-
+        # Prioritize: 2 ingredients, 2 techniques, 2 final dishes for better diversity
+        for _ in range(2):  # Get 2 of each type
+            if type_groups['ingredients'] and len(diverse_results) < num_results:
+                diverse_results.append(type_groups['ingredients'].pop(0))
+            if type_groups['technique'] and len(diverse_results) < num_results:
+                diverse_results.append(type_groups['technique'].pop(0))
+            if type_groups['final_dish'] and len(diverse_results) < num_results:
+                diverse_results.append(type_groups['final_dish'].pop(0))
 
         # Fill remaining slots with other results
         all_remaining = []
         for group in type_groups.values():
-            all_remaining.extend(group
+            all_remaining.extend(group)  # Include all remaining results
+
+        # Sort by quality score if available
+        all_remaining.sort(key=lambda x: x.get('quality_score', 0), reverse=True)
 
         diverse_results.extend(all_remaining[:num_results - len(diverse_results)])
 
+        logger.info(f"Prioritized {len(diverse_results)} diverse images: "
+                    f"ingredients={len([r for r in diverse_results if r.get('image_type') == 'ingredients'])}, "
+                    f"technique={len([r for r in diverse_results if r.get('image_type') == 'technique'])}, "
+                    f"final_dish={len([r for r in diverse_results if r.get('image_type') == 'final_dish'])}")
+
         return diverse_results[:num_results]
 
     def _validate_image_results(self, results: List[Dict]) -> List[Dict]:
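The selection change above round-robins across image types before falling back to quality score. Below is a self-contained sketch of that ordering; pick_diverse and the sample data are illustrative, not repository code, and only the field names image_type and quality_score come from the diff.

from typing import Dict, List

def pick_diverse(type_groups: Dict[str, List[Dict]], num_results: int) -> List[Dict]:
    # Take up to two ingredients, two technique, and two final_dish images first,
    # then fill the remaining slots by quality score, as the updated logic does.
    diverse: List[Dict] = []
    for _ in range(2):  # up to two of each preferred type
        if type_groups.get('ingredients') and len(diverse) < num_results:
            diverse.append(type_groups['ingredients'].pop(0))
        if type_groups.get('technique') and len(diverse) < num_results:
            diverse.append(type_groups['technique'].pop(0))
        if type_groups.get('final_dish') and len(diverse) < num_results:
            diverse.append(type_groups['final_dish'].pop(0))
    remaining = [r for group in type_groups.values() for r in group]
    remaining.sort(key=lambda x: x.get('quality_score', 0), reverse=True)
    diverse.extend(remaining[:num_results - len(diverse)])
    return diverse[:num_results]

groups = {
    'ingredients': [{'image_type': 'ingredients', 'quality_score': 0.9},
                    {'image_type': 'ingredients', 'quality_score': 0.7}],
    'technique':   [{'image_type': 'technique', 'quality_score': 0.8}],
    'final_dish':  [{'image_type': 'final_dish', 'quality_score': 0.95}],
    'other':       [{'image_type': 'other', 'quality_score': 0.6}],
}
print([r['image_type'] for r in pick_diverse(groups, 4)])
# ['ingredients', 'technique', 'final_dish', 'ingredients']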