TorchTransformers-CV-SFT

Running

App Files Files Community

awacke1 committed on Mar 20

Commit

adeee4c

verified ·

1 Parent(s): 27fb02e

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -100

app.py CHANGED Viewed

@@ -7,8 +7,8 @@ import time
 from dataclasses import dataclass
 import zipfile
 import logging
-import av
-from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, WebRtcMode
 # Logging setup
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -133,7 +133,6 @@ class DiffusionBuilder:
     def fine_tune(self, images, texts):
         try:
             import torch
-            from PIL import Image
             import numpy as np
             logger.info("Starting diffusion fine-tuning")
             optimizer = torch.optim.AdamW(self.pipeline.unet.parameters(), lr=1e-5)
@@ -189,32 +188,162 @@ def zip_files(files, zip_name):
             zipf.write(file, os.path.basename(file))
     return zip_name
-# Video Processor for WebRTC
-class CameraProcessor(VideoProcessorBase):
-    def __init__(self):
-        self.frame = None
-    def recv(self, frame):
-        from PIL import Image
-        img = frame.to_image()
-        self.frame = img
-        return av.VideoFrame.from_image(img)
-    def capture_frame(self):
-        from PIL import Image
-        return self.frame
-    def capture_video(self):
-        from PIL import Image
-        frames = []
-        start_time = time.time()
-        while time.time() - start_time < 10 and self.frame:
-            frames.append(np.array(self.frame))
-            time.sleep(0.033)  # ~30 FPS
-        return frames
 # Main App
-st.title("SFT Tiny Titans 🚀 (Dual Cam Action!)")
 # Sidebar Galleries
 st.sidebar.header("Captured Media 🎨")
@@ -227,7 +356,6 @@ def update_gallery():
             for idx, file in enumerate(media_files[:4]):
                 with cols[idx % 2]:
                     if file.endswith(".png"):
-                        from PIL import Image
                         st.image(Image.open(file), caption=file.split('/')[-1], use_container_width=True)
                     elif file.endswith(".mp4"):
                         st.video(file)
@@ -273,77 +401,26 @@ with tab1:
 with tab2:
     st.header("Camera Snap 📷 (Dual Live Feed!)")
     cols = st.columns(2)
-    processors = {}
     for i in range(2):
         with cols[i]:
-            st.subheader(f"Camera {i}")
-            key = f"camera_{i}"
-            processors[key] = webrtc_streamer(
-                key=key,
-                mode=WebRtcMode.SENDRECV,
-                video_processor_factory=CameraProcessor,
-                frontend_rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
-            )
-            if st.button(f"Capture Frame 📸 Cam {i}", key=f"snap_{i}"):
-                logger.info(f"Capturing frame from Camera {i}")
-                try:
-                    if processors[key].video_processor and processors[key].video_processor.frame:
-                        snapshot = processors[key].video_processor.capture_frame()
-                        filename = generate_filename(i)
-                        snapshot.save(filename)
-                        st.image(snapshot, caption=filename, use_container_width=True)
-                        logger.info(f"Saved snapshot: {filename}")
-                        if 'captured_images' not in st.session_state:
-                            st.session_state['captured_images'] = []
-                        st.session_state['captured_images'].append(filename)
-                        update_gallery()
-                    else:
-                        st.error("No frame available!")
-                        logger.error(f"No frame captured from Camera {i}")
-                except Exception as e:
-                    st.error(f"Frame capture failed: {str(e)}")
-                    logger.error(f"Error capturing frame: {str(e)}")
-            if st.button(f"Capture Video 🎥 Cam {i}", key=f"rec_{i}"):
-                logger.info(f"Capturing 10s video from Camera {i}")
-                try:
-                    if processors[key].video_processor:
-                        frames = processors[key].video_processor.capture_video()
-                        if frames:
-                            mp4_filename = generate_filename(i, "mp4")
-                            with av.open(mp4_filename, "w") as container:
-                                stream = container.add_stream("h264", rate=30)
-                                stream.width = frames[0].shape[1]
-                                stream.height = frames[0].shape[0]
-                                for frame in frames:
-                                    av_frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
-                                    for packet in stream.encode(av_frame):
-                                        container.mux(packet)
-                                for packet in stream.encode():
-                                    container.mux(packet)
-                            st.video(mp4_filename)
-                            logger.info(f"Saved video: {mp4_filename}")
-                            sliced_images = []
-                            step = max(1, len(frames) // 10)
-                            for j in range(0, len(frames), step):
-                                if len(sliced_images) < 10:
-                                    img = Image.fromarray(frames[j])
-                                    img_filename = generate_filename(f"{i}_{len(sliced_images)}")
-                                    img.save(img_filename)
-                                    sliced_images.append(img_filename)
-                                    st.image(img, caption=img_filename, use_container_width=True)
-                            st.session_state['captured_images'] = st.session_state.get('captured_images', []) + sliced_images
-                            logger.info(f"Sliced video into {len(sliced_images)} images")
-                            update_gallery()
-                        else:
-                            st.error("No frames recorded!")
-                            logger.error("No frames captured during video recording")
-                    else:
-                        st.error("Camera processor not initialized!")
-                        logger.error(f"Processor not ready for Camera {i}")
-                except Exception as e:
-                    st.error(f"Video capture failed: {str(e)}")
-                    logger.error(f"Error capturing video: {str(e)}")
 with tab3:
     st.header("Fine-Tune Titans 🔧 (Tune Fast!)")
@@ -370,14 +447,13 @@ with tab3:
                 if st.button("Tune CV 🔄"):
                     logger.info("Initiating CV fine-tune")
                     try:
-                        from PIL import Image
                         images = [Image.open(img) for img in captured_images]
                         st.session_state['builder'].fine_tune(images, texts)
                         st.success("CV polished! 🎉")
                     except Exception as e:
                         st.error(f"CV fine-tune failed: {str(e)}")
             else:
-                st.warning("Capture at least 2 images first! ⚠️")
 with tab4:
     st.header("Test Titans 🧪 (Image Agent Demo!)")
@@ -401,7 +477,6 @@ with tab4:
                 if st.button("Run CV Demo ▶️"):
                     logger.info("Running CV image set demo")
                     try:
-                        from PIL import Image
                         images = [Image.open(img) for img in captured_images[:10]]
                         prompts = ["Neon " + os.path.basename(img).split('.')[0] for img in captured_images[:10]]
                         generated_images = []
@@ -425,7 +500,7 @@ with tab4:
                         st.error(f"CV demo failed: {str(e)}")
                         logger.error(f"Error in CV demo: {str(e)}")
             else:
-                st.warning("Capture at least 2 images first! ⚠️")
 # Display Logs
 st.sidebar.subheader("Action Logs 📜")

 from dataclasses import dataclass
 import zipfile
 import logging
+from streamlit.components.v1 import html
+from PIL import Image
 # Logging setup
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
     def fine_tune(self, images, texts):
         try:
             import torch
             import numpy as np
             logger.info("Starting diffusion fine-tuning")
             optimizer = torch.optim.AdamW(self.pipeline.unet.parameters(), lr=1e-5)
             zipf.write(file, os.path.basename(file))
     return zip_name
+# JavaScript/HTML Components
+camera_selector_html = """
+<div>
+  <h3>Camera & Audio Source Selector</h3>
+  <select id="videoSource"></select>
+  <select id="audioSource"></select>
+  <button onclick="startStream()">Start Stream</button>
+  <video id="video" autoplay playsinline style="width: 100%;"></video>
+</div>
+<script>
+  const videoSource = document.getElementById('videoSource');
+  const audioSource = document.getElementById('audioSource');
+  const video = document.getElementById('video');
+  let stream;
+  navigator.mediaDevices.enumerateDevices().then(devices => {
+    devices.forEach(device => {
+      const option = document.createElement('option');
+      option.value = device.deviceId;
+      option.text = device.label || `${device.kind} ${device.deviceId}`;
+      if (device.kind === 'videoinput') {
+        videoSource.appendChild(option);
+      } else if (device.kind === 'audioinput') {
+        audioSource.appendChild(option);
+      }
+    });
+  }).catch(err => console.error('Error enumerating devices:', err));
+  function startStream() {
+    if (stream) {
+      stream.getTracks().forEach(track => track.stop());
+    }
+    const constraints = {
+      video: { deviceId: videoSource.value ? { exact: videoSource.value } : undefined },
+      audio: { deviceId: audioSource.value ? { exact: audioSource.value } : undefined }
+    };
+    navigator.mediaDevices.getUserMedia(constraints)
+      .then(mediaStream => {
+        stream = mediaStream;
+        video.srcObject = stream;
+        console.log('Stream started');
+      })
+      .catch(err => console.error('Error starting stream:', err));
+  }
+</script>
+"""
+image_capture_html = """
+<div>
+  <h3>Image Capture - Camera {id}</h3>
+  <video id="video{id}" autoplay playsinline style="width: 100%;"></video>
+  <button onclick="captureFrame{id}()">Capture Frame 📸</button>
+  <canvas id="canvas{id}" style="display: none;"></canvas>
+</div>
+<script>
+  const video{id} = document.getElementById('video{id}');
+  const canvas{id} = document.getElementById('canvas{id}');
+  let stream{id};
+  navigator.mediaDevices.getUserMedia({ video: true })
+    .then(mediaStream => {
+      stream{id} = mediaStream;
+      video{id}.srcObject = stream{id};
+      console.log('Camera {id} stream started');
+    })
+    .catch(err => console.error('Error starting Camera {id}:', err));
+  function captureFrame{id}() {
+    canvas{id}.width = video{id}.videoWidth;
+    canvas{id}.height = video{id}.videoHeight;
+    const ctx = canvas{id}.getContext('2d');
+    ctx.drawImage(video{id}, 0, 0, canvas{id}.width, canvas{id}.height);
+    const dataUrl = canvas{id}.toDataURL('image/png');
+    const filename = `{id}${new Date().toISOString().replace(/[^0-9]/g, '')}.png`;
+    const link = document.createElement('a');
+    link.href = dataUrl;
+    link.download = filename;
+    link.click();
+    console.log('Captured frame:', filename);
+  }
+</script>
+"""
+# HTML/JS template for a 10-second video recorder. Every "{id}" token is a
+# placeholder for the camera index and must be substituted before rendering.
+# The recorded blob is offered as a download and then sliced into up to 10 PNGs.
+video_capture_html = """
+<div>
+  <h3>Video Capture - Camera {id}</h3>
+  <video id="video{id}" autoplay playsinline style="width: 100%;"></video>
+  <button onclick="captureVideo{id}()">Capture Video 🎥</button>
+  <canvas id="canvas{id}" style="display: none;"></canvas>
+</div>
+<script>
+  const video{id} = document.getElementById('video{id}');
+  const canvas{id} = document.getElementById('canvas{id}');
+  let stream{id}, recorder{id};
+  navigator.mediaDevices.getUserMedia({ video: true, audio: true })
+    .then(mediaStream => {
+      stream{id} = mediaStream;
+      video{id}.srcObject = stream{id};
+      recorder{id} = new MediaRecorder(stream{id});
+      let chunks = [];
+      recorder{id}.ondataavailable = e => chunks.push(e.data);
+      recorder{id}.onstop = () => {
+        const blob = new Blob(chunks, { type: 'video/mp4' });
+        // Reset the buffer so the next recording does not include this one.
+        chunks = [];
+        const filename = `{id}${new Date().toISOString().replace(/[^0-9]/g, '')}.mp4`;
+        const url = URL.createObjectURL(blob);
+        const link = document.createElement('a');
+        link.href = url;
+        link.download = filename;
+        link.click();
+        console.log('Captured video:', filename);
+        sliceVideo{id}(blob);
+      };
+      console.log('Camera {id} stream started');
+    })
+    .catch(err => console.error('Error starting Camera {id}:', err));
+  function captureVideo{id}() {
+    // The recorder only exists once getUserMedia has resolved, and start()
+    // throws if a recording is already in progress.
+    if (!recorder{id} || recorder{id}.state !== 'inactive') {
+      console.error('Camera {id} is not ready or is already recording');
+      return;
+    }
+    recorder{id}.start();
+    setTimeout(() => recorder{id}.stop(), 10000); // 10 seconds
+    console.log('Recording started for Camera {id}');
+  }
+  function sliceVideo{id}(blob) {
+    const video = document.createElement('video');
+    video.src = URL.createObjectURL(blob);
+    video.onloadedmetadata = () => {
+      const ctx = canvas{id}.getContext('2d');
+      canvas{id}.width = video.videoWidth;
+      canvas{id}.height = video.videoHeight;
+      let frameCount = 0;
+      const interval = video.duration / 10;
+      video.currentTime = 0;
+      const captureFrame = () => {
+        if (frameCount < 10) {
+          ctx.drawImage(video, 0, 0, canvas{id}.width, canvas{id}.height);
+          const dataUrl = canvas{id}.toDataURL('image/png');
+          const filename = `{id}${new Date().toISOString().replace(/[^0-9]/g, '')}_${frameCount}.png`;
+          const link = document.createElement('a');
+          link.href = dataUrl;
+          link.download = filename;
+          link.click();
+          console.log('Captured frame:', filename);
+          frameCount++;
+          video.currentTime += interval;
+          setTimeout(captureFrame, 100);
+        }
+      };
+      video.play().then(captureFrame);
+    };
+  }
+</script>
+"""
 # Main App
+st.title("SFT Tiny Titans 🚀 (Web Cam Action!)")
 # Sidebar Galleries
 st.sidebar.header("Captured Media 🎨")
             for idx, file in enumerate(media_files[:4]):
                 with cols[idx % 2]:
                     if file.endswith(".png"):
                         st.image(Image.open(file), caption=file.split('/')[-1], use_container_width=True)
                     elif file.endswith(".mp4"):
                         st.video(file)
 with tab2:
     st.header("Camera Snap 📷 (Dual Live Feed!)")
+    st.subheader("Source Configuration")
+    html(camera_selector_html, height=400)
     cols = st.columns(2)
     for i in range(2):
         with cols[i]:
+            # The templates are full of literal JS braces ("{ video: true }",
+            # "${...}"), which str.format() would try to parse as replacement
+            # fields and fail on; substitute only the "{id}" token instead.
+            html(image_capture_html.replace("{id}", str(i)), height=300)
+            html(video_capture_html.replace("{id}", str(i)), height=300)
+    st.subheader("Upload Captured Files")
+    uploaded_files = st.file_uploader("Upload PNGs/MP4s from Downloads", type=["png", "mp4"], accept_multiple_files=True)
+    if uploaded_files:
+        for file in uploaded_files:
+            # Keep only the final path component so a crafted upload name
+            # cannot write outside the working directory.
+            filename = os.path.basename(file.name)
+            with open(filename, "wb") as f:
+                # getvalue() returns the whole upload regardless of the
+                # current stream position, unlike read().
+                f.write(file.getvalue())
+            logger.info(f"Saved uploaded file: {filename}")
+            if filename.endswith(".png"):
+                st.image(Image.open(filename), caption=filename, use_container_width=True)
+            elif filename.endswith(".mp4"):
+                st.video(filename)
+        update_gallery()
 with tab3:
     st.header("Fine-Tune Titans 🔧 (Tune Fast!)")
                 if st.button("Tune CV 🔄"):
                     logger.info("Initiating CV fine-tune")
                     try:
                         images = [Image.open(img) for img in captured_images]
                         st.session_state['builder'].fine_tune(images, texts)
                         st.success("CV polished! 🎉")
                     except Exception as e:
                         st.error(f"CV fine-tune failed: {str(e)}")
             else:
+                st.warning("Upload at least 2 PNGs in Camera Snap first! ⚠️")
 with tab4:
     st.header("Test Titans 🧪 (Image Agent Demo!)")
                 if st.button("Run CV Demo ▶️"):
                     logger.info("Running CV image set demo")
                     try:
                         images = [Image.open(img) for img in captured_images[:10]]
                         prompts = ["Neon " + os.path.basename(img).split('.')[0] for img in captured_images[:10]]
                         generated_images = []
                         st.error(f"CV demo failed: {str(e)}")
                         logger.error(f"Error in CV demo: {str(e)}")
             else:
+                st.warning("Upload at least 2 PNGs in Camera Snap first! ⚠️")
 # Display Logs
 st.sidebar.subheader("Action Logs 📜")