Commit 776d5b3 · Parent(s): e03a824
add
        app.py
    CHANGED
    
@@ -15,6 +15,7 @@ from shap_e.models.download import load_model, load_config
 from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
 import spaces
 from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
+import math
 
 from src.utils.train_util import instantiate_from_config
 from src.utils.camera_util import (
@@ -27,15 +28,14 @@ from src.utils.mesh_util import save_obj, save_glb
 from src.utils.infer_util import remove_background, resize_foreground
 
 def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list, 
-                          fov_degrees: float,distance) -> DifferentiableCameraBatch:
+                          fov_degrees: float, distance: float) -> DifferentiableCameraBatch:
     # Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
-    object_diagonal = …
+    object_diagonal = distance # Correct diagonal calculation for the cube
 
     # Calculate radius based on object size and FOV
     fov_radians = math.radians(fov_degrees)
     radius = (object_diagonal / 2) / math.tan(fov_radians / 2)  # Correct radius calculation
-
-    # exit(0)
+
     origins = []
     xs = []
     ys = []
@@ -75,8 +75,6 @@ def create_custom_cameras(size: int, device: torch.device, azimuths: list, eleva
         ),
     )
 
-
-@spaces.GPU(duration=60)
 def load_models():
     """Initialize and load all required models"""
     config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
@@ -231,15 +229,22 @@ def create_mesh(refined_image, model, infer_config):
 
 class ShapERenderer:
     def __init__(self, device):
-        print("…
+        print("Initializing Shap-E models...")
         self.device = device
-        self.xm = …
-        self.model = …
-        self.diffusion = …
-        print("Shap-E models …
+        self.xm = None
+        self.model = None
+        self.diffusion = None
+        print("Shap-E models initialized!")
+
+    def ensure_models_loaded(self):
+        if self.model is None:
+            self.xm = load_model('transmitter', device=self.device)
+            self.model = load_model('text300M', device=self.device)
+            self.diffusion = diffusion_from_config(load_config('diffusion'))
 
-    @spaces.GPU(duration=60)
     def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
+        self.ensure_models_loaded()
+
         # Generate latents using the text-to-3D model
         batch_size = 1
         guidance_scale = float(guidance_scale)
@@ -272,13 +277,13 @@ class ShapERenderer:
             rendered_image = decode_latent_images(
                 self.xm,
                 latents[0],
-                …
-                …
+                cameras=cameras,
+                rendering_mode='stf'
             )
-            images.append(rendered_image…
+            images.append(rendered_image[0])
 
         # Convert images to uint8
-        images = [(image)…
+        images = [np.array(image) for image in images]
 
         # Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
         layout = np.zeros((960, 640, 3), dtype=np.uint8)
@@ -292,12 +297,19 @@ class ShapERenderer:
 class RefinerInterface:
     def __init__(self):
         print("Initializing InstantMesh models...")
-        self.pipeline…
-        …
+        self.pipeline = None
+        self.model = None
+        self.infer_config = None
+        print("InstantMesh models initialized!")
+
+    def ensure_models_loaded(self):
+        if self.pipeline is None:
+            self.pipeline, self.model, self.infer_config = load_models()
 
-    @spaces.GPU(duration=65)
     def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
         """Main refinement function"""
+        self.ensure_models_loaded()
+
         # Process image and get refined output
         input_image = Image.fromarray(input_image)
 
@@ -434,11 +446,13 @@ def create_demo():
         )
 
         # Set up event handlers
+        @spaces.GPU(duration=60)
         def generate(prompt, guidance_scale, num_steps):
             with torch.no_grad():
                 layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
             return layout
 
+        @spaces.GPU(duration=60)
         def refine(input_image, prompt, steps, guidance_scale):
             refined_img, mesh_path = refiner.refine_model(
                 input_image, 
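The radius line in create_custom_cameras places the camera just far enough back that an object of the given diagonal spans the full field of view: half the diagonal must subtend half the FOV angle. A minimal standalone check of that formula (the 30-degree FOV and diagonal of 2.0 are illustrative values, not taken from the commit):

    import math

    def camera_radius(object_diagonal: float, fov_degrees: float) -> float:
        # Half the diagonal must subtend half the field of view.
        fov_radians = math.radians(fov_degrees)
        return (object_diagonal / 2) / math.tan(fov_radians / 2)

    # A 2x2x2 bounding box (its diagonal passed as distance=2.0) at a 30-degree FOV:
    print(round(camera_radius(2.0, 30.0), 3))  # 3.732

Wider FOVs give a smaller radius (the tangent grows), matching the intuition that a wide-angle camera can sit closer to the object.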
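Taken together, the app.py changes implement the usual ZeroGPU pattern: @spaces.GPU moves off the loaders (load_models, ShapERenderer.generate_views, RefinerInterface.refine_model) and onto the Gradio event handlers, while the constructors defer all weight loading to an ensure_models_loaded() call made inside the decorated scope. A condensed sketch of that pattern, using the shap-e loader API shown in the diff (the class name and handler wiring here are illustrative, not the Space's full code):

    import spaces
    import torch
    from shap_e.models.download import load_model

    class LazyShapE:
        def __init__(self, device):
            # No GPU work at construction time: under ZeroGPU, CUDA is only
            # usable inside functions decorated with @spaces.GPU.
            self.device = device
            self.model = None

        def ensure_models_loaded(self):
            # Load weights lazily, on the first decorated call.
            if self.model is None:
                self.model = load_model('text300M', device=self.device)

        def generate_views(self, prompt):
            self.ensure_models_loaded()
            ...  # sampling and rendering as in app.py

    renderer = LazyShapE(torch.device('cuda'))

    # The decorator sits on the event handler, so a GPU is requested per
    # user interaction rather than held for the lifetime of the Space.
    @spaces.GPU(duration=60)
    def generate(prompt):
        with torch.no_grad():
            return renderer.generate_views(prompt)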
    	
        app2.py
    CHANGED
    
@@ -12,7 +12,8 @@ from einops import rearrange
 from shap_e.diffusion.sample import sample_latents
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.models.download import load_model, load_config
-from shap_e.util.notebooks import create_pan_cameras, decode_latent_images…
+from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
+from util import create_custom_cameras
 
 from src.utils.train_util import instantiate_from_config
 from src.utils.camera_util import (
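app2.py's only change restores the shap-e notebook helpers and imports create_custom_cameras from a local util module. Assuming util exposes the same signature that app.py's diff shows, a call would look like this (all concrete values are illustrative):

    import torch
    from util import create_custom_cameras  # as imported in app2.py

    # Six azimuths at a fixed elevation; distance feeds object_diagonal,
    # which in turn sets the camera radius from the FOV.
    cameras = create_custom_cameras(
        size=320,
        device=torch.device('cuda'),
        azimuths=[0, 60, 120, 180, 240, 300],
        elevations=[20.0] * 6,
        fov_degrees=30.0,
        distance=2.0,
    )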