Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	fix FOV
Browse files- .gitignore +46 -0
- app.py +71 -43
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,46 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Python build
         | 
| 2 | 
            +
            .eggs/
         | 
| 3 | 
            +
            gradio.egg-info/*
         | 
| 4 | 
            +
            !gradio.egg-info/requires.txt
         | 
| 5 | 
            +
            !gradio.egg-info/PKG-INFO
         | 
| 6 | 
            +
            dist/
         | 
| 7 | 
            +
            *.pyc
         | 
| 8 | 
            +
            __pycache__/
         | 
| 9 | 
            +
            *.py[cod]
         | 
| 10 | 
            +
            *$py.class
         | 
| 11 | 
            +
            build/
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # JS build
         | 
| 14 | 
            +
            gradio/templates/frontend
         | 
| 15 | 
            +
            # Secrets
         | 
| 16 | 
            +
            .env
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            # Gradio run artifacts
         | 
| 19 | 
            +
            *.db
         | 
| 20 | 
            +
            *.sqlite3
         | 
| 21 | 
            +
            gradio/launches.json
         | 
| 22 | 
            +
            flagged/
         | 
| 23 | 
            +
            gradio_cached_examples/
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            # Tests
         | 
| 26 | 
            +
            .coverage
         | 
| 27 | 
            +
            coverage.xml
         | 
| 28 | 
            +
            test.txt
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # Demos
         | 
| 31 | 
            +
            demo/tmp.zip
         | 
| 32 | 
            +
            demo/files/*.avi
         | 
| 33 | 
            +
            demo/files/*.mp4
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            # Etc
         | 
| 36 | 
            +
            .idea/*
         | 
| 37 | 
            +
            .DS_Store
         | 
| 38 | 
            +
            *.bak
         | 
| 39 | 
            +
            workspace.code-workspace
         | 
| 40 | 
            +
            *.h5
         | 
| 41 | 
            +
            .vscode/
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            # log files
         | 
| 44 | 
            +
            .pnpm-debug.log
         | 
| 45 | 
            +
            venv/
         | 
| 46 | 
            +
            *.db-journal
         | 
    	
        app.py
    CHANGED
    
    | @@ -4,72 +4,100 @@ import torch | |
| 4 | 
             
            import numpy as np
         | 
| 5 | 
             
            from PIL import Image
         | 
| 6 | 
             
            import open3d as o3d
         | 
| 7 | 
            -
             | 
| 8 | 
            -
            torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
         | 
| 9 |  | 
| 10 | 
             
            feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
         | 
| 11 | 
             
            model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         | 
| 12 |  | 
| 13 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 14 | 
             
                # prepare image for the model
         | 
| 15 | 
             
                encoding = feature_extractor(image, return_tensors="pt")
         | 
| 16 | 
            -
             | 
| 17 | 
             
                # forward pass
         | 
| 18 | 
             
                with torch.no_grad():
         | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
             
                # interpolate to original size
         | 
| 23 | 
             
                prediction = torch.nn.functional.interpolate(
         | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
             
                output = prediction.cpu().numpy()
         | 
| 30 | 
             
                depth_image = (output * 255 / np.max(output)).astype('uint8')
         | 
| 31 | 
            -
                 | 
| 32 | 
            -
                 | 
| 33 | 
            -
                # img = Image.fromarray(formatted)
         | 
| 34 | 
            -
                return "output.gltf"
         | 
| 35 | 
            -
                
         | 
| 36 | 
            -
                # return result
         | 
| 37 |  | 
| 38 | 
            -
                 | 
| 39 |  | 
| 40 | 
            -
             | 
|  | |
| 41 | 
             
                depth_o3d = o3d.geometry.Image(depth_image)
         | 
| 42 | 
             
                image_o3d = o3d.geometry.Image(rgb_image)
         | 
| 43 | 
            -
                rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( | 
| 44 | 
            -
             | 
| 45 | 
            -
                 | 
|  | |
| 46 |  | 
| 47 | 
            -
                FOV = np.pi/4
         | 
| 48 | 
             
                camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
         | 
| 49 | 
            -
                camera_intrinsic.set_intrinsics(w, h,  | 
|  | |
|  | |
|  | |
| 50 |  | 
| 51 | 
            -
                pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image,camera_intrinsic)
         | 
| 52 | 
             
                print('normals')
         | 
| 53 | 
            -
                pcd.normals = o3d.utility.Vector3dVector( | 
| 54 | 
            -
             | 
| 55 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 56 | 
             
                print('run Poisson surface reconstruction')
         | 
| 57 | 
             
                with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
         | 
| 58 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 59 | 
             
                print(mesh)
         | 
| 60 | 
            -
                 | 
| 61 | 
            -
                 | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 70 | 
             
                                 title=title,
         | 
| 71 | 
             
                                 description=description,
         | 
| 72 | 
             
                                 examples=examples,
         | 
| 73 | 
            -
                                 allow_flagging="never" | 
| 74 | 
            -
             | 
| 75 | 
            -
            iface.launch(debug=True)
         | 
|  | |
| 4 | 
             
            import numpy as np
         | 
| 5 | 
             
            from PIL import Image
         | 
| 6 | 
             
            import open3d as o3d
         | 
| 7 | 
            +
            from pathlib import Path
         | 
|  | |
| 8 |  | 
| 9 | 
             
            feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
         | 
| 10 | 
             
            model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         | 
| 11 |  | 
| 12 | 
            +
             | 
| 13 | 
            +
            def process_image(image_path):
         | 
| 14 | 
            +
                image_path = Path(image_path)
         | 
| 15 | 
            +
                print(image_path)
         | 
| 16 | 
            +
                image = Image.open(image_path)
         | 
| 17 | 
             
                # prepare image for the model
         | 
| 18 | 
             
                encoding = feature_extractor(image, return_tensors="pt")
         | 
| 19 | 
            +
             | 
| 20 | 
             
                # forward pass
         | 
| 21 | 
             
                with torch.no_grad():
         | 
| 22 | 
            +
                    outputs = model(**encoding)
         | 
| 23 | 
            +
                    predicted_depth = outputs.predicted_depth
         | 
| 24 | 
            +
             | 
| 25 | 
             
                # interpolate to original size
         | 
| 26 | 
             
                prediction = torch.nn.functional.interpolate(
         | 
| 27 | 
            +
                    predicted_depth.unsqueeze(1),
         | 
| 28 | 
            +
                    size=image.size[::-1],
         | 
| 29 | 
            +
                    mode="bicubic",
         | 
| 30 | 
            +
                    align_corners=False,
         | 
| 31 | 
            +
                ).squeeze()
         | 
| 32 | 
             
                output = prediction.cpu().numpy()
         | 
| 33 | 
             
                depth_image = (output * 255 / np.max(output)).astype('uint8')
         | 
| 34 | 
            +
                gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
         | 
| 35 | 
            +
                img = Image.fromarray(depth_image)
         | 
|  | |
|  | |
|  | |
|  | |
| 36 |  | 
| 37 | 
            +
                return [img, gltf_path, gltf_path]
         | 
| 38 |  | 
| 39 | 
            +
             | 
| 40 | 
            +
            def create_3d_obj(rgb_image, depth_image, image_path):
         | 
| 41 | 
             
                depth_o3d = o3d.geometry.Image(depth_image)
         | 
| 42 | 
             
                image_o3d = o3d.geometry.Image(rgb_image)
         | 
| 43 | 
            +
                rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
         | 
| 44 | 
            +
                    image_o3d, depth_o3d, convert_rgb_to_intensity=False)
         | 
| 45 | 
            +
                w = int(depth_image.shape[1])
         | 
| 46 | 
            +
                h = int(depth_image.shape[0])
         | 
| 47 |  | 
|  | |
| 48 | 
             
                camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
         | 
| 49 | 
            +
                camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
         | 
| 52 | 
            +
                    rgbd_image, camera_intrinsic)
         | 
| 53 |  | 
|  | |
| 54 | 
             
                print('normals')
         | 
| 55 | 
            +
                pcd.normals = o3d.utility.Vector3dVector(
         | 
| 56 | 
            +
                    np.zeros((1, 3)))  # invalidate existing normals
         | 
| 57 | 
            +
                pcd.estimate_normals(
         | 
| 58 | 
            +
                    search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
         | 
| 59 | 
            +
                pcd.transform([[1, 0, 0, 0],
         | 
| 60 | 
            +
                            [0, -1, 0, 0],
         | 
| 61 | 
            +
                            [0, 0, 1, 0],
         | 
| 62 | 
            +
                            [0, 0, 0, 1]])
         | 
| 63 | 
            +
             | 
| 64 | 
            +
             | 
| 65 | 
             
                print('run Poisson surface reconstruction')
         | 
| 66 | 
             
                with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
         | 
| 67 | 
            +
                    mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
         | 
| 68 | 
            +
                        pcd, depth=10, width=0, scale=1.1, linear_fit=True)
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 128
         | 
| 71 | 
            +
                print(f'voxel_size = {voxel_size:e}')
         | 
| 72 | 
            +
                mesh = mesh_raw.simplify_vertex_clustering(
         | 
| 73 | 
            +
                    voxel_size=voxel_size,
         | 
| 74 | 
            +
                    contraction=o3d.geometry.SimplificationContraction.Average)
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                # vertices_to_remove = densities < np.quantile(densities, 0.001)
         | 
| 77 | 
            +
                # mesh.remove_vertices_by_mask(vertices_to_remove)
         | 
| 78 | 
            +
                bbox = pcd.get_axis_aligned_bounding_box()
         | 
| 79 | 
            +
                mesh_crop = mesh.crop(bbox)
         | 
| 80 | 
             
                print(mesh)
         | 
| 81 | 
            +
                gltf_path = f'./{image_path.stem}.gltf'
         | 
| 82 | 
            +
                o3d.io.write_triangle_mesh(
         | 
| 83 | 
            +
                    gltf_path, mesh_crop, write_triangle_uvs=True)
         | 
| 84 | 
            +
                return gltf_path
         | 
| 85 | 
            +
             | 
| 86 | 
            +
             | 
| 87 | 
            +
            title = "Demo: zero-shot depth estimation with DPT + 3D Point Cloud"
         | 
| 88 | 
            +
            description = "This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object."
         | 
| 89 | 
            +
            examples = [['./examples/jonathan-borba-CgWTqYxHEkg-unsplash.jpeg'],
         | 
| 90 | 
            +
                        ['./examples/amber-kipp-75715CVEJhI-unsplash.jpeg']]
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            iface = gr.Interface(fn=process_image,
         | 
| 93 | 
            +
                                 inputs=[gr.inputs.Image(
         | 
| 94 | 
            +
                                     type="filepath", label="Input Image")],
         | 
| 95 | 
            +
                                 outputs=[gr.outputs.Image(label="predicted depth", type="pil"),
         | 
| 96 | 
            +
                                          gr.outputs.Image3D(label="3d mesh reconstruction", clear_color=[
         | 
| 97 | 
            +
                                                             1.0, 1.0, 1.0, 1.0]),
         | 
| 98 | 
            +
                                          gr.outputs.File(label="3d gLTF")],
         | 
| 99 | 
             
                                 title=title,
         | 
| 100 | 
             
                                 description=description,
         | 
| 101 | 
             
                                 examples=examples,
         | 
| 102 | 
            +
                                 allow_flagging="never")
         | 
| 103 | 
            +
            iface.launch(debug=True, enable_queue=True, cache_examples=True)
         | 
|  | 
