Spaces:
Sleeping
Sleeping
Commit
·
5129aaa
1
Parent(s):
3a8d69b
moved code to earthview.py (and other things)
Browse files- app.py +42 -44
- earthview.py +75 -9
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from datasets import load_dataset
|
| 2 |
from functools import partial
|
| 3 |
from pandas import DataFrame
|
| 4 |
import earthview as ev
|
|
@@ -6,73 +6,70 @@ import gradio as gr
|
|
| 6 |
import tqdm
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
DEBUG = False
|
| 10 |
|
| 11 |
-
if DEBUG:
|
| 12 |
import numpy as np
|
| 13 |
|
| 14 |
-
def open_dataset(dataset,
|
| 15 |
-
if shard == -1:
|
| 16 |
-
# Trick to open the whole dataset
|
| 17 |
-
data_files = None
|
| 18 |
-
shards = 100
|
| 19 |
-
else:
|
| 20 |
-
config = ev.sets[set_name].get("config", set_name)
|
| 21 |
-
shards = ev.sets[set_name]["shards"]
|
| 22 |
-
path = ev.sets[set_name].get("path", set_name)
|
| 23 |
-
data_files = {"train":[f"{path}/{split}-{shard:05d}-of-{shards:05d}.parquet"]}
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
else:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
|
| 39 |
-
state["
|
| 40 |
state["dsi"] = dsi
|
| 41 |
return (
|
| 42 |
-
gr.update(label=f"
|
| 43 |
-
*get_images(batch_size, state),
|
| 44 |
state
|
| 45 |
)
|
| 46 |
|
| 47 |
-
def get_images(batch_size, state):
|
| 48 |
-
|
| 49 |
|
| 50 |
images = []
|
| 51 |
metadatas = []
|
| 52 |
|
| 53 |
for i in tqdm.trange(batch_size, desc=f"Getting images"):
|
| 54 |
-
if DEBUG:
|
| 55 |
images.append(np.random.randint(0,255,(384,384,3)))
|
| 56 |
-
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
else:
|
| 60 |
try:
|
| 61 |
item = next(state["dsi"])
|
| 62 |
except StopIteration:
|
| 63 |
break
|
| 64 |
metadata = item["metadata"]
|
| 65 |
-
item = ev.item_to_images(
|
| 66 |
|
| 67 |
-
if
|
| 68 |
images.extend(item["rgb"])
|
| 69 |
-
|
| 70 |
-
|
|
|
|
| 71 |
images.extend(item["10m"])
|
| 72 |
-
if
|
| 73 |
images.extend(item["rgb"])
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
metadatas.append(item["metadata"])
|
| 77 |
|
| 78 |
return images, DataFrame(metadatas)
|
|
@@ -84,7 +81,7 @@ def new_state():
|
|
| 84 |
return gr.State({})
|
| 85 |
|
| 86 |
if __name__ == "__main__":
|
| 87 |
-
with gr.Blocks(title="
|
| 88 |
state = new_state()
|
| 89 |
|
| 90 |
gr.Markdown(f"# Viewer for [{ev.DATASET}](https://huggingface.co/datasets/satellogic/EarthView) Dataset")
|
|
@@ -101,13 +98,14 @@ if __name__ == "__main__":
|
|
| 101 |
|
| 102 |
with gr.Row():
|
| 103 |
dataset = gr.Textbox(label="Dataset", value=ev.DATASET, interactive=False)
|
| 104 |
-
|
| 105 |
split = gr.Textbox(label="Split", value="train")
|
| 106 |
initial_shard = gr.Number(label = "Initial shard", value=10, info="-1 for whole dataset")
|
|
|
|
| 107 |
|
| 108 |
gr.Button("Load (minutes)").click(
|
| 109 |
open_dataset,
|
| 110 |
-
inputs=[dataset,
|
| 111 |
outputs=[shard, gallery, table, state])
|
| 112 |
|
| 113 |
gallery.render()
|
|
@@ -125,11 +123,11 @@ if __name__ == "__main__":
|
|
| 125 |
shard.render()
|
| 126 |
shard.release(
|
| 127 |
open_dataset,
|
| 128 |
-
inputs=[dataset,
|
| 129 |
outputs=[shard, gallery, table, state])
|
| 130 |
|
| 131 |
btn = gr.Button("Next Batch (same shard)", scale=0)
|
| 132 |
-
btn.click(get_images, [batch_size, state], [gallery, table])
|
| 133 |
btn.click()
|
| 134 |
|
| 135 |
table.render()
|
|
|
|
| 1 |
+
from datasets import load_dataset
|
| 2 |
from functools import partial
|
| 3 |
from pandas import DataFrame
|
| 4 |
import earthview as ev
|
|
|
|
| 6 |
import tqdm
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
DEBUG = "samples" # False, "random", "samples"
|
| 10 |
|
| 11 |
+
if DEBUG == "random":
|
| 12 |
import numpy as np
|
| 13 |
|
| 14 |
+
def open_dataset(dataset, subset, split, batch_size, shard, only_rgb, state):
    """Open the selected subset and return the first batch for the UI.

    The data source depends on the module-level ``DEBUG`` switch: a dummy
    ``range`` (``"random"``), a local sample parquet file loaded through
    ``ev.load_parquet`` (``"samples"``) or the real dataset loaded through
    ``ev.load_dataset`` (falsy ``DEBUG``).  An iterator over that source and
    the subset name are stored in ``state`` so that ``get_images`` can keep
    reading where this call stopped.

    Args:
        dataset: Dataset identifier forwarded to ``ev.load_dataset``.
        subset: Subset name, looked up with ``ev.get_nshards``.
        split: Split name forwarded to ``ev.load_dataset``.
        batch_size: Number of items fetched for the first batch.
        shard: Shard number to open, or ``-1`` to open the whole dataset.
        only_rgb: Forwarded unchanged to ``get_images``.
        state: Mutable dict; its ``"subset"`` and ``"dsi"`` entries are set.

    Returns:
        A 4-tuple: the update for the shard component, the list of images,
        the metadata ``DataFrame`` and the updated ``state``.

    Raises:
        ValueError: If ``DEBUG`` holds an unsupported value.
    """
    nshards = ev.get_nshards(subset)

    if shard == -1:
        shards = None
    else:
        # The shard number ends up in a ``{shard:05d}`` file-name pattern,
        # which only accepts integers; numeric UI inputs may deliver floats
        # (NOTE(review): confirm against the gradio components in use).
        shards = [int(shard)]

    if DEBUG == "random":
        ds = range(batch_size)
    elif DEBUG == "samples":
        ds = ev.load_parquet(subset, batch_size=batch_size)
    elif not DEBUG:
        ds = ev.load_dataset(subset, dataset=dataset, split=split, shards=shards, cache_dir="dataset")
    else:
        # Without this branch ``ds`` stayed unbound and the failure showed up
        # below as an UnboundLocalError.
        raise ValueError(f"Unsupported DEBUG value: {DEBUG!r}")

    state["subset"] = subset
    state["dsi"] = iter(ds)
    return (
        gr.update(label=f"Shard (max {nshards})", value=shard, maximum=nshards),
        *get_images(batch_size, only_rgb, state),
        state
    )
|
| 39 |
|
| 40 |
+
def get_images(batch_size, only_rgb, state):
    """Collect the images and metadata of up to ``batch_size`` items.

    With ``DEBUG == "random"`` random arrays and a fixed dummy metadata entry
    are produced.  Otherwise items are pulled from the iterator stored in
    ``state["dsi"]``, converted with ``ev.item_to_images`` and their images
    are appended according to the subset stored in ``state["subset"]``.

    Args:
        batch_size: Maximum number of items to read.
        only_rgb: When true, only the ``"rgb"`` images of "satellogic" and
            "neon" items are returned; otherwise their additional layers
            (``"1m"``, and ``"chm"`` for "neon") are included as well.
        state: Dict holding the ``"subset"`` and ``"dsi"`` entries written by
            ``open_dataset``.

    Returns:
        A tuple ``(images, DataFrame(metadatas))``.  Both are empty when no
        dataset has been opened yet or the iterator is already exhausted.
    """
    images = []
    metadatas = []

    random_mode = DEBUG == "random"
    if not random_mode and "dsi" not in state:
        # Nothing has been opened yet; return an empty batch instead of
        # failing with a KeyError on the missing state entries.
        return images, DataFrame(metadatas)

    subset = state.get("subset")
    dsi = state.get("dsi")

    for _ in tqdm.trange(batch_size, desc="Getting images"):
        if random_mode:
            images.append(np.random.randint(0, 255, (384, 384, 3)))
            if not only_rgb:
                images.append(np.random.randint(0, 255, (100, 100, 3)))
            metadatas.append({"bounds": [[1, 1, 4, 4]]})
            continue

        try:
            item = next(dsi)
        except StopIteration:
            # Source exhausted: return what was collected so far.
            break
        item = ev.item_to_images(subset, item)

        # The subset names are mutually exclusive, hence a single chain.
        if subset == "satellogic":
            images.extend(item["rgb"])
            if not only_rgb:
                images.extend(item["1m"])
        elif subset == "sentinel_1":
            images.extend(item["10m"])
        elif subset == "neon":
            images.extend(item["rgb"])
            if not only_rgb:
                images.extend(item["chm"])
                images.extend(item["1m"])

        metadatas.append(item["metadata"])

    return images, DataFrame(metadatas)
|
|
|
|
| 81 |
return gr.State({})
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|
| 84 |
+
with gr.Blocks(title="EarthView Viewer", fill_height = True) as demo:
|
| 85 |
state = new_state()
|
| 86 |
|
| 87 |
gr.Markdown(f"# Viewer for [{ev.DATASET}](https://huggingface.co/datasets/satellogic/EarthView) Dataset")
|
|
|
|
| 98 |
|
| 99 |
with gr.Row():
|
| 100 |
dataset = gr.Textbox(label="Dataset", value=ev.DATASET, interactive=False)
|
| 101 |
+
subset = gr.Dropdown(choices=ev.get_subsets(), label="Subset", value="satellogic", )
|
| 102 |
split = gr.Textbox(label="Split", value="train")
|
| 103 |
initial_shard = gr.Number(label = "Initial shard", value=10, info="-1 for whole dataset")
|
| 104 |
+
only_rgb = gr.Checkbox(label="Only RGB", value=True)
|
| 105 |
|
| 106 |
gr.Button("Load (minutes)").click(
|
| 107 |
open_dataset,
|
| 108 |
+
inputs=[dataset, subset, split, batch_size, initial_shard, only_rgb, state],
|
| 109 |
outputs=[shard, gallery, table, state])
|
| 110 |
|
| 111 |
gallery.render()
|
|
|
|
| 123 |
shard.render()
|
| 124 |
shard.release(
|
| 125 |
open_dataset,
|
| 126 |
+
inputs=[dataset, subset, split, batch_size, shard, only_rgb, state],
|
| 127 |
outputs=[shard, gallery, table, state])
|
| 128 |
|
| 129 |
btn = gr.Button("Next Batch (same shard)", scale=0)
|
| 130 |
+
btn.click(get_images, [batch_size, only_rgb, state], [gallery, table])
|
| 131 |
btn.click()
|
| 132 |
|
| 133 |
table.render()
|
earthview.py
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
|
|
|
|
|
| 1 |
from PIL import Image
|
| 2 |
import numpy as np
|
| 3 |
import json
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
DATASET = "satellogic/EarthView"
|
| 6 |
|
| 7 |
sets = {
|
|
@@ -18,10 +24,58 @@ sets = {
|
|
| 18 |
}
|
| 19 |
}
|
| 20 |
|
| 21 |
-
def
|
| 22 |
return sets.keys()
|
| 23 |
|
| 24 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
metadata = item["metadata"]
|
| 26 |
if type(metadata) == str:
|
| 27 |
metadata = json.loads(metadata)
|
|
@@ -33,16 +87,24 @@ def item_to_images(config, item):
|
|
| 33 |
}
|
| 34 |
item["metadata"] = metadata
|
| 35 |
|
| 36 |
-
if
|
| 37 |
-
item["rgb"] = [
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
item["1m"] = [
|
| 42 |
Image.fromarray(image[0,:,:])
|
| 43 |
for image in item["1m"]
|
| 44 |
]
|
| 45 |
-
|
|
|
|
| 46 |
# Mapping of V and H to RGB. May not be correct
|
| 47 |
# https://gis.stackexchange.com/questions/400726/creating-composite-rgb-images-from-sentinel-1-channels
|
| 48 |
i10m = item["10m"]
|
|
@@ -59,7 +121,8 @@ def item_to_images(config, item):
|
|
| 59 |
Image.fromarray(image.transpose(1,2,0))
|
| 60 |
for image in i10m
|
| 61 |
]
|
| 62 |
-
|
|
|
|
| 63 |
item["rgb"] = [
|
| 64 |
Image.fromarray(image.transpose(1,2,0))
|
| 65 |
for image in item["rgb"]
|
|
@@ -80,5 +143,8 @@ def item_to_images(config, item):
|
|
| 80 |
,2).astype("uint8"))
|
| 81 |
for image in item["1m"]
|
| 82 |
]
|
|
|
|
|
|
|
|
|
|
| 83 |
return item
|
| 84 |
|
|
|
|
| 1 |
+
from datasets import load_dataset as _load_dataset
|
| 2 |
+
from os import environ
|
| 3 |
from PIL import Image
|
| 4 |
import numpy as np
|
| 5 |
import json
|
| 6 |
|
| 7 |
+
from pyarrow.parquet import ParquetFile
|
| 8 |
+
from pyarrow import Table as pa_Table
|
| 9 |
+
from datasets import Dataset
|
| 10 |
+
|
| 11 |
DATASET = "satellogic/EarthView"
|
| 12 |
|
| 13 |
sets = {
|
|
|
|
| 24 |
}
|
| 25 |
}
|
| 26 |
|
| 27 |
+
def get_subsets():
    """Return the names of the available subsets, i.e. the keys of ``sets``."""
    subset_names = sets.keys()
    return subset_names
|
| 29 |
|
| 30 |
+
def get_nshards(subset):
    """Return the ``"shards"`` entry stored for ``subset`` in ``sets``.

    Raises ``KeyError`` when the subset or its ``"shards"`` entry is missing.
    """
    subset_info = sets[subset]
    return subset_info["shards"]
|
| 32 |
+
|
| 33 |
+
def get_path(subset):
    """Return the ``"path"`` entry of ``subset``, or the subset name if unset.

    Raises ``KeyError`` when ``subset`` is not a key of ``sets``.
    """
    subset_info = sets[subset]
    return subset_info.get("path", subset)
|
| 35 |
+
|
| 36 |
+
def get_config(subset):
    """Return the ``"config"`` entry of ``subset``, or the subset name if unset.

    Raises ``KeyError`` when ``subset`` is not a key of ``sets``.
    """
    subset_info = sets[subset]
    return subset_info.get("config", subset)
|
| 38 |
+
|
| 39 |
+
def load_dataset(subset, dataset="satellogic/EarthView", split="train", shards = None, streaming=True, **kwargs):
    """Load one subset of the dataset, optionally restricted to some shards.

    Args:
        subset: Subset name; its config name, shard count and path are
            resolved with ``get_config``, ``get_nshards`` and ``get_path``.
        dataset: Dataset identifier passed on as ``path``.
        split: Split to load; also used in the shard file names and as the
            key of the ``data_files`` mapping.
        shards: Iterable of shard numbers to load, or ``None`` to place no
            restriction on the data files.
        streaming: Passed on to the underlying loader.
        **kwargs: Extra keyword arguments passed on to the underlying loader.

    Returns:
        Whatever the underlying ``datasets.load_dataset`` call returns.
    """
    config = get_config(subset)
    nshards = get_nshards(subset)
    path = get_path(subset)

    if shards is None:
        # No restriction requested: leave data_files unset rather than
        # wrapping ``None`` into a ``{split: None}`` mapping.
        data_files = None
    else:
        # ``int(shard)`` because the ``05d`` format code rejects floats,
        # which is what numeric UI inputs may hand over.
        data_files = {
            split: [
                f"{path}/{split}-{int(shard):05d}-of-{nshards:05d}.parquet"
                for shard in shards
            ]
        }

    return _load_dataset(
        path=dataset,
        name=config,
        save_infos=True,
        split=split,
        data_files=data_files,
        streaming=streaming,
        # Token is optional; ``None`` is passed when HF_TOKEN is not set.
        token=environ.get("HF_TOKEN", None),
        **kwargs)
|
| 60 |
+
|
| 61 |
+
def load_parquet(subset_or_filename, batch_size=100):
    """Load a parquet file into a ``Dataset``.

    Args:
        subset_or_filename: Either a subset name (the file
            ``dataset/<subset>/sample.parquet`` is opened), a parquet file
            name, or an already opened ``ParquetFile``.
        batch_size: Size of the record batches used while reading.  All
            batches of the file are read, so this does not limit the number
            of rows in the result.

    Returns:
        A ``Dataset`` built from every record batch of the file.
    """
    if isinstance(subset_or_filename, ParquetFile):
        # Already opened by the caller: use it as is.
        pqfile = subset_or_filename
    elif subset_or_filename in get_subsets():
        pqfile = ParquetFile(f"dataset/{subset_or_filename}/sample.parquet")
    else:
        # A plain file name has to be opened first; previously the raw
        # argument was used and ``.iter_batches`` failed on it.
        pqfile = ParquetFile(subset_or_filename)

    batches = pqfile.iter_batches(batch_size=batch_size)
    return Dataset(pa_Table.from_batches(batches))
|
| 69 |
+
|
| 70 |
+
def item_to_images(subset, item):
|
| 71 |
+
"""
|
| 72 |
+
Converts the images within an item (arrays), as retrieved from the dataset to proper PIL.Image
|
| 73 |
+
|
| 74 |
+
subset: The name of the subset, one of "satellogic", "sentinel_1", "neon"
|
| 75 |
+
item: The item as retrieved from the subset
|
| 76 |
+
|
| 77 |
+
returns the item, with arrays converted to PIL.Image
|
| 78 |
+
"""
|
| 79 |
metadata = item["metadata"]
|
| 80 |
if type(metadata) == str:
|
| 81 |
metadata = json.loads(metadata)
|
|
|
|
| 87 |
}
|
| 88 |
item["metadata"] = metadata
|
| 89 |
|
| 90 |
+
if subset == "satellogic":
|
| 91 |
+
# item["rgb"] = [
|
| 92 |
+
# Image.fromarray(np.average(image.transpose(1,2,0), 2).astype("uint8"))
|
| 93 |
+
# for image in item["rgb"]
|
| 94 |
+
# ]
|
| 95 |
+
rgbs = []
|
| 96 |
+
for rgb in item["rgb"]:
|
| 97 |
+
rgbs.append(Image.fromarray(rgb.transpose(1,2,0)))
|
| 98 |
+
# rgbs.append(Image.fromarray(rgb[0,:,:])) # Red
|
| 99 |
+
# rgbs.append(Image.fromarray(rgb[1,:,:])) # Green
|
| 100 |
+
# rgbs.append(Image.fromarray(rgb[2,:,:])) # Blue
|
| 101 |
+
item["rgb"] = rgbs
|
| 102 |
item["1m"] = [
|
| 103 |
Image.fromarray(image[0,:,:])
|
| 104 |
for image in item["1m"]
|
| 105 |
]
|
| 106 |
+
count = len(item["1m"])
|
| 107 |
+
elif subset == "sentinel_1":
|
| 108 |
# Mapping of V and H to RGB. May not be correct
|
| 109 |
# https://gis.stackexchange.com/questions/400726/creating-composite-rgb-images-from-sentinel-1-channels
|
| 110 |
i10m = item["10m"]
|
|
|
|
| 121 |
Image.fromarray(image.transpose(1,2,0))
|
| 122 |
for image in i10m
|
| 123 |
]
|
| 124 |
+
count = len(item["10m"])
|
| 125 |
+
elif subset == "neon":
|
| 126 |
item["rgb"] = [
|
| 127 |
Image.fromarray(image.transpose(1,2,0))
|
| 128 |
for image in item["rgb"]
|
|
|
|
| 143 |
,2).astype("uint8"))
|
| 144 |
for image in item["1m"]
|
| 145 |
]
|
| 146 |
+
count = len(item["rgb"])
|
| 147 |
+
|
| 148 |
+
item["metadata"]["count"] = count
|
| 149 |
return item
|
| 150 |
|