Spaces:

BGLab
/

BioTrove-CLIP-Demo

Sleeping

App Files Files Community

BGLab committed on Nov 3, 2024

Commit

81c61bf

verified ·

1 Parent(s): 2c39a46

BioTrove

Browse files

Files changed (16) hide show

.gitattributes +3 -0
components/metadata.csv +3 -0
components/metadata_readme.md +11 -0
components/query.py +116 -0
components/sync_samples_to_s3.bash +34 -0
examples/Actinostola-abyssorum.png +3 -0
examples/Amanita-muscaria.jpeg +3 -0
examples/Carnegiea-gigantea.png +0 -0
examples/Felis-catus.jpeg +0 -0
examples/Onoclea-hintonii.jpg +0 -0
examples/Onoclea-sensibilis.jpg +0 -0
examples/Phoca-vitulina.png +0 -0
examples/Sarcoscypha-coccinea.jpeg +0 -0
examples/Ursus-arctos.jpeg +0 -0
examples/coral-snake.jpeg +0 -0
examples/milk-snake.png +0 -0

.gitattributes CHANGED Viewed

@@ -35,3 +35,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 name_lookup.json filter=lfs diff=lfs merge=lfs -text
 txt_emb_species.json filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 name_lookup.json filter=lfs diff=lfs merge=lfs -text
 txt_emb_species.json filter=lfs diff=lfs merge=lfs -text
+components/metadata.csv filter=lfs diff=lfs merge=lfs -text
+examples/Actinostola-abyssorum.png filter=lfs diff=lfs merge=lfs -text
+examples/Amanita-muscaria.jpeg filter=lfs diff=lfs merge=lfs -text

components/metadata.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8576f6ca106f35387506369a70df01fb92192a740c3b5da2a12ad8303976aad
+size 233934143

components/metadata_readme.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Bioclip Demo
+emoji: 🐘
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 4.36.1
+app_file: app.py
+pinned: false
+license: mit
+---

components/query.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import io
+import boto3
+import requests
+import numpy as np
+import polars as pl
+from PIL import Image
+from botocore.config import Config
+import logging
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# S3 client used to presign download URLs for the sample images.
+# The client is pinned to the us-east-1 region via the botocore Config below.
+my_config = Config(
+    region_name='us-east-1'
+)
+s3_client = boto3.client('s3', config=my_config)
+# Base URL for EOL pages; an EOL page ID is appended to it to build the link for more info.
+EOL_URL = "https://eol.org/pages/"
+# Taxonomic rank column names, ordered from broadest to most specific.
+# The integer `rank` arguments used in this module are indices into this list.
+RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
+def get_sample(df, pred_taxon, rank):
+    '''
+    Function to retrieve a sample image of the predicted taxon and EOL page link for more info.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    img : PIL.Image
+        Sample image of predicted taxon for display.
+    eol_page : str
+        URL to EOL page for the taxon (may be a lower rank, e.g., species sample).
+    '''
+    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
+    try:
+        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
+    except Exception as e:
+        logger.error(f"Error retrieving sample data: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+    if filepath is None:
+        logger.warning(f"No sample image found for taxon: {pred_taxon}")
+        return None, f"Sorry, our EOL images do not include {pred_taxon}."
+    # Get sample image of selected individual
+    try:
+        img_src = s3_client.generate_presigned_url('get_object',
+                                                   Params={'Bucket': 'treeoflife-10m-sample-images',
+                                                           'Key': filepath}
+                                                   )
+        img_resp = requests.get(img_src)
+        img = Image.open(io.BytesIO(img_resp.content))
+        full_eol_url = EOL_URL + eol_page_id
+        if is_exact:
+            eol_page = f"<p>Check out the EOL entry for {pred_taxon} to learn more: <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        else:
+            eol_page = f"<p>Check out an example EOL entry within {pred_taxon} to learn more: {full_name} <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        logger.info(f"Successfully retrieved sample image and EOL page for {pred_taxon}")
+        return img, eol_page
+    except Exception as e:
+        logger.error(f"Error retrieving sample image: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+def get_sample_data(df, pred_taxon, rank):
+    '''
+    Function to randomly select a sample individual of the given taxon and provide associated native location.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    filepath : str
+        Filepath of selected sample image for predicted taxon.
+    eol_page_id : str
+        EOL page ID associated with predicted taxon for more information.
+    full_name : str
+        Full taxonomic name of the selected sample.
+    is_exact : bool
+        Flag indicating if the match is exact (i.e., with empty lower ranks).
+    '''
+    for idx in range(rank + 1):
+        taxon = RANKS[idx]
+        target_taxon = pred_taxon.split(" ")[idx]
+        df = df.filter(pl.col(taxon) == target_taxon)
+    if df.shape[0] == 0:
+        return None, np.nan, "", False
+    # First, try to find entries with empty lower ranks
+    exact_df = df
+    for lower_rank in RANKS[rank + 1:]:
+        exact_df = exact_df.filter((pl.col(lower_rank).is_null()) | (pl.col(lower_rank) == ""))
+    if exact_df.shape[0] > 0:
+        df_filtered = exact_df.sample()
+        full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
+        return df_filtered["file_path"][0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, True
+    # If no exact matches, return any entry with the specified rank
+    df_filtered = df.sample()
+    full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
+    return df_filtered["file_path"][0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, False

components/sync_samples_to_s3.bash ADDED Viewed

	@@ -0,0 +1,34 @@

+#!/bin/bash
+<<COMMENT
+Usage:
+bash sync_samples_to_s3.bash <BASE_DIR>
+Dependencies:
+- awscli (https://aws.amazon.com/cli/)
+Credentials to export as environment variables:
+- AWS_ACCESS_KEY_ID
+- AWS_SECRET_ACCESS_KEY
+COMMENT
+# Check if a valid directory is provided as an argument
+if [ -z "$1" ]; then
+  echo "Usage: $0 <BASE_DIR>"
+  exit 1
+fi
+if [ ! -d "$1" ]; then
+  echo "Error: $1 is not a valid directory"
+  exit 1
+fi
+BASE_DIR="$1"
+S3_BUCKET="s3://treeoflife-10m-sample-images"
+# Loop through all directories and sync them to S3
+for dir in $BASE_DIR/*; do
+  if [ -d "$dir" ]; then
+    dir_name=$(basename "$dir")
+    aws s3 sync "$dir" "$S3_BUCKET/$dir_name/"
+  fi
+done