Spaces:

ritvik77
/

ContributionChartHuggingFace

Sleeping

App Files Files Community

Ritvik committed on Mar 28

Commit

b68ab8a

1 Parent(s): 7268fc2

Updated app

Browse files

Files changed (1) hide show

app.py +193 -69

app.py CHANGED Viewed

@@ -5,33 +5,85 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 st.set_page_config(page_title="HF Contributions", layout="wide")
 api = HfApi()
 # Function to fetch commits for a repository (optimized)
 def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
     try:
         # Skip private/gated repos upfront
-        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
         if repo_info.private or (hasattr(repo_info, 'gated') and repo_info.gated):
-            return []
-        commits = api.list_repo_commits(repo_id=repo_id, repo_type=repo_type)
-        commit_dates = [
-            pd.to_datetime(commit.created_at).tz_localize(None).date()
-            for commit in commits
-            if any(
-                (isinstance(author, str) and author.lower() == username.lower()) or
-                (isinstance(author, dict) and "user" in author and author["user"].lower() == username.lower())
-                for author in commit.authors
-            ) and pd.to_datetime(commit.created_at).year == selected_year
-        ]
-        return commit_dates
     except Exception:
-        return []  # Silently skip inaccessible or errored repos
-# Function to get commit events for a user
 def get_commit_events(username, kind=None, selected_year=None):
     commit_dates = []
     items_with_type = []
@@ -39,54 +91,84 @@ def get_commit_events(username, kind=None, selected_year=None):
     for k in kinds:
         try:
-            if k == "model":
-                items = list(api.list_models(author=username))
-            elif k == "dataset":
-                items = list(api.list_datasets(author=username))
-            elif k == "space":
-                items = list(api.list_spaces(author=username))
-            else:
-                items = []
             items_with_type.extend((item, k) for item in items)
             repo_ids = [item.id for item in items]
-            # Parallel fetch commits
-            with ThreadPoolExecutor(max_workers=10) as executor:
-                future_to_repo = {
-                    executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
-                    for repo_id in repo_ids
-                }
-                for future in as_completed(future_to_repo):
-                    commit_dates.extend(future.result())
         except Exception as e:
             st.warning(f"Error fetching {k}s for {username}: {str(e)}")
-    return pd.DataFrame(commit_dates, columns=["date"]), items_with_type
-# Calendar heatmap function
-def make_calendar_heatmap(df, title, year, color_palette="Greens"):
     if df.empty:
         st.info(f"No {title.lower()} found for {year}.")
         return
     df["count"] = 1
-    df = df.groupby("date").sum().reset_index()
     df["date"] = pd.to_datetime(df["date"])
     start = pd.Timestamp(f"{year}-01-01")
     end = pd.Timestamp(f"{year}-12-31")
     all_days = pd.date_range(start=start, end=end)
-    heatmap_data = pd.DataFrame(index=all_days).assign(count=0)
-    heatmap_data.loc[df.set_index("date").index, "count"] = df.set_index("date")["count"]
-    heatmap_data["dow"] = heatmap_data.index.dayofweek
-    heatmap_data["week"] = ((heatmap_data.index - start).days // 7)
-    heatmap_data = heatmap_data.reset_index().rename(columns={"index": "date"})
     pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)
-    month_labels = [d.strftime("%b") for d in pd.date_range(start, end, freq="MS")]
-    month_positions = [((pd.Timestamp(f"{year}-{i + 1}-01") - start).days // 7) for i in range(12)]
     fig, ax = plt.subplots(figsize=(12, 1.2))
-    sns.heatmap(pivot, ax=ax, cmap=color_palette, linewidths=0.5, linecolor="white", square=True, cbar=False,
-                yticklabels=["M", "T", "W", "T", "F", "S", "S"])
-    ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
     ax.set_xlabel("")
     ax.set_ylabel("")
     ax.set_xticks(month_positions)
@@ -94,6 +176,7 @@ def make_calendar_heatmap(df, title, year, color_palette="Greens"):
     ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
     st.pyplot(fig)
 # Sidebar
 with st.sidebar:
     st.title("👤 Contributor")
@@ -113,23 +196,57 @@ with st.sidebar:
 st.title("🤗 Hugging Face Contributions")
 if username:
     with st.spinner("Fetching commit data..."):
-        all_df, all_items = get_commit_events(username, selected_year=selected_year)
         st.subheader(f"{username}'s Activity in {selected_year}")
-        st.metric("Total Commits", len(all_df))
-        make_calendar_heatmap(all_df, "All Commits", selected_year)
-        # Updated Color Scheme Legend with five shades
-        st.markdown("""
-        <div style='text-align: center; margin-top: -10px; margin-bottom: 20px;'>
-            <span style='font-size: 12px; margin-right: 10px;'>Less</span>
-            <span style='display: inline-block; width: 15px; height: 15px; background-color: #f0f7f0; border: 1px solid #ccc;'></span>
-            <span style='display: inline-block; width: 15px; height: 15px; background-color: #c6e0c6; border: 1px solid #ccc;'></span>
-            <span style='display: inline-block; width: 15px; height: 15px; background-color: #77b577; border: 1px solid #ccc;'></span>
-            <span style='display: inline-block; width: 15px; height: 15px; background-color: #2e6b2e; border: 1px solid #ccc;'></span>
-            <span style='display: inline-block; width: 15px; height: 15px; background-color: #1a3c1a; border: 1px solid #ccc;'></span>
-            <span style='font-size: 12px; margin-left: 10px;'>More</span>
-        </div>
-        """, unsafe_allow_html=True)
         # Metrics and heatmaps for each type
         col1, col2, col3 = st.columns(3)
@@ -139,11 +256,18 @@ if username:
             (col3, "space", "🚀", "Spaces")
         ]:
             with col:
-                df_kind, _ = get_commit_events(username, kind=kind, selected_year=selected_year)
                 try:
-                    total = len(list(getattr(api, f"list_{kind}s")(author=username)))
-                except Exception:
-                    total = 0
-                st.metric(f"{emoji} {label}", total)
-                st.metric(f"Commits in {selected_year}", len(df_kind))
-                make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)

 import seaborn as sns
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import lru_cache
+import time
 st.set_page_config(page_title="HF Contributions", layout="wide")
 api = HfApi()
+# Cache for API responses
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    """Return the Hub metadata object for one repository, memoized.

    The lookup is delegated to ``api.repo_info``. Successful results are
    kept in an LRU cache keyed on ``(repo_id, repo_type)`` (at most 1000
    entries), so repeated calls with the same arguments reuse the first
    answer instead of calling the API again.
    """
    info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
    return info
@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    """Return the commits of one repository as a list, memoized.

    Whatever ``api.list_repo_commits`` yields is materialized into a list
    so it can be stored in the LRU cache (keyed on ``(repo_id, repo_type)``,
    at most 1000 entries). The same list object is handed back on every
    cache hit, so callers should treat it as read-only.
    """
    commit_iter = api.list_repo_commits(repo_id=repo_id, repo_type=repo_type)
    return [*commit_iter]
@lru_cache(maxsize=100)
def cached_list_items(username, kind):
    """Return the repositories of ``username`` for one repo kind, memoized.

    ``kind`` selects which listing call is made on ``api``: ``"model"``,
    ``"dataset"`` or ``"space"``. Any other value yields an empty list
    without touching the API. Results are cached per ``(username, kind)``
    (at most 100 entries).
    """
    lister_names = {
        "model": "list_models",
        "dataset": "list_datasets",
        "space": "list_spaces",
    }
    lister_name = lister_names.get(kind)
    if lister_name is None:
        return []
    fetch = getattr(api, lister_name)
    return list(fetch(author=username))
+# Rate limiting
class RateLimiter:
    """Thread-safe limiter that spaces calls at least ``1 / calls_per_second`` apart.

    ``wait()`` is meant to be called right before the rate-limited
    operation. Each caller reserves the next free time slot under a lock
    and then sleeps (outside the lock) until that slot arrives, so callers
    running on several threads are released one interval apart instead of
    all at once.

    Attributes:
        calls_per_second: Maximum number of calls released per second.
        last_call: ``time.monotonic()`` timestamp of the most recently
            reserved slot (0 until the first call).

    Raises:
        ValueError: If ``calls_per_second`` is not positive.
    """

    def __init__(self, calls_per_second=10):
        # Function-scope import keeps the class self-contained; the file's
        # top-level import block does not bring in ``threading``.
        import threading

        if calls_per_second <= 0:
            raise ValueError("calls_per_second must be positive")
        self.calls_per_second = calls_per_second
        self.last_call = 0
        self._lock = threading.Lock()

    def wait(self):
        """Block until this caller's reserved slot, then return."""
        min_interval = 1.0 / self.calls_per_second
        with self._lock:
            # The monotonic clock is immune to wall-clock adjustments.
            now = time.monotonic()
            slot = max(now, self.last_call + min_interval)
            # Reserve the slot before sleeping so the next caller queues
            # behind it rather than computing the same wake-up time.
            self.last_call = slot
        delay = slot - now
        if delay > 0:
            time.sleep(delay)
+rate_limiter = RateLimiter()
 # Function to fetch commits for a repository (optimized)
 def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
     """Collect the commit dates of one repository that fall in ``selected_year``.

     Args:
         repo_id: Repository identifier passed to the cached Hub lookups.
         repo_type: Repository kind passed to the cached Hub lookups.
         username: Accepted for interface compatibility; this function does
             not filter commits by author.
         selected_year: Calendar year (int) a date must belong to.

     Returns:
         A ``(commit_dates, commit_count)`` tuple: a list of ``datetime.date``
         objects and its length. Private or gated repositories, and any
         repository whose lookup raises, yield ``([], 0)``.
     """
     try:
         rate_limiter.wait()
         repo_info = cached_repo_info(repo_id, repo_type)
         # Objects without a ``gated`` attribute are treated as not gated.
         if repo_info.private or getattr(repo_info, "gated", False):
             # Same (list, int) shape as every other return path.
             return [], 0

         # The repository creation day is counted as a contribution.
         # NOTE(review): if the commit listing already contains the first
         # commit, that day is represented twice - confirm this is intended.
         created_on = pd.to_datetime(repo_info.created_at).tz_localize(None).date()
         commit_dates = [created_on] if created_on.year == selected_year else []

         for commit in cached_list_commits(repo_id, repo_type):
             commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date()
             if commit_date.year == selected_year:
                 commit_dates.append(commit_date)

         return commit_dates, len(commit_dates)
     except Exception:
         # Best effort: an inaccessible or failing repo contributes nothing.
         return [], 0
+# Function to get commit events for a user (optimized)
 def get_commit_events(username, kind=None, selected_year=None):
     commit_dates = []
     items_with_type = []
     for k in kinds:
         try:
+            items = cached_list_items(username, k)
             items_with_type.extend((item, k) for item in items)
             repo_ids = [item.id for item in items]
+            # Optimized parallel fetch with chunking
+            chunk_size = 5  # Process 5 repos at a time
+            for i in range(0, len(repo_ids), chunk_size):
+                chunk = repo_ids[i:i + chunk_size]
+                with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
+                    future_to_repo = {
+                        executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
+                        for repo_id in chunk
+                    }
+                    for future in as_completed(future_to_repo):
+                        repo_commits, repo_count = future.result()
+                        if repo_commits:  # Only extend if we got commits
+                            commit_dates.extend(repo_commits)
         except Exception as e:
             st.warning(f"Error fetching {k}s for {username}: {str(e)}")
+    # Create DataFrame with all commits
+    df = pd.DataFrame(commit_dates, columns=["date"])
+    if not df.empty:
+        df = df.drop_duplicates()  # Remove any duplicate dates
+    return df, items_with_type
+# Calendar heatmap function (optimized)
+def make_calendar_heatmap(df, title, year):
     if df.empty:
         st.info(f"No {title.lower()} found for {year}.")
         return
+    # Optimize DataFrame operations
     df["count"] = 1
+    df = df.groupby("date", as_index=False).sum()
     df["date"] = pd.to_datetime(df["date"])
+    # Create date range more efficiently
     start = pd.Timestamp(f"{year}-01-01")
     end = pd.Timestamp(f"{year}-12-31")
     all_days = pd.date_range(start=start, end=end)
+    # Optimize DataFrame creation and merging
+    heatmap_data = pd.DataFrame({"date": all_days, "count": 0})
+    heatmap_data = heatmap_data.merge(df, on="date", how="left", suffixes=("", "_y"))
+    heatmap_data["count"] = heatmap_data["count_y"].fillna(0)
+    heatmap_data = heatmap_data.drop("count_y", axis=1)
+    # Calculate week and day of week more efficiently
+    heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek
+    heatmap_data["week"] = (heatmap_data["date"] - start).dt.days // 7
+    # Create pivot table more efficiently
     pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)
+    # Optimize month labels calculation
+    month_labels = pd.date_range(start, end, freq="MS").strftime("%b")
+    month_positions = pd.date_range(start, end, freq="MS").map(lambda x: (x - start).days // 7)
+    # Create custom colormap with specific boundaries
+    from matplotlib.colors import ListedColormap, BoundaryNorm
+    colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39']  # GitHub-style green colors
+    bounds = [0, 1, 3, 11, 31, float('inf')]  # Boundaries for color transitions
+    cmap = ListedColormap(colors)
+    norm = BoundaryNorm(bounds, cmap.N)
+    # Create plot more efficiently
     fig, ax = plt.subplots(figsize=(12, 1.2))
+    # Convert pivot values to integers to ensure proper color mapping
+    pivot_int = pivot.astype(int)
+    # Create heatmap with explicit vmin and vmax
+    sns.heatmap(pivot_int, ax=ax, cmap=cmap, norm=norm, linewidths=0.5, linecolor="white",
+                square=True, cbar=False, yticklabels=["M", "T", "W", "T", "F", "S", "S"])
+    ax.set_title(f"{title}", fontsize=12, pad=10)
     ax.set_xlabel("")
     ax.set_ylabel("")
     ax.set_xticks(month_positions)
     ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
     st.pyplot(fig)
 # Sidebar
 with st.sidebar:
     st.title("👤 Contributor")
 st.title("🤗 Hugging Face Contributions")
 if username:
     with st.spinner("Fetching commit data..."):
+        # Create a dictionary to store commits by type
+        commits_by_type = {}
+        commit_counts_by_type = {}
+        # Fetch commits for each type separately
+        for kind in ["model", "dataset", "space"]:
+            try:
+                items = cached_list_items(username, kind)
+                repo_ids = [item.id for item in items]
+                # Process repos in chunks
+                chunk_size = 5
+                total_commits = 0
+                all_commit_dates = []
+                for i in range(0, len(repo_ids), chunk_size):
+                    chunk = repo_ids[i:i + chunk_size]
+                    with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
+                        future_to_repo = {
+                            executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year): repo_id
+                            for repo_id in chunk
+                        }
+                        for future in as_completed(future_to_repo):
+                            repo_commits, repo_count = future.result()
+                            if repo_commits:
+                                all_commit_dates.extend(repo_commits)
+                                total_commits += repo_count
+                commits_by_type[kind] = all_commit_dates
+                commit_counts_by_type[kind] = total_commits
+            except Exception as e:
+                st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
+                commits_by_type[kind] = []
+                commit_counts_by_type[kind] = 0
+        # Calculate total commits across all types
+        total_commits = sum(commit_counts_by_type.values())
         st.subheader(f"{username}'s Activity in {selected_year}")
+        st.metric("Total Commits", total_commits)
+        # Create DataFrame for all commits
+        all_commits = []
+        for commits in commits_by_type.values():
+            all_commits.extend(commits)
+        all_df = pd.DataFrame(all_commits, columns=["date"])
+        if not all_df.empty:
+            all_df = all_df.drop_duplicates()  # Remove any duplicate dates
+        make_calendar_heatmap(all_df, "All Commits", selected_year)
         # Metrics and heatmaps for each type
         col1, col2, col3 = st.columns(3)
             (col3, "space", "🚀", "Spaces")
         ]:
             with col:
                 try:
+                    total = len(cached_list_items(username, kind))
+                    commits = commits_by_type.get(kind, [])
+                    commit_count = commit_counts_by_type.get(kind, 0)
+                    df_kind = pd.DataFrame(commits, columns=["date"])
+                    if not df_kind.empty:
+                        df_kind = df_kind.drop_duplicates()  # Remove any duplicate dates
+                    st.metric(f"{emoji} {label}", total)
+                    st.metric(f"Commits in {selected_year}", commit_count)
+                    make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
+                except Exception as e:
+                    st.warning(f"Error processing {label}: {str(e)}")
+                    st.metric(f"{emoji} {label}", 0)
+                    st.metric(f"Commits in {selected_year}", 0)
+                    make_calendar_heatmap(pd.DataFrame(), f"{label} Commits", selected_year)