Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -69,31 +69,27 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
| 69 |
|
| 70 |
try:
|
| 71 |
print("Decoding base64 key...")
|
| 72 |
-
# --- Add Debugging Prints Here ---
|
| 73 |
-
print(f"Raw Base64 string (first 50 chars): {service_account_key_base64[:50]}...")
|
| 74 |
-
print(f"Raw Base64 string length: {len(service_account_key_base64)}")
|
| 75 |
-
# --- End Debugging Prints ---
|
| 76 |
-
|
| 77 |
key_bytes = base64.b64decode(service_account_key_base64)
|
| 78 |
-
|
| 79 |
-
# --- Add More Debugging Prints Here ---
|
| 80 |
-
print(f"Decoded bytes (first 50): {key_bytes[:50]}")
|
| 81 |
-
print(f"Decoded bytes length: {len(key_bytes)}")
|
| 82 |
-
# --- End Debugging Prints ---
|
| 83 |
-
|
| 84 |
-
# This is the line that is likely failing internally after base64.b64decode
|
| 85 |
-
# because key_bytes cannot be decoded as UTF-8
|
| 86 |
-
key_dict = json.loads(key_bytes.decode('utf-8')) # Explicitly decode to see if it throws error here
|
| 87 |
-
|
| 88 |
print("Base64 key decoded and parsed.")
|
| 89 |
|
| 90 |
print("Authenticating with service account...")
|
| 91 |
from google.oauth2 import service_account
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
client = gspread.authorize(creds)
|
| 94 |
print("Authentication successful.")
|
| 95 |
|
| 96 |
print(f"Opening sheet with key '{sheet_id}'...")
|
|
|
|
|
|
|
|
|
|
| 97 |
sheet = client.open_by_key(sheet_id).sheet1
|
| 98 |
print(f"Successfully opened Google Sheet with ID: {sheet_id}")
|
| 99 |
|
|
@@ -111,19 +107,22 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
| 111 |
|
| 112 |
if not filtered_data:
|
| 113 |
print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
|
|
|
|
| 114 |
if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
|
| 115 |
print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
|
| 116 |
return [], [], torch.tensor([])
|
| 117 |
|
|
|
|
| 118 |
if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
|
| 119 |
print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
|
| 120 |
return [], [], torch.tensor([])
|
| 121 |
|
|
|
|
| 122 |
services = [row["Service"] for row in filtered_data]
|
| 123 |
descriptions = [row["Description"] for row in filtered_data]
|
| 124 |
print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
|
| 125 |
|
| 126 |
-
return filtered_data, descriptions, None
|
| 127 |
|
| 128 |
except gspread.exceptions.SpreadsheetNotFound:
|
| 129 |
print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
|
|
@@ -131,7 +130,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
| 131 |
return [], [], torch.tensor([])
|
| 132 |
except Exception as e:
|
| 133 |
print(f"An error occurred while accessing the Google Sheet: {e}")
|
| 134 |
-
# Consider adding print(f"Type of error: {type(e)}") to see if it's specifically UnicodeDecodeError
|
| 135 |
return [], [], torch.tensor([])
|
| 136 |
|
| 137 |
|
|
|
|
| 69 |
|
| 70 |
try:
|
| 71 |
print("Decoding base64 key...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
key_bytes = base64.b64decode(service_account_key_base64)
|
| 73 |
+
key_dict = json.loads(key_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
print("Base64 key decoded and parsed.")
|
| 75 |
|
| 76 |
print("Authenticating with service account...")
|
| 77 |
from google.oauth2 import service_account
|
| 78 |
+
|
| 79 |
+
# --- Suggested Change: Add the Google Sheets Scope ---
|
| 80 |
+
# Define the scopes needed. This is the standard scope for Google Sheets.
|
| 81 |
+
scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly'] # Use read-only if only reading, 'https://www.googleapis.com/auth/spreadsheets' for read/write
|
| 82 |
+
|
| 83 |
+
creds = service_account.Credentials.from_service_account_info(key_dict, scopes=scopes)
|
| 84 |
+
# --- End Suggested Change ---
|
| 85 |
+
|
| 86 |
client = gspread.authorize(creds)
|
| 87 |
print("Authentication successful.")
|
| 88 |
|
| 89 |
print(f"Opening sheet with key '{sheet_id}'...")
|
| 90 |
+
# *** IMPORTANT: If your data is NOT on the first worksheet, replace '.sheet1' below with '.worksheet("<name>")'.
|
| 91 |
+
# *** For example, if your sheet is named 'Data', use:
|
| 92 |
+
# sheet = client.open_by_key(sheet_id).worksheet("Data")
|
| 93 |
sheet = client.open_by_key(sheet_id).sheet1
|
| 94 |
print(f"Successfully opened Google Sheet with ID: {sheet_id}")
|
| 95 |
|
|
|
|
| 107 |
|
| 108 |
if not filtered_data:
|
| 109 |
print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
|
| 110 |
+
# Check if headers exist at all if filtered_data is empty but sheet_data isn't
|
| 111 |
if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
|
| 112 |
print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
|
| 113 |
return [], [], torch.tensor([])
|
| 114 |
|
| 115 |
+
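# Re-check column existence on filtered_data (redundant after the filter). NOTE(review): filtered_data[0] raises IndexError if the list is empty - confirm the empty case returns before this line.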
|
| 116 |
if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
|
| 117 |
print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
|
| 118 |
return [], [], torch.tensor([])
|
| 119 |
|
| 120 |
+
|
| 121 |
services = [row["Service"] for row in filtered_data]
|
| 122 |
descriptions = [row["Description"] for row in filtered_data]
|
| 123 |
print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
|
| 124 |
|
| 125 |
+
return filtered_data, descriptions, None # Return descriptions, embeddings encoded later
|
| 126 |
|
| 127 |
except gspread.exceptions.SpreadsheetNotFound:
|
| 128 |
print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
|
|
|
|
| 130 |
return [], [], torch.tensor([])
|
| 131 |
except Exception as e:
|
| 132 |
print(f"An error occurred while accessing the Google Sheet: {e}")
|
|
|
|
| 133 |
return [], [], torch.tensor([])
|
| 134 |
|
| 135 |
|