multimodalart HF Staff committed on
Commit
99e84c5
·
verified ·
1 Parent(s): 15ff602

Update js/index.js

Browse files
Files changed (1) hide show
  1. js/index.js +161 -57
js/index.js CHANGED
@@ -79,15 +79,28 @@ document.getElementById('repoForm').addEventListener('submit', async function (e
79
 
80
  if (lastString) {
81
  console.log("Fetching HF references for lastString:", lastString);
82
- const refs = await getHuggingFaceReferences(owner, repo, repo_type, accessToken);
83
- console.log("Available HF Refs:", refs);
84
- const matchingRef = refs.find(ref => lastString.startsWith(ref + '/'));
85
- if (matchingRef) {
86
- refFromUrl = matchingRef;
87
- pathFromUrl = lastString.slice(matchingRef.length + 1);
88
- } else {
89
- refFromUrl = lastString.split('/')[0];
90
- pathFromUrl = lastString.substring(refFromUrl.length + 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
  }
93
 
@@ -96,7 +109,7 @@ document.getElementById('repoForm').addEventListener('submit', async function (e
96
  tree = await fetchHuggingFaceTree(owner, repo, repo_type, refFromUrl, pathFromUrl, accessToken);
97
  }
98
 
99
- console.log("Final tree object before display:", tree);
100
  displayDirectoryStructure(tree);
101
  document.getElementById('generateTextButton').style.display = 'flex';
102
  document.getElementById('downloadZipButton').style.display = 'flex';
@@ -133,27 +146,34 @@ document.getElementById('generateTextButton').addEventListener('click', async fu
133
  document.getElementById('copyButton').style.display = 'flex';
134
  document.getElementById('downloadButton').style.display = 'flex';
135
  } catch (error) {
 
136
  outputText.value = `Error generating text file: ${error.message}\n\n` +
137
  "Please ensure:\n" +
138
  "1. You have selected at least one file from the directory structure.\n" +
139
  "2. Your access token (if provided) is valid and has the necessary permissions.\n" +
140
- "3. You have a stable internet connection.";
 
141
  }
142
  });
143
 
144
  // Event listener for downloading zip file
145
  document.getElementById('downloadZipButton').addEventListener('click', async function () {
146
  const accessToken = document.getElementById('accessToken').value;
 
 
147
 
148
  try {
149
  const selectedFiles = getSelectedFiles();
150
  if (selectedFiles.length === 0) {
151
  throw new Error('No files selected');
152
  }
 
 
153
  const fileContents = await fetchFileContents(selectedFiles, accessToken, currentRepoInfo.source);
154
  await createAndDownloadZip(fileContents);
 
155
  } catch (error) {
156
- const outputText = document.getElementById('outputText');
157
  outputText.value = `Error generating zip file: ${error.message}\n\n` +
158
  "Please ensure:\n" +
159
  "1. You have selected at least one file from the directory structure.\n" +
@@ -188,36 +208,66 @@ document.getElementById('downloadButton').addEventListener('click', function ()
188
 
189
  // Parse GitHub or Hugging Face repository URL
190
  function parseRepoUrl(url) {
191
- url = url.replace(/\/$/, '');
 
192
  const githubPattern = /^https:\/\/github\.com\/([^\/]+)\/([^\/]+)(?:\/tree\/(.+))?$/;
193
- const hfPattern = /^https:\/\/huggingface\.co\/(?:(datasets|spaces)\/)?([^\/]+)\/([^\/]+)(?:\/tree\/(.+))?$/;
194
-
195
  let match = url.match(githubPattern);
196
  if (match) {
197
  return {
198
  source: 'github',
199
  owner: match[1],
200
  repo: match[2],
201
- lastString: match[4] || ''
202
  };
203
  }
204
 
205
- match = url.match(hfPattern);
206
- if (match) {
 
 
 
 
 
 
207
  let repo_type = 'model';
208
- if (match[1] === 'datasets') repo_type = 'dataset';
209
- if (match[1] === 'spaces') repo_type = 'space';
210
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  return {
212
  source: 'huggingface',
213
  repo_type: repo_type,
214
- owner: match[2],
215
- repo: match[3],
216
- lastString: match[4] || ''
217
  };
218
  }
219
 
220
- throw new Error('Invalid GitHub or Hugging Face repository URL.');
221
  }
222
 
223
  // Fetch GitHub repository references
@@ -251,15 +301,20 @@ async function getHuggingFaceReferences(owner, repo, repo_type, token) {
251
  if (token) headers['Authorization'] = `Bearer ${token}`;
252
 
253
  const response = await fetch(url, { headers });
254
- if (!response.ok) handleFetchError(response, 'huggingface');
 
 
 
 
255
 
256
  const data = await response.json();
257
  const branches = data.branches ? data.branches.map(b => b.name) : [];
258
  const tags = data.tags ? data.tags.map(t => t.name) : [];
259
- return [...branches, ...tags];
 
260
  }
261
 
262
- // Fetch repository SHA
263
  async function fetchRepoSha(owner, repo, ref, path, token) {
264
  const url = `https://api.github.com/repos/${owner}/${repo}/contents/${path ? `${path}` : ''}${ref ? `?ref=${ref}` : ''}`;
265
  const headers = { 'Accept': 'application/vnd.github.object+json' };
@@ -269,7 +324,16 @@ async function fetchRepoSha(owner, repo, ref, path, token) {
269
  if (!response.ok) handleFetchError(response, 'github');
270
 
271
  const data = await response.json();
272
- return data.sha;
 
 
 
 
 
 
 
 
 
273
  }
274
 
275
  // Fetch GitHub repository tree
@@ -278,17 +342,27 @@ async function fetchGitHubRepoTree(owner, repo, sha, token) {
278
  const headers = { 'Accept': 'application/vnd.github+json' };
279
  if (token) headers['Authorization'] = `token ${token}`;
280
 
 
281
  const response = await fetch(url, { headers });
282
  if (!response.ok) handleFetchError(response, 'github');
283
 
284
  const data = await response.json();
285
- return data.tree;
 
 
 
 
 
 
 
286
  }
287
 
288
  // Fetch Hugging Face repository tree
289
  async function fetchHuggingFaceTree(owner, repo, repo_type, ref, path, token) {
290
  const typePath = repo_type === 'model' ? 'models' : repo_type === 'dataset' ? 'datasets' : 'spaces';
291
- const url = `https://huggingface.co/api/${typePath}/${owner}/${repo}/tree/${ref}`;
 
 
292
  console.log("Fetching HF tree from URL:", url);
293
 
294
  const headers = {};
@@ -297,17 +371,20 @@ async function fetchHuggingFaceTree(owner, repo, repo_type, ref, path, token) {
297
  const response = await fetch(url, { headers });
298
  if (!response.ok) handleFetchError(response, 'huggingface');
299
 
300
- let tree = await response.json();
301
- console.log("Raw HF API response:", JSON.parse(JSON.stringify(tree)));
302
-
303
 
 
304
  if (path) {
305
- console.log("Filtering HF tree with path:", path);
306
- tree = tree.filter(item => item.path.startsWith(path + '/') || item.path === path);
307
- console.log("Filtered HF tree:", JSON.parse(JSON.stringify(tree)));
308
  }
309
 
310
- const mappedTree = tree.map(item => {
 
 
 
311
  let repoIdForUrl;
312
  switch (repo_type) {
313
  case 'dataset':
@@ -319,14 +396,16 @@ async function fetchHuggingFaceTree(owner, repo, repo_type, ref, path, token) {
319
  default: // model
320
  repoIdForUrl = `${owner}/${repo}`;
321
  }
322
- const mappedItem = {
 
323
  path: item.path,
324
- type: (item.type === 'file' || item.type === 'lfs') ? 'blob' : 'tree',
 
 
325
  urlType: 'hf',
326
- url: `https://huggingface.co/${repoIdForUrl}/raw/${ref}/${item.path}`
 
327
  };
328
- // console.log("Mapping item:", item, "to:", mappedItem);
329
- return mappedItem;
330
  });
331
 
332
  console.log("Mapped HF tree:", mappedTree);
@@ -334,35 +413,59 @@ async function fetchHuggingFaceTree(owner, repo, repo_type, ref, path, token) {
334
  }
335
 
336
  // Handle fetch errors
337
- function handleFetchError(response, source = 'github') {
 
338
  if (response.status === 403 && source === 'github' && response.headers.get('X-RateLimit-Remaining') === '0') {
339
- throw new Error('GitHub API rate limit exceeded. Please try again later or provide a valid access token to increase your rate limit.');
340
  }
341
  if (response.status === 401) {
342
- throw new Error(`Authentication error. Please check if your access token is valid and has the required permissions.`);
343
  }
344
  if (response.status === 404) {
345
- throw new Error(`Repository, branch, or path not found. Please check that the URL, branch/tag, and path are correct and accessible.`);
346
  }
347
- throw new Error(`Failed to fetch repository data. Status: ${response.status}. Please check your input and try again.`);
348
  }
349
 
350
  // Fetch contents of selected files
351
  async function fetchFileContents(files, token, source) {
352
  const contents = await Promise.all(files.map(async file => {
 
353
  let headers = {};
354
- if (token) {
355
- headers['Authorization'] = source === 'github' ? `token ${token}` : `Bearer ${token}`;
356
- }
357
  if (source === 'github') {
 
358
  headers['Accept'] = 'application/vnd.github.v3.raw';
 
 
 
359
  }
360
 
361
- const response = await fetch(file.url, { headers });
362
- if (!response.ok) handleFetchError(response, source);
363
-
364
- const text = await response.text();
365
- return { url: file.url, path: file.path, text };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  }));
367
  return contents;
368
  }
@@ -390,6 +493,7 @@ async function createAndDownloadZip(fileContents) {
390
  const zip = new JSZip();
391
 
392
  fileContents.forEach(file => {
 
393
  const filePath = file.path.startsWith('/') ? file.path.slice(1) : file.path;
394
  zip.file(filePath, file.text);
395
  });
@@ -398,7 +502,7 @@ async function createAndDownloadZip(fileContents) {
398
  const url = URL.createObjectURL(content);
399
  const a = document.createElement('a');
400
  a.href = url;
401
- a.download = 'repo_contents.zip';
402
  document.body.appendChild(a);
403
  a.click();
404
  document.body.removeChild(a);
 
79
 
80
  if (lastString) {
81
  console.log("Fetching HF references for lastString:", lastString);
82
+ // Note: HF API doesn't always require querying refs if we assume main,
83
+ // but good for robustness if user pastes a non-main branch URL.
84
+ try {
85
+ const refs = await getHuggingFaceReferences(owner, repo, repo_type, accessToken);
86
+ console.log("Available HF Refs:", refs);
87
+ const matchingRef = refs.find(ref => lastString.startsWith(ref + '/'));
88
+ if (matchingRef) {
89
+ refFromUrl = matchingRef;
90
+ pathFromUrl = lastString.slice(matchingRef.length + 1);
91
+ } else if (refs.includes(lastString)) {
92
+ refFromUrl = lastString;
93
+ } else {
94
+ // Fallback when no known ref matches: treat the first segment as the ref and the rest as the path
95
+ const parts = lastString.split('/');
96
+ refFromUrl = parts[0];
97
+ pathFromUrl = parts.slice(1).join('/');
98
+ }
99
+ } catch (e) {
100
+ console.warn("Could not fetch HF refs, trying to parse from URL directly.", e);
101
+ const parts = lastString.split('/');
102
+ refFromUrl = parts[0] || 'main';
103
+ pathFromUrl = parts.slice(1).join('/');
104
  }
105
  }
106
 
 
109
  tree = await fetchHuggingFaceTree(owner, repo, repo_type, refFromUrl, pathFromUrl, accessToken);
110
  }
111
 
112
+ console.log("Final tree object passed to display:", tree);
113
  displayDirectoryStructure(tree);
114
  document.getElementById('generateTextButton').style.display = 'flex';
115
  document.getElementById('downloadZipButton').style.display = 'flex';
 
146
  document.getElementById('copyButton').style.display = 'flex';
147
  document.getElementById('downloadButton').style.display = 'flex';
148
  } catch (error) {
149
+ console.error("Error generating text:", error);
150
  outputText.value = `Error generating text file: ${error.message}\n\n` +
151
  "Please ensure:\n" +
152
  "1. You have selected at least one file from the directory structure.\n" +
153
  "2. Your access token (if provided) is valid and has the necessary permissions.\n" +
154
+ "3. You have a stable internet connection.\n" +
155
+ "4. Note: Very large files or LFS files might fail to fetch as text.";
156
  }
157
  });
158
 
159
  // Event listener for downloading zip file
160
  document.getElementById('downloadZipButton').addEventListener('click', async function () {
161
  const accessToken = document.getElementById('accessToken').value;
162
+ const outputText = document.getElementById('outputText');
163
+ outputText.value = 'Generating zip file...';
164
 
165
  try {
166
  const selectedFiles = getSelectedFiles();
167
  if (selectedFiles.length === 0) {
168
  throw new Error('No files selected');
169
  }
170
+ // For zip, we might want to handle binary files differently in future,
171
+ // but currently fetchFileContents gets text.
172
  const fileContents = await fetchFileContents(selectedFiles, accessToken, currentRepoInfo.source);
173
  await createAndDownloadZip(fileContents);
174
+ outputText.value = 'Zip file downloaded successfully.';
175
  } catch (error) {
176
+ console.error("Error generating zip:", error);
177
  outputText.value = `Error generating zip file: ${error.message}\n\n` +
178
  "Please ensure:\n" +
179
  "1. You have selected at least one file from the directory structure.\n" +
 
208
 
209
  // Parse GitHub or Hugging Face repository URL
210
  function parseRepoUrl(url) {
211
+ url = url.trim().replace(/\/$/, '');
212
+ // GitHub pattern
213
  const githubPattern = /^https:\/\/github\.com\/([^\/]+)\/([^\/]+)(?:\/tree\/(.+))?$/;
 
 
214
  let match = url.match(githubPattern);
215
  if (match) {
216
  return {
217
  source: 'github',
218
  owner: match[1],
219
  repo: match[2],
220
+ lastString: match[3] || ''
221
  };
222
  }
223
 
224
+ // Hugging Face pattern
225
+ // Matches: https://huggingface.co/username/repo
226
+ // Matches: https://huggingface.co/datasets/username/repo
227
+ // Matches: https://huggingface.co/spaces/username/repo
228
+ // Handles optional /tree/branch/path
229
+ const hfUrlParts = new URL(url);
230
+ if (hfUrlParts.hostname === 'huggingface.co') {
231
+ const pathSegments = hfUrlParts.pathname.split('/').filter(Boolean);
232
  let repo_type = 'model';
233
+ let owner, repo, treeIndex;
234
+
235
+ if (pathSegments[0] === 'datasets') {
236
+ repo_type = 'dataset';
237
+ owner = pathSegments[1];
238
+ repo = pathSegments[2];
239
+ treeIndex = pathSegments.indexOf('tree', 3);
240
+ } else if (pathSegments[0] === 'spaces') {
241
+ repo_type = 'space';
242
+ owner = pathSegments[1];
243
+ repo = pathSegments[2];
244
+ treeIndex = pathSegments.indexOf('tree', 3);
245
+ } else {
246
+ // Models don't have a prefix
247
+ owner = pathSegments[0];
248
+ repo = pathSegments[1];
249
+ treeIndex = pathSegments.indexOf('tree', 2);
250
+ }
251
+
252
+ if (!owner || !repo) {
253
+ throw new Error('Invalid Hugging Face URL format.');
254
+ }
255
+
256
+ let lastString = '';
257
+ if (treeIndex !== -1 && treeIndex + 1 < pathSegments.length) {
258
+ lastString = pathSegments.slice(treeIndex + 1).join('/');
259
+ }
260
+
261
  return {
262
  source: 'huggingface',
263
  repo_type: repo_type,
264
+ owner: owner,
265
+ repo: repo,
266
+ lastString: lastString
267
  };
268
  }
269
 
270
+ throw new Error('Invalid URL. Please provide a valid GitHub or Hugging Face repository URL.');
271
  }
272
 
273
  // Fetch GitHub repository references
 
301
  if (token) headers['Authorization'] = `Bearer ${token}`;
302
 
303
  const response = await fetch(url, { headers });
304
+ if (!response.ok) {
305
+ // HF might return 404 for refs on private repos without token, or just empty list.
306
+ console.warn("Could not fetch HF refs:", response.status);
307
+ return ['main']; // fallback
308
+ }
309
 
310
  const data = await response.json();
311
  const branches = data.branches ? data.branches.map(b => b.name) : [];
312
  const tags = data.tags ? data.tags.map(t => t.name) : [];
313
+ const refs = [...branches, ...tags];
314
+ return refs.length > 0 ? refs : ['main'];
315
  }
316
 
317
+ // Fetch repository SHA (GitHub)
318
  async function fetchRepoSha(owner, repo, ref, path, token) {
319
  const url = `https://api.github.com/repos/${owner}/${repo}/contents/${path ? `${path}` : ''}${ref ? `?ref=${ref}` : ''}`;
320
  const headers = { 'Accept': 'application/vnd.github.object+json' };
 
324
  if (!response.ok) handleFetchError(response, 'github');
325
 
326
  const data = await response.json();
327
+ // If path leads to a file, data is object. If dir, data is array.
328
+ if (Array.isArray(data)) {
329
+ // It's a directory, get last commit sha for this dir to get tree
330
+ const commitUrl = `https://api.github.com/repos/${owner}/${repo}/commits?path=${path || ''}&sha=${ref || ''}&per_page=1`;
331
+ const commitResp = await fetch(commitUrl, { headers });
332
+ if (!commitResp.ok) handleFetchError(commitResp, 'github');
333
+ const commitData = await commitResp.json();
334
+ return commitData[0].commit.tree.sha;
335
+ }
336
+ return data.sha; // It's a file
337
  }
338
 
339
  // Fetch GitHub repository tree
 
342
  const headers = { 'Accept': 'application/vnd.github+json' };
343
  if (token) headers['Authorization'] = `token ${token}`;
344
 
345
+ console.log("Fetching GitHub tree from:", url);
346
  const response = await fetch(url, { headers });
347
  if (!response.ok) handleFetchError(response, 'github');
348
 
349
  const data = await response.json();
350
+
351
+ // Map GitHub tree to internal format
352
+ return data.tree.map(item => ({
353
+ path: item.path,
354
+ type: item.type, // 'blob' or 'tree'
355
+ urlType: 'github',
356
+ url: item.url // git blob api url
357
+ }));
358
  }
359
 
360
  // Fetch Hugging Face repository tree
361
  async function fetchHuggingFaceTree(owner, repo, repo_type, ref, path, token) {
362
  const typePath = repo_type === 'model' ? 'models' : repo_type === 'dataset' ? 'datasets' : 'spaces';
363
+ // FIX: Added ?recursive=true to get all files, including those in subfolders
364
+ const url = `https://huggingface.co/api/${typePath}/${owner}/${repo}/tree/${ref}?recursive=true`;
365
+
366
  console.log("Fetching HF tree from URL:", url);
367
 
368
  const headers = {};
 
371
  const response = await fetch(url, { headers });
372
  if (!response.ok) handleFetchError(response, 'huggingface');
373
 
374
+ let rawTree = await response.json();
375
+ console.log("Raw HF API response (recursive):", rawTree);
 
376
 
377
+ // Filter by path if provided in URL
378
  if (path) {
379
+ console.log("Filtering HF tree by path:", path);
380
+ // Keep files that start with the path
381
+ rawTree = rawTree.filter(item => item.path.startsWith(path + '/') || item.path === path);
382
  }
383
 
384
+ // Map HF tree to internal format
385
+ // HF recursive tree returns only files. Directory objects are not included.
386
+ // This is good for utils.js as it builds directories from file paths.
387
+ const mappedTree = rawTree.map(item => {
388
  let repoIdForUrl;
389
  switch (repo_type) {
390
  case 'dataset':
 
396
  default: // model
397
  repoIdForUrl = `${owner}/${repo}`;
398
  }
399
+
400
+ return {
401
  path: item.path,
402
+ // HF types: 'file', 'lfs', 'directory' (shallow only).
403
+ // With recursive=true, we mainly get files. Map to 'blob' for utils.js
404
+ type: (item.type === 'directory') ? 'tree' : 'blob',
405
  urlType: 'hf',
406
+ // Construct raw URL
407
+ url: `https://huggingface.co/${repoIdForUrl}/resolve/${ref}/${item.path}`
408
  };
 
 
409
  });
410
 
411
  console.log("Mapped HF tree:", mappedTree);
 
413
  }
414
 
415
  // Handle fetch errors
416
/**
 * Turn a failed fetch response into a thrown Error with a user-facing message.
 * This function always throws; it never returns normally.
 *
 * @param {Response} response - The non-OK response; `status`, `statusText` and
 *   (for GitHub rate-limit detection) `headers.get` are read.
 * @param {string} [source='github'] - Which service was called ('github' or 'huggingface').
 * @throws {Error} Always, with a message chosen from the status code.
 */
function handleFetchError(response, source = 'github') {
    const { status, statusText } = response;
    console.error(`Fetch Error (${source}):`, status, statusText);

    // GitHub signals an exhausted primary rate limit with either 403 or 429,
    // together with X-RateLimit-Remaining: 0.
    const isRateLimitStatus = status === 403 || status === 429;
    if (isRateLimitStatus && source === 'github' && response.headers.get('X-RateLimit-Remaining') === '0') {
        throw new Error('GitHub API rate limit exceeded. Please try again later or provide a valid access token.');
    }
    if (status === 401) {
        throw new Error(`Authentication error. Please check if your access token is valid for ${source}.`);
    }
    if (status === 404) {
        throw new Error(`Repository, branch, or path not found on ${source}. Check URL.`);
    }
    throw new Error(`Failed to fetch data from ${source}. Status: ${status}.`);
}
429
 
430
  // Fetch contents of selected files
431
  async function fetchFileContents(files, token, source) {
432
  const contents = await Promise.all(files.map(async file => {
433
+ console.log(`Fetching content for: ${file.path} from ${source}`);
434
  let headers = {};
435
+ let fetchUrl = file.url;
436
+
 
437
  if (source === 'github') {
438
+ if (token) headers['Authorization'] = `token ${token}`;
439
  headers['Accept'] = 'application/vnd.github.v3.raw';
440
+ } else if (source === 'huggingface') {
441
+ if (token) headers['Authorization'] = `Bearer ${token}`;
442
+ // HF raw URLs don't need specific accept headers for text
443
  }
444
 
445
+ try {
446
+ const response = await fetch(fetchUrl, { headers });
447
+ if (!response.ok) handleFetchError(response, source);
448
+
449
+ // Check for LFS pointer in HF
450
+ if (source === 'huggingface') {
451
+ const contentLength = response.headers.get('content-length');
452
+ if (contentLength && parseInt(contentLength) > 5000000) { // Arbitrary 5MB limit for text representation
453
+ return { url: file.url, path: file.path, text: `[File too large to display directly: ${contentLength} bytes]` };
454
+ }
455
+ }
456
+
457
+ const text = await response.text();
458
+
459
+ // Basic check if it looks like an LFS pointer
460
+ if (source === 'huggingface' && text.startsWith('version https://git-lfs.github.com/spec/v1')) {
461
+ return { url: file.url, path: file.path, text: `[Git LFS Pointer]\n${text}` };
462
+ }
463
+
464
+ return { url: file.url, path: file.path, text };
465
+ } catch (error) {
466
+ console.error(`Error fetching ${file.path}:`, error);
467
+ return { url: file.url, path: file.path, text: `[Error fetching file: ${error.message}]` };
468
+ }
469
  }));
470
  return contents;
471
  }
 
493
  const zip = new JSZip();
494
 
495
  fileContents.forEach(file => {
496
+ // Remove leading slash if present for zip path
497
  const filePath = file.path.startsWith('/') ? file.path.slice(1) : file.path;
498
  zip.file(filePath, file.text);
499
  });
 
502
  const url = URL.createObjectURL(content);
503
  const a = document.createElement('a');
504
  a.href = url;
505
+ a.download = `${currentRepoInfo.repo}_contents.zip`;
506
  document.body.appendChild(a);
507
  a.click();
508
  document.body.removeChild(a);