Spaces:
Runtime error
Runtime error
ABarLT
committed on
Configurable websearch timeout (#1427)
Browse files — Made the Playwright websearch timeout value configurable through an environment variable
- .env +1 -0
- src/lib/server/websearch/scrape/playwright.ts +7 -3
.env
CHANGED
|
@@ -33,6 +33,7 @@ PLAYWRIGHT_ADBLOCKER=true
|
|
| 33 |
WEBSEARCH_ALLOWLIST=`[]` # if it's defined, allow websites from only this list.
|
| 34 |
WEBSEARCH_BLOCKLIST=`[]` # if it's defined, block websites from this list.
|
| 35 |
WEBSEARCH_JAVASCRIPT=true # CPU usage reduces by 60% on average by disabling javascript. Enable to improve website compatibility
|
|
|
|
| 36 |
|
| 37 |
# Parameters to enable open id login
|
| 38 |
OPENID_CONFIG=`{
|
|
|
|
| 33 |
WEBSEARCH_ALLOWLIST=`[]` # if it's defined, allow websites from only this list.
|
| 34 |
WEBSEARCH_BLOCKLIST=`[]` # if it's defined, block websites from this list.
|
| 35 |
WEBSEARCH_JAVASCRIPT=true # CPU usage reduces by 60% on average by disabling javascript. Enable to improve website compatibility
|
| 36 |
+
WEBSEARCH_TIMEOUT = 3500 # in milliseconds, determines how long to wait to load a page before timing out
|
| 37 |
|
| 38 |
# Parameters to enable open id login
|
| 39 |
OPENID_CONFIG=`{
|
src/lib/server/websearch/scrape/playwright.ts
CHANGED
|
@@ -70,9 +70,13 @@ export async function withPage<T>(
|
|
| 70 |
const page = await ctx.newPage();
|
| 71 |
env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page));
|
| 72 |
|
| 73 |
-
const res = await page
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
// await needed here so that we don't close the context before the callback is done
|
| 78 |
return await callback(page, res ?? undefined);
|
|
|
|
| 70 |
const page = await ctx.newPage();
|
| 71 |
env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page));
|
| 72 |
|
| 73 |
+
const res = await page
|
| 74 |
+
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
|
| 75 |
+
.catch(() => {
|
| 76 |
+
console.warn(
|
| 77 |
+
`Failed to load page within ${parseInt(env.WEBSEARCH_TIMEOUT) / 1000}s: ${url}`
|
| 78 |
+
);
|
| 79 |
+
});
|
| 80 |
|
| 81 |
// await needed here so that we don't close the context before the callback is done
|
| 82 |
return await callback(page, res ?? undefined);
|