diff --git a/README.md b/README.md index 72d4eccb0..f56e45640 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -

+

- +
Maxun
- Open-Source No-Code Web Data Extraction Platform
-

+ The Easiest Way To Extract Web Data With No Code
+

Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web data extraction doesn't get easier than this! @@ -15,115 +15,62 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Go To App | - Documentation | - Website | - Discord | - Twitter | + Go To App • + Documentation • + Website • + Discord • Watch Tutorials

getmaxun%2Fmaxun | Trendshift

-![maxun_gif](https://github.com/user-attachments/assets/3e0b0cf8-9e52-44d2-a140-b26b7b481477) - - - -# Getting Started -The simplest & fastest way to get started is to use the hosted version: https://app.maxun.dev. Maxun Cloud deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. - -# Local Installation -1. Create a root folder for your project (e.g. 'maxun') -2. Create a file named `.env` in the root folder of the project -3. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all content of example env to your `.env` file. -4. Choose your installation method below - -### Docker Compose -1. Copy paste the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml) into your root folder -2. Ensure you have setup the `.env` file in that same folder -3. Run the command below from a terminal -``` -docker-compose up -d -``` -You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ - -### Without Docker -1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. -2. Run the commands below -``` -git clone https://github.com/getmaxun/maxun - -# change directory to the project root -cd maxun - -# install dependencies -npm install - -# change directory to maxun-core to install dependencies -cd maxun-core -npm install - -# get back to the root directory -cd .. - -# install chromium and its dependencies -npx playwright install --with-deps chromium - -# get back to the root directory -cd .. - -# start frontend and backend together -npm run start -``` -You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ - - -# Environment Variables -1. Create a file named `.env` in the root folder of the project -2. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). 
- -| Variable | Mandatory | Description | If Not Set | -|-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| -| `BACKEND_PORT` | Yes | Port to run backend on. Needed for Docker setup | Default value: 8080 | -| `FRONTEND_PORT` | Yes | Port to run frontend on. Needed for Docker setup | Default value: 5173 | -| `BACKEND_URL` | Yes | URL to run backend on. | Default value: http://localhost:8080 | -| `VITE_BACKEND_URL` | Yes | URL used by frontend to connect to backend | Default value: http://localhost:8080 | -| `PUBLIC_URL` | Yes | URL to run frontend on. | Default value: http://localhost:5173 | -| `VITE_PUBLIC_URL` | Yes | URL used by backend to connect to frontend | Default value: http://localhost:5173 | -| `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. | -| `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. | -| `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. | -| `DB_PASSWORD` | Yes | Password for Postgres database authentication. | Database connection will fail. | -| `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. | -| `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. | -| `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. | -| `SESSION_SECRET` | No | A strong, random string used to sign session cookies | Uses default secret. Recommended to define your own session secret to avoid session hijacking. | -| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. 
| -| `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | -| `MINIO_CONSOLE_PORT` | No | Port number for MinIO WebUI service. Needed for Docker setup. | Cannot access MinIO Web UI. | -| `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. | -| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | -| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | -| `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | -| `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | -| `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | -| `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. | - -# How Do I Self-Host? -Checkout community self hosting guide: https://docs.maxun.dev/self-host - -# How Does It Work? -Maxun lets you create custom robots which emulate user actions and extract data. A robot can perform any of the actions: Capture List, Capture Text or Capture Screenshot. Once a robot is created, it will keep extracting data for you without manual intervention - -![Screenshot 2024-10-23 222138](https://github.com/user-attachments/assets/53573c98-769e-490d-829e-ada9fac0764f) - -## 1. Robot Actions -1. Capture List: Useful to extract structured and bulk items from the website. Example: Scrape products from Amazon etc. 
+https://github.com/user-attachments/assets/c6baa75f-b950-482c-8d26-8a8b6c5382c3 + +### Getting Started +The simplest & fastest way to get started is to use the hosted version: https://app.maxun.dev. You can self-host if you like! + +### Installation +Maxun can run locally with or without Docker +1. [Setup with Docker Compose](https://docs.maxun.dev/installation/docker) +2. [Setup without Docker](https://docs.maxun.dev/installation/local) +3. [Environment Variables](https://docs.maxun.dev/installation/environment_variables) + +### Upgrading & Self Hosting +1. [Self Host Maxun With Docker & Portainer](https://docs.maxun.dev/self-host) +2. [Upgrade Maxun With Docker Compose Setup](https://docs.maxun.dev/installation/upgrade#upgrading-with-docker-compose) +3. [Upgrade Maxun Without Docker Compose Setup](https://docs.maxun.dev/installation/upgrade#upgrading-with-local-setup) + +### How Does It Work? +Maxun lets you create custom robots which emulate user actions and extract data. A robot can perform any of the actions: Capture List, Capture Text or Capture Screenshot. Once a robot is created, it will keep extracting data for you without manual intervention. +1. Capture List: Useful to extract structured and bulk items from the website. 2. Capture Text: Useful to extract individual text content from the website. 3. Capture Screenshot: Get fullpage or visible section screenshots of the website. -# Features +### Sponsors + + + + + +
+
+ +

+ LambdaTest +
+
+ GenAI-powered Quality Engineering Platform that empowers teams to test intelligently, smarter, and ship faster. +
+ +
+ CyberYozh App +
+
+ Infrastructure for developers working with multi‑accounting & automation in one place. +
+ +### Features - ✨ Extract Data With No-Code - ✨ Handle Pagination & Scrolling - ✨ Run Robots On A Specific Schedule @@ -134,11 +81,11 @@ Maxun lets you create custom robots which emulate user actions and extract data. - ✨ Integrations - ✨ MCP -# Use Cases +### Use Cases Maxun can be used for various use-cases, including lead generation, market research, content aggregation and more. View use-cases in detail here: https://www.maxun.dev/#usecases -# Screenshots +### Screenshots ![Maxun PH Launch (1)-1-1](https://github.com/user-attachments/assets/d7c75fa2-2bbc-47bb-a5f6-0ee6c162f391) ![Maxun PH Launch (1)-2-1](https://github.com/user-attachments/assets/d85a3ec7-8ce8-4daa-89aa-52d9617e227a) ![Maxun PH Launch (1)-3-1](https://github.com/user-attachments/assets/4bd5a0b4-485d-44f4-a487-edd9afc18b11) @@ -149,18 +96,18 @@ View use-cases in detail here: https://www.maxun.dev/#usecases ![Maxun PH Launch (1)-8-1](https://github.com/user-attachments/assets/16ee4a71-772a-49ae-a0e5-cb0529519bda) ![Maxun PH Launch (1)-9-1](https://github.com/user-attachments/assets/160f46fa-0357-4c1b-ba50-b4fe64453bb7) -# Note +### Note This project is in early stages of development. Your feedback is very important for us - we're actively working on improvements. -# License +### License

This project is licensed under AGPLv3.

-# Support Us +### Support Us Star the repository, contribute if you love what we’re building, or [sponsor us](https://github.com/sponsors/amhsirak). -# Contributors +### Contributors Thank you to the combined efforts of everyone who contributes! diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 000000000..da89e75a0 --- /dev/null +++ b/SETUP.md @@ -0,0 +1,76 @@ +# Local Installation +1. Create a root folder for your project (e.g. 'maxun') +2. Create a file named `.env` in the root folder of the project +3. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all content of example env to your `.env` file. +4. Choose your installation method below + +### Docker Compose +1. Copy paste the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml) into your root folder +2. Ensure you have setup the `.env` file in that same folder +3. Run the command below from a terminal +``` +docker-compose up -d +``` +You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ + +### Without Docker +1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. +2. Run the commands below +``` +git clone https://github.com/getmaxun/maxun + +# change directory to the project root +cd maxun + +# install dependencies +npm install + +# change directory to maxun-core to install dependencies +cd maxun-core +npm install + +# get back to the root directory +cd .. + +# install chromium and its dependencies +npx playwright install --with-deps chromium + +# get back to the root directory +cd .. + +# start frontend and backend together +npm run start +``` +You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ + + +# Environment Variables +1. Create a file named `.env` in the root folder of the project +2. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). 
+ +| Variable | Mandatory | Description | If Not Set | +|-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| +| `BACKEND_PORT` | Yes | Port to run backend on. Needed for Docker setup | Default value: 8080 | +| `FRONTEND_PORT` | Yes | Port to run frontend on. Needed for Docker setup | Default value: 5173 | +| `BACKEND_URL` | Yes | URL to run backend on. | Default value: http://localhost:8080 | +| `VITE_BACKEND_URL` | Yes | URL used by frontend to connect to backend | Default value: http://localhost:8080 | +| `PUBLIC_URL` | Yes | URL to run frontend on. | Default value: http://localhost:5173 | +| `VITE_PUBLIC_URL` | Yes | URL used by backend to connect to frontend | Default value: http://localhost:5173 | +| `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. | +| `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. | +| `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. | +| `DB_PASSWORD` | Yes | Password for Postgres database authentication. | Database connection will fail. | +| `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. | +| `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. | +| `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. | +| `SESSION_SECRET` | No | A strong, random string used to sign session cookies | Uses default secret. Recommended to define your own session secret to avoid session hijacking. | +| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. 
| +| `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | +| `MINIO_CONSOLE_PORT` | No | Port number for MinIO WebUI service. Needed for Docker setup. | Cannot access MinIO Web UI. | +| `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. | +| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | +| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | +| `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | +| `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | +| `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | +| `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. 
| diff --git a/maxun-core/package.json b/maxun-core/package.json index cd920d79c..a999f25bf 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.25", + "version": "0.0.26", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 08efb1205..fdc31cdcd 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1246,9 +1246,9 @@ export default class Interpreter extends EventEmitter { if (checkLimit()) return allResults; let loadMoreCounter = 0; - // let previousResultCount = allResults.length; - // let noNewItemsCounter = 0; - // const MAX_NO_NEW_ITEMS = 2; + let previousResultCount = allResults.length; + let noNewItemsCounter = 0; + const MAX_NO_NEW_ITEMS = 5; while (true) { if (this.isAborted) { @@ -1332,21 +1332,21 @@ export default class Interpreter extends EventEmitter { await scrapeCurrentPage(); - // const currentResultCount = allResults.length; - // const newItemsAdded = currentResultCount > previousResultCount; + const currentResultCount = allResults.length; + const newItemsAdded = currentResultCount > previousResultCount; - // if (!newItemsAdded) { - // noNewItemsCounter++; - // debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`); + if (!newItemsAdded) { + noNewItemsCounter++; + debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`); - // if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) { - // debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`); - // return allResults; - // } - // } else { - // noNewItemsCounter = 0; - // previousResultCount = currentResultCount; - // } + if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) { + debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`); + return allResults; + } + } else { + 
noNewItemsCounter = 0; + previousResultCount = currentResultCount; + } if (checkLimit()) return allResults; diff --git a/maxun-core/src/preprocessor.ts b/maxun-core/src/preprocessor.ts index 3d4307a96..f56f9d384 100644 --- a/maxun-core/src/preprocessor.ts +++ b/maxun-core/src/preprocessor.ts @@ -55,7 +55,7 @@ export default class Preprocessor { */ static getParams(workflow: WorkflowFile): string[] { const getParamsRecurse = (object: any): string[] => { - if (typeof object === 'object') { + if (typeof object === 'object' && object !== null) { // Recursion base case if (object.$param) { return [object.$param]; @@ -141,14 +141,24 @@ export default class Preprocessor { } const out = object; - // for every key (child) of the object + Object.keys(object!).forEach((key) => { - // if the field has only one key, which is `k` - if (Object.keys((object)[key]).length === 1 && (object)[key][k]) { - // process the current special tag (init param, hydrate regex...) - (out)[key] = f((object)[key][k]); - } else { - initSpecialRecurse((object)[key], k, f); + const childValue = (object)[key]; + + if (!childValue || typeof childValue !== 'object') { + return; + } + + try { + const childKeys = Object.keys(childValue); + + if (childKeys.length === 1 && childValue[k]) { + (out)[key] = f(childValue[k]); + } else { + initSpecialRecurse(childValue, k, f); + } + } catch (error) { + console.warn(`Error processing key "${key}" in initSpecialRecurse:`, error); } }); return out; diff --git a/package.json b/package.json index d0e3fb6a8..c70b0fef5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxun", - "version": "0.0.25", + "version": "0.0.26", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { @@ -51,7 +51,7 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.25", + "maxun-core": "^0.0.26", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", @@ -81,7 +81,7 @@ "swagger-jsdoc": 
"^6.2.8", "swagger-ui-express": "^5.0.1", "typedoc": "^0.23.8", - "typescript": "^5.0.0", + "typescript": "^5.0.0", "uuid": "^8.3.2", "uuidv4": "^6.2.12", "web-vitals": "^2.1.4", diff --git a/public/locales/en.json b/public/locales/en.json index c9f218d84..da0ebcc76 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -580,7 +580,7 @@ "buttons": { "stop": "Stop" }, - "loading": "Loading data...", + "loading": "Extracting data...", "empty_output": "No output data available", "captured_data": { "title": "Captured Data", diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index a3796cbeb..c88ba068b 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -201,6 +201,11 @@ export class RemoteBrowser { private networkRequestTimeout: NodeJS.Timeout | null = null; private pendingNetworkRequests: string[] = []; private readonly NETWORK_QUIET_PERIOD = 8000; + private readonly INITIAL_LOAD_QUIET_PERIOD = 3000; + private networkWaitStartTime: number = 0; + private progressInterval: NodeJS.Timeout | null = null; + private hasShownInitialLoader: boolean = false; + private isInitialLoadInProgress: boolean = false; /** * Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and @@ -432,17 +437,19 @@ export class RemoteBrowser { if (!this.currentPage) return; this.currentPage.on("domcontentloaded", async () => { - logger.info("DOM content loaded - triggering snapshot"); - await this.makeAndEmitDOMSnapshot(); + if (!this.isInitialLoadInProgress) { + logger.info("DOM content loaded - triggering snapshot"); + await this.makeAndEmitDOMSnapshot(); + } }); this.currentPage.on("response", async (response) => { const url = response.url(); - if ( - response.request().resourceType() === "document" || - url.includes("api/") || - url.includes("ajax") - ) { + const isDocumentRequest = 
response.request().resourceType() === "document"; + + if (!this.hasShownInitialLoader && isDocumentRequest && !url.includes("about:blank")) { + this.hasShownInitialLoader = true; + this.isInitialLoadInProgress = true; this.pendingNetworkRequests.push(url); if (this.networkRequestTimeout) { @@ -450,24 +457,54 @@ export class RemoteBrowser { this.networkRequestTimeout = null; } + if (this.progressInterval) { + clearInterval(this.progressInterval); + this.progressInterval = null; + } + + this.networkWaitStartTime = Date.now(); + this.progressInterval = setInterval(() => { + const elapsed = Date.now() - this.networkWaitStartTime; + const navigationProgress = Math.min((elapsed / this.INITIAL_LOAD_QUIET_PERIOD) * 40, 35); + const totalProgress = 60 + navigationProgress; + this.emitLoadingProgress(totalProgress, this.pendingNetworkRequests.length); + }, 500); + logger.debug( - `Network request received: ${url}. Total pending: ${this.pendingNetworkRequests.length}` + `Initial load network request received: ${url}. Using ${this.INITIAL_LOAD_QUIET_PERIOD}ms quiet period` ); this.networkRequestTimeout = setTimeout(async () => { logger.info( - `Network quiet period reached. 
Processing ${this.pendingNetworkRequests.length} requests` + `Initial load network quiet period reached (${this.INITIAL_LOAD_QUIET_PERIOD}ms)` ); + if (this.progressInterval) { + clearInterval(this.progressInterval); + this.progressInterval = null; + } + + this.emitLoadingProgress(100, this.pendingNetworkRequests.length); + this.pendingNetworkRequests = []; this.networkRequestTimeout = null; + this.isInitialLoadInProgress = false; await this.makeAndEmitDOMSnapshot(); - }, this.NETWORK_QUIET_PERIOD); + }, this.INITIAL_LOAD_QUIET_PERIOD); } }); } + private emitLoadingProgress(progress: number, pendingRequests: number): void { + this.socket.emit("domLoadingProgress", { + progress: Math.round(progress), + pendingRequests, + userId: this.userId, + timestamp: Date.now(), + }); + } + private async setupPageEventListeners(page: Page) { page.on('framenavigated', async (frame) => { if (frame === page.mainFrame()) { @@ -521,7 +558,13 @@ export class RemoteBrowser { const MAX_RETRIES = 3; let retryCount = 0; let success = false; - + + this.socket.emit("dom-snapshot-loading", { + userId: this.userId, + timestamp: Date.now(), + }); + this.emitLoadingProgress(0, 0); + while (!success && retryCount < MAX_RETRIES) { try { this.browser = (await chromium.launch({ @@ -545,7 +588,9 @@ export class RemoteBrowser { if (!this.browser || this.browser.isConnected() === false) { throw new Error('Browser failed to launch or is not connected'); } - + + this.emitLoadingProgress(20, 0); + const proxyConfig = await getDecryptedProxyConfig(userId); let proxyOptions: { server: string, username?: string, password?: string } = { server: '' }; @@ -623,6 +668,8 @@ export class RemoteBrowser { this.currentPage = await this.context.newPage(); + this.emitLoadingProgress(40, 0); + await this.setupPageEventListeners(this.currentPage); const viewportSize = await this.currentPage.viewportSize(); @@ -645,7 +692,9 @@ export class RemoteBrowser { // Still need to set up the CDP session even if blocker fails 
this.client = await this.currentPage.context().newCDPSession(this.currentPage); } - + + this.emitLoadingProgress(60, 0); + success = true; logger.log('debug', `Browser initialized successfully for user ${userId}`); } catch (error: any) { @@ -1521,9 +1570,6 @@ export class RemoteBrowser { this.isDOMStreamingActive = true; logger.info("DOM streaming started successfully"); - // Initial DOM snapshot - await this.makeAndEmitDOMSnapshot(); - this.setupScrollEventListener(); this.setupPageChangeListeners(); } catch (error) { diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index f9a489210..71cac8b70 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -116,6 +116,16 @@ export class WorkflowInterpreter { */ private currentScrapeListIndex: number = 0; + /** + * Track action counts to generate unique names + */ + private actionCounts: Record = {}; + + /** + * Track used action names to prevent duplicates + */ + private usedActionNames: Set = new Set(); + /** * Current run ID for real-time persistence */ @@ -379,6 +389,8 @@ export class WorkflowInterpreter { }; this.binaryData = []; this.currentScrapeListIndex = 0; + this.actionCounts = {}; + this.usedActionNames = new Set(); this.currentRunId = null; this.persistenceBuffer = []; this.persistenceInProgress = false; @@ -394,6 +406,43 @@ export class WorkflowInterpreter { logger.log('debug', `Set run ID for real-time persistence: ${runId}`); }; + /** + * Generates a unique action name for data storage + * @param actionType The type of action (scrapeList, scrapeSchema, etc.) 
+ * @param providedName Optional name provided by the action + * @returns A unique action name + */ + private getUniqueActionName = (actionType: string, providedName?: string | null): string => { + if (providedName && providedName.trim() !== '' && !this.usedActionNames.has(providedName)) { + this.usedActionNames.add(providedName); + return providedName; + } + + if (!this.actionCounts[actionType]) { + this.actionCounts[actionType] = 0; + } + + let uniqueName: string; + let counter = this.actionCounts[actionType]; + + do { + counter++; + if (actionType === 'scrapeList') { + uniqueName = `List ${counter}`; + } else if (actionType === 'scrapeSchema') { + uniqueName = `Text ${counter}`; + } else if (actionType === 'screenshot') { + uniqueName = `Screenshot ${counter}`; + } else { + uniqueName = `${actionType} ${counter}`; + } + } while (this.usedActionNames.has(uniqueName)); + + this.actionCounts[actionType] = counter; + this.usedActionNames.add(uniqueName); + return uniqueName; + }; + /** * Persists extracted data to database with intelligent batching for performance * Falls back to immediate persistence for critical operations @@ -525,20 +574,8 @@ export class WorkflowInterpreter { } let actionName = this.currentActionName || ""; - - if (!actionName) { - if (!Array.isArray(data) && Object.keys(data).length === 1) { - const soleKey = Object.keys(data)[0]; - const soleValue = data[soleKey]; - if (Array.isArray(soleValue) || typeof soleValue === "object") { - actionName = soleKey; - data = soleValue; - } - } - } - - if (!actionName) { - actionName = "Unnamed Action"; + if (typeKey === "scrapeList") { + actionName = this.getUniqueActionName(typeKey, this.currentActionName); } const flattened = Array.isArray(data) @@ -570,9 +607,10 @@ export class WorkflowInterpreter { const { name, data, mimeType } = payload; const base64Data = data.toString("base64"); + const uniqueName = this.getUniqueActionName('screenshot', name); const binaryItem = { - name, + name: uniqueName, 
mimeType, data: base64Data }; @@ -582,7 +620,7 @@ export class WorkflowInterpreter { await this.persistBinaryDataToDatabase(binaryItem); this.socket.emit("binaryCallback", { - name, + name: uniqueName, data: base64Data, mimeType }); diff --git a/src/api/storage.ts b/src/api/storage.ts index e584c36fc..b5dc32ded 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -1,7 +1,7 @@ import { default as axios } from "axios"; import { WorkflowFile } from "maxun-core"; import { RunSettings } from "../components/run/RunSettings"; -import { ScheduleSettings } from "../components/robot/ScheduleSettings"; +import { ScheduleSettings } from "../components/robot/pages/ScheduleSettingsPage"; import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage"; import { apiUrl } from "../apiConfig"; diff --git a/src/components/api/ApiKey.tsx b/src/components/api/ApiKey.tsx index 9feb9551e..278971695 100644 --- a/src/components/api/ApiKey.tsx +++ b/src/components/api/ApiKey.tsx @@ -26,7 +26,8 @@ const Container = styled(Box)` flex-direction: column; align-items: center; margin-top: 50px; - margin-left: 50px; + margin-left: 70px; + margin-right: 70px; `; const ApiKeyManager = () => { @@ -108,7 +109,7 @@ const ApiKeyManager = () => { return ( - + Start by creating an API key below. Then, test your API @@ -139,7 +140,7 @@ const ApiKeyManager = () => { {apiKeyName} - + {showKey ? 
`${apiKey?.substring(0, 10)}...` : '**********'} @@ -174,6 +175,5 @@ const ApiKeyManager = () => { )} ); -}; - +} export default ApiKeyManager; \ No newline at end of file diff --git a/src/components/browser/BrowserContent.tsx b/src/components/browser/BrowserContent.tsx index 46a8886d9..f592f846b 100644 --- a/src/components/browser/BrowserContent.tsx +++ b/src/components/browser/BrowserContent.tsx @@ -13,7 +13,7 @@ import { export const BrowserContent = () => { const { socket } = useSocketStore(); - const [tabs, setTabs] = useState(["current"]); + const [tabs, setTabs] = useState(["Loading..."]); const [tabIndex, setTabIndex] = React.useState(0); const [showOutputData, setShowOutputData] = useState(false); const { browserWidth } = useBrowserDimensionsStore(); @@ -125,7 +125,7 @@ export const BrowserContent = () => { useEffect(() => { getCurrentTabs() .then((response) => { - if (response) { + if (response && response.length > 0) { setTabs(response); } }) diff --git a/src/components/browser/BrowserRecordingSave.tsx b/src/components/browser/BrowserRecordingSave.tsx index 32d0fabad..d5e484063 100644 --- a/src/components/browser/BrowserRecordingSave.tsx +++ b/src/components/browser/BrowserRecordingSave.tsx @@ -143,7 +143,8 @@ const BrowserRecordingSave = () => { overflow: 'hidden', display: 'flex', justifyContent: 'space-between', - height: "48px" + height: "48px", + marginLeft: '10px' }}>