
Commit c3e6528

Examples: vnc + openflux, web crawler
1 parent b5797bb commit c3e6528

File tree

snippet_examples/openflux.json
snippet_examples/web_crawler.json

2 files changed (+21, -0 lines changed)

snippet_examples/openflux.json

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+[
+  {
+    "id": 1,
+    "type": "compute",
+    "output": "",
+    "value": "#!/bin/sh\n# Install Fluxbox and a terminal (xterm) to interact with the desktop\napk add fluxbox xterm"
+  },
+  {
+    "id": 2,
+    "type": "compute",
+    "output": "",
+    "value": "#!/bin/sh\nset -e\n\n# --- CONFIGURABLE SETTINGS ---\n\n# --- OPERATION MODE ---\n# 'FULL_START': Checks dependencies, cleans up, and starts everything from scratch.\n# 'REFRESH': Not applicable for Fluxbox, behaves like FULL_START.\n# 'CLEAN': Terminates all processes and stops (shutdown).\nOPERATION_MODE=\"FULL_START\"\n\n# --- DEPENDENCY MANAGEMENT ---\n# If 'true', the script will try to install missing dependencies.\n# On Alpine, this requires root privileges.\nAUTO_INSTALL_DEPS=\"true\"\n\n# --- CONNECTION SETTINGS ---\nVNC_PASSWORD=\"p\"\nVNC_PORT=\"2999\"\nWEBSOCKET_PORT=\"6080\"\n\n# --- GRAPHICAL ENVIRONMENT SETTINGS ---\nRESOLUTION=\"1920x1080x24\"\n\n# --- APPLICATION TO RUN ---\n# We start Fluxbox. Inside Fluxbox, the user can launch other applications (e.g., xterm).\nPROGRAM_TO_RUN=\"fluxbox\"\n\n\n# --- SCRIPT LOGIC (DO NOT MODIFY BELOW THIS LINE) ---\n\n# Function for a complete cleanup of the environment (uses pkill for Alpine)\ncleanup_all() {\n echo \"--- Full cleanup: Terminating all VNC environment processes... ---\"\n pkill -f \"x11vnc -display :0\" || true\n pkill -f \"Xvfb :0\" || true\n pkill -f \"websockify.*localhost:${VNC_PORT}\" || true\n pkill -f \"${PROGRAM_TO_RUN}\" || true\n echo \"--- Cleanup complete. ---\"\n}\n\n# Function to install dependencies on Alpine Linux\ninstall_dependencies() {\n if [ \"$AUTO_INSTALL_DEPS\" != \"true\" ]; then\n echo \"--- Dependency check skipped (disabled by configuration). ---\"\n return\n fi\n\n if ! command -v apk >/dev/null; then\n echo \"WARNING: This script is configured for Alpine Linux (apk) but 'apk' was not found.\"\n return\n fi\n\n if [ \"$(id -u)\" -ne 0 ]; then\n echo \"ERROR: To install dependencies on Alpine, run the script as the 'root' user.\"\n exit 1\n fi\n \n # APK dependencies: procps contains pkill, websockify\n APK_DEPS=\"xvfb x11vnc procps websockify\"\n PACKAGES_TO_INSTALL=\"\"\n echo \"--- Checking required APK dependencies... ---\"\n for pkg in $APK_DEPS; do\n if ! apk info -e \"$pkg\" >/dev/null 2>&1; then\n echo \"Missing APK dependency: $pkg\"\n PACKAGES_TO_INSTALL=\"$PACKAGES_TO_INSTALL $pkg\"\n fi\n done\n\n if [ -n \"$PACKAGES_TO_INSTALL\" ]; then\n echo \"Found missing APK dependencies. Updating and installing with 'apk add'...\"\n apk update\n apk add --no-cache $PACKAGES_TO_INSTALL\n else\n echo \"All required APK dependencies are already installed.\"\n fi\n}\n\n# Function to start the application\nstart_app() {\n echo \"--- Starting ${PROGRAM_TO_RUN}... ---\"\n nohup env DISPLAY=:0 ${PROGRAM_TO_RUN} &\n # For Fluxbox, the user can launch xterm manually from the menu\n}\n\n# --- OPERATION MODE HANDLING ---\nif [ \"$OPERATION_MODE\" = \"CLEAN\" ]; then\n echo \"--- OPERATION MODE: CLEAN ---\"\n cleanup_all\n exit 0\n\nelif [ \"$OPERATION_MODE\" = \"REFRESH\" ]; then\n echo \"--- OPERATION MODE: REFRESH (not applicable, executing FULL_START) ---\"\n OPERATION_MODE=\"FULL_START\" # Force FULL_START\nfi\n\nif [ \"$OPERATION_MODE\" = \"FULL_START\" ]; then\n echo \"--- OPERATION MODE: FULL_START ---\"\n install_dependencies\n cleanup_all\n trap 'echo; cleanup_all; exit 0' INT TERM\n echo \"--- Starting base services... ---\"\n nohup Xvfb :0 -screen 0 ${RESOLUTION} &\n sleep 1\n nohup x11vnc -display :0 -forever -rfbport ${VNC_PORT} -passwd \"$VNC_PASSWORD\" &\n sleep 1\n nohup websockify --web /usr/share/novnc/ ${WEBSOCKET_PORT} localhost:${VNC_PORT} &\n sleep 1\n echo \"Servers started.\"\n start_app\n echo\n echo \">>> VNC environment with Fluxbox started. <<<\"\n echo \"Open xterm from the Fluxbox menu (right-click on the desktop).\"\nelse\n echo \"ERROR: Invalid OPERATION_MODE. Choose 'FULL_START' or 'CLEAN'.\"\n exit 1\nfi"
+  }
+]
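
Both snippet files follow the same simple schema: a JSON array of objects whose "value" field holds a shell script, run in order of "id" (openflux.json also carries an empty "output" field, presumably filled in by whatever consumes the snippets). The runner itself is not part of this commit, so the following is only a rough standalone sketch: it pulls each script out with jq and pipes it to a shell, and the port/path mentioned simply mirror the WEBSOCKET_PORT and websockify --web settings inside the snippet.

# Hypothetical standalone run (needs jq, and root on Alpine so 'apk add' works).
for i in 0 1; do
    jq -r ".[$i].value" snippet_examples/openflux.json | sh
done
# Then open the page websockify serves, e.g. http://localhost:6080/vnc.html, and
# connect with VNC_PASSWORD ("p"). Note: the dependency list installs websockify
# but not the noVNC files under /usr/share/novnc/, so that path is assumed to
# already exist on the host.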

snippet_examples/web_crawler.json

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+[
+  {
+    "id": 0,
+    "type": "compute",
+    "value": "#!/usr/bin/env bash\n\n# --- CONFIGURATION ---\n# The URL to start crawling from.\nSTART_URL=\"https://www......../\"\n\n# The directory where all files will be saved (will be created if it does not exist).\nOUTPUT_DIR=\"/crawler_results\"\n\n# Limit crawling to the starting domain? (true/false)\n# If 'true', it will not follow links to external sites.\nLIMIT_TO_DOMAIN=true\n\n# Delay in seconds between requests to avoid overloading the server.\nREQUEST_DELAY=1\n# --- END CONFIGURATION ---\n\n# --- PREPARATION ---\n# Create the output directory if it doesn't exist. The -p flag prevents errors if it's already there.\nmkdir -p \"$OUTPUT_DIR\"\n\n# Define the full paths for the working files\nQUEUE_FILE=\"$OUTPUT_DIR/queue.txt\"\nVISITED_FILE=\"$OUTPUT_DIR/visited.txt\"\nCOLLECTED_FILE=\"$OUTPUT_DIR/collected.txt\"\n\n# 'touch' creates the files if they don't exist.\ntouch \"$QUEUE_FILE\"\ntouch \"$VISITED_FILE\"\ntouch \"$COLLECTED_FILE\"\n\n# Add the starting URL to the queue\necho \"$START_URL\" > \"$QUEUE_FILE\"\n\n# Extract the base domain to limit the search (e.g., https://www.alpinelinux.org)\nBASE_DOMAIN=$(echo \"$START_URL\" | grep -oE 'https?://[^/]*')\n\n# Main loop: continues as long as there are URLs in the queue\nwhile [ -s \"$QUEUE_FILE\" ]; do\n # Get the first URL from the queue\n CURRENT_URL=$(head -n 1 \"$QUEUE_FILE\")\n # Remove the URL we just took from the queue\n sed -i '1d' \"$QUEUE_FILE\"\n\n # Check if we have already visited this URL\n if grep -q -x \"$CURRENT_URL\" \"$VISITED_FILE\"; then\n echo \" Already visited: $CURRENT_URL\"\n continue # Skip to the next URL in the loop\n fi\n\n echo \"Visiting: $CURRENT_URL\"\n\n # Add the current URL to the visited list and the results\n echo \"$CURRENT_URL\" >> \"$VISITED_FILE\"\n echo \"$CURRENT_URL\" >> \"$COLLECTED_FILE\"\n\n # Download the page and find new links\n NEW_LINKS=$(curl -s -L -m 10 \"$CURRENT_URL\" | grep -Eo 'href=\"([^\"]+)\"' | cut -d'\"' -f2)\n\n for link in $NEW_LINKS; do\n # Convert relative links to absolute links\n case \"$link\" in\n \\#* | mailto:* | javascript:*)\n continue ;; # Ignore anchors, email links, or javascript calls\n http*)\n abs_link=\"$link\" ;; # It's already an absolute link\n //*)\n abs_link=\"https:$link\" ;; # Protocol-relative link\n /*)\n abs_link=\"$BASE_DOMAIN$link\" ;; # Root-relative link\n *)\n abs_link=\"$CURRENT_URL$link\" ;; # Page-relative link\n esac\n\n # Clean up the URL by removing any trailing slash to avoid duplicates\n abs_link=$(echo \"$abs_link\" | sed 's|/$||')\n\n # If LIMIT_TO_DOMAIN is true, check that the link belongs to the base domain\n if [ \"$LIMIT_TO_DOMAIN\" = true ] && ! [[ \"$abs_link\" == \"$BASE_DOMAIN\"* ]]; then\n continue # Skip external links\n fi\n\n # Add the new link to the queue only if it hasn't been visited yet\n if ! grep -q -x \"$abs_link\" \"$VISITED_FILE\"; then\n echo \"$abs_link\" >> \"$QUEUE_FILE\"\n fi\n done\n\n # Wait before the next request\n sleep \"$REQUEST_DELAY\"\ndone\n\n# Clean up the final results by sorting and removing duplicates\nsort -u \"$COLLECTED_FILE\" -o \"$COLLECTED_FILE\"\n\necho \"\"\necho \"✅ Crawling complete!\"\necho \"Found $(wc -l < \"$COLLECTED_FILE\") unique links. Results saved in: $COLLECTED_FILE\"\n\n# Remove temporary files\nrm \"$QUEUE_FILE\" \"$VISITED_FILE\""
+  }
+]
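
The crawler snippet uses the same schema, minus the "output" field. To try it outside the snippet runner, the script can be extracted to a file, the placeholder START_URL edited, and then run with bash (it relies on the [[ ... ]] test, so plain sh is not enough); deduplicated links accumulate in /crawler_results/collected.txt. A minimal sketch, with crawl.sh as a hypothetical filename:

# Hypothetical standalone run; requires jq, curl, and bash.
jq -r '.[0].value' snippet_examples/web_crawler.json > crawl.sh
# edit START_URL in crawl.sh to point at a real site before running
bash crawl.sh
# results: /crawler_results/collected.txt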
