
Commit c3e6528

Examples: vnc + openflux, web crawler
1 parent b5797bb commit c3e6528

File tree

snippet_examples/openflux.json
snippet_examples/web_crawler.json

2 files changed (+21, -0 lines changed)

snippet_examples/openflux.json

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+[
+  {
+    "id": 1,
+    "type": "compute",
+    "output": "",
+    "value": "#!/bin/sh\n# Install Fluxbox and a terminal (xterm) to interact with the desktop\napk add fluxbox xterm"
+  },
+  {
+    "id": 2,
+    "type": "compute",
+    "output": "",
+    "value": "#!/bin/sh\nset -e\n\n# --- CONFIGURABLE SETTINGS ---\n\n# --- OPERATION MODE ---\n# 'FULL_START': Checks dependencies, cleans up, and starts everything from scratch.\n# 'REFRESH': Not applicable for Fluxbox, behaves like FULL_START.\n# 'CLEAN': Terminates all processes and stops (shutdown).\nOPERATION_MODE=\"FULL_START\"\n\n# --- DEPENDENCY MANAGEMENT ---\n# If 'true', the script will try to install missing dependencies.\n# On Alpine, this requires root privileges.\nAUTO_INSTALL_DEPS=\"true\"\n\n# --- CONNECTION SETTINGS ---\nVNC_PASSWORD=\"p\"\nVNC_PORT=\"2999\"\nWEBSOCKET_PORT=\"6080\"\n\n# --- GRAPHICAL ENVIRONMENT SETTINGS ---\nRESOLUTION=\"1920x1080x24\"\n\n# --- APPLICATION TO RUN ---\n# We start Fluxbox. Inside Fluxbox, the user can launch other applications (e.g., xterm).\nPROGRAM_TO_RUN=\"fluxbox\"\n\n\n# --- SCRIPT LOGIC (DO NOT MODIFY BELOW THIS LINE) ---\n\n# Function for a complete cleanup of the environment (uses pkill for Alpine)\ncleanup_all() {\n echo \"--- Full cleanup: Terminating all VNC environment processes... ---\"\n pkill -f \"x11vnc -display :0\" || true\n pkill -f \"Xvfb :0\" || true\n pkill -f \"websockify.*localhost:${VNC_PORT}\" || true\n pkill -f \"${PROGRAM_TO_RUN}\" || true\n echo \"--- Cleanup complete. ---\"\n}\n\n# Function to install dependencies on Alpine Linux\ninstall_dependencies() {\n if [ \"$AUTO_INSTALL_DEPS\" != \"true\" ]; then\n echo \"--- Dependency check skipped (disabled by configuration). ---\"\n return\n fi\n\n if ! command -v apk >/dev/null; then\n echo \"WARNING: This script is configured for Alpine Linux (apk) but 'apk' was not found.\"\n return\n fi\n\n if [ \"$(id -u)\" -ne 0 ]; then\n echo \"ERROR: To install dependencies on Alpine, run the script as the 'root' user.\"\n exit 1\n fi\n \n # APK dependencies: procps contains pkill, websockify\n APK_DEPS=\"xvfb x11vnc procps websockify\"\n PACKAGES_TO_INSTALL=\"\"\n echo \"--- Checking required APK dependencies... ---\"\n for pkg in $APK_DEPS; do\n if ! apk info -e \"$pkg\" >/dev/null 2>&1; then\n echo \"Missing APK dependency: $pkg\"\n PACKAGES_TO_INSTALL=\"$PACKAGES_TO_INSTALL $pkg\"\n fi\n done\n\n if [ -n \"$PACKAGES_TO_INSTALL\" ]; then\n echo \"Found missing APK dependencies. Updating and installing with 'apk add'...\"\n apk update\n apk add --no-cache $PACKAGES_TO_INSTALL\n else\n echo \"All required APK dependencies are already installed.\"\n fi\n}\n\n# Function to start the application\nstart_app() {\n echo \"--- Starting ${PROGRAM_TO_RUN}... ---\"\n nohup env DISPLAY=:0 ${PROGRAM_TO_RUN} &\n # For Fluxbox, the user can launch xterm manually from the menu\n}\n\n# --- OPERATION MODE HANDLING ---\nif [ \"$OPERATION_MODE\" = \"CLEAN\" ]; then\n echo \"--- OPERATION MODE: CLEAN ---\"\n cleanup_all\n exit 0\n\nelif [ \"$OPERATION_MODE\" = \"REFRESH\" ]; then\n echo \"--- OPERATION MODE: REFRESH (not applicable, executing FULL_START) ---\"\n OPERATION_MODE=\"FULL_START\" # Force FULL_START\nfi\n\nif [ \"$OPERATION_MODE\" = \"FULL_START\" ]; then\n echo \"--- OPERATION MODE: FULL_START ---\"\n install_dependencies\n cleanup_all\n trap 'echo; cleanup_all; exit 0' INT TERM\n echo \"--- Starting base services... ---\"\n nohup Xvfb :0 -screen 0 ${RESOLUTION} &\n sleep 1\n nohup x11vnc -display :0 -forever -rfbport ${VNC_PORT} -passwd \"$VNC_PASSWORD\" &\n sleep 1\n nohup websockify --web /usr/share/novnc/ ${WEBSOCKET_PORT} localhost:${VNC_PORT} &\n sleep 1\n echo \"Servers started.\"\n start_app\n echo\n echo \">>> VNC environment with Fluxbox started. <<<\"\n echo \"Open xterm from the Fluxbox menu (right-click on the desktop).\"\nelse\n echo \"ERROR: Invalid OPERATION_MODE. Choose 'FULL_START' or 'CLEAN'.\"\n exit 1\nfi"
+  }
+]
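
Both snippet files follow the same simple schema: a JSON array of objects whose "value" field holds a shell script, run in order of "id" (openflux.json also carries an empty "output" field, presumably filled in by whatever consumes the snippets). The runner itself is not part of this commit, so the following is only a rough standalone sketch: it pulls each script out with jq and pipes it to a shell, and the port/path mentioned simply mirror the WEBSOCKET_PORT and websockify --web settings inside the snippet.

# Hypothetical standalone run (needs jq, and root on Alpine so 'apk add' works).
for i in 0 1; do
    jq -r ".[$i].value" snippet_examples/openflux.json | sh
done
# Then open the page websockify serves, e.g. http://localhost:6080/vnc.html, and
# connect with VNC_PASSWORD ("p"). Note: the dependency list installs websockify
# but not the noVNC files under /usr/share/novnc/, so that path is assumed to
# already exist on the host.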

snippet_examples/web_crawler.json

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+[
+  {
+    "id": 0,
+    "type": "compute",
+    "value": "#!/usr/bin/env bash\n\n# --- CONFIGURATION ---\n# The URL to start crawling from.\nSTART_URL=\"https://www......../\"\n\n# The directory where all files will be saved (will be created if it does not exist).\nOUTPUT_DIR=\"/crawler_results\"\n\n# Limit crawling to the starting domain? (true/false)\n# If 'true', it will not follow links to external sites.\nLIMIT_TO_DOMAIN=true\n\n# Delay in seconds between requests to avoid overloading the server.\nREQUEST_DELAY=1\n# --- END CONFIGURATION ---\n\n# --- PREPARATION ---\n# Create the output directory if it doesn't exist. The -p flag prevents errors if it's already there.\nmkdir -p \"$OUTPUT_DIR\"\n\n# Define the full paths for the working files\nQUEUE_FILE=\"$OUTPUT_DIR/queue.txt\"\nVISITED_FILE=\"$OUTPUT_DIR/visited.txt\"\nCOLLECTED_FILE=\"$OUTPUT_DIR/collected.txt\"\n\n# 'touch' creates the files if they don't exist.\ntouch \"$QUEUE_FILE\"\ntouch \"$VISITED_FILE\"\ntouch \"$COLLECTED_FILE\"\n\n# Add the starting URL to the queue\necho \"$START_URL\" > \"$QUEUE_FILE\"\n\n# Extract the base domain to limit the search (e.g., https://www.alpinelinux.org)\nBASE_DOMAIN=$(echo \"$START_URL\" | grep -oE 'https?://[^/]*')\n\n# Main loop: continues as long as there are URLs in the queue\nwhile [ -s \"$QUEUE_FILE\" ]; do\n # Get the first URL from the queue\n CURRENT_URL=$(head -n 1 \"$QUEUE_FILE\")\n # Remove the URL we just took from the queue\n sed -i '1d' \"$QUEUE_FILE\"\n\n # Check if we have already visited this URL\n if grep -q -x \"$CURRENT_URL\" \"$VISITED_FILE\"; then\n echo \" Already visited: $CURRENT_URL\"\n continue # Skip to the next URL in the loop\n fi\n\n echo \"Visiting: $CURRENT_URL\"\n\n # Add the current URL to the visited list and the results\n echo \"$CURRENT_URL\" >> \"$VISITED_FILE\"\n echo \"$CURRENT_URL\" >> \"$COLLECTED_FILE\"\n\n # Download the page and find new links\n NEW_LINKS=$(curl -s -L -m 10 \"$CURRENT_URL\" | grep -Eo 'href=\"([^\"]+)\"' | cut -d'\"' -f2)\n\n for link in $NEW_LINKS; do\n # Convert relative links to absolute links\n case \"$link\" in\n \\#* | mailto:* | javascript:*)\n continue ;; # Ignore anchors, email links, or javascript calls\n http*)\n abs_link=\"$link\" ;; # It's already an absolute link\n //*)\n abs_link=\"https:$link\" ;; # Protocol-relative link\n /*)\n abs_link=\"$BASE_DOMAIN$link\" ;; # Root-relative link\n *)\n abs_link=\"$CURRENT_URL$link\" ;; # Page-relative link\n esac\n\n # Clean up the URL by removing any trailing slash to avoid duplicates\n abs_link=$(echo \"$abs_link\" | sed 's|/$||')\n\n # If LIMIT_TO_DOMAIN is true, check that the link belongs to the base domain\n if [ \"$LIMIT_TO_DOMAIN\" = true ] && ! [[ \"$abs_link\" == \"$BASE_DOMAIN\"* ]]; then\n continue # Skip external links\n fi\n\n # Add the new link to the queue only if it hasn't been visited yet\n if ! grep -q -x \"$abs_link\" \"$VISITED_FILE\"; then\n echo \"$abs_link\" >> \"$QUEUE_FILE\"\n fi\n done\n\n # Wait before the next request\n sleep \"$REQUEST_DELAY\"\ndone\n\n# Clean up the final results by sorting and removing duplicates\nsort -u \"$COLLECTED_FILE\" -o \"$COLLECTED_FILE\"\n\necho \"\"\necho \"✅ Crawling complete!\"\necho \"Found $(wc -l < \"$COLLECTED_FILE\") unique links. Results saved in: $COLLECTED_FILE\"\n\n# Remove temporary files\nrm \"$QUEUE_FILE\" \"$VISITED_FILE\""
+  }
+]
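
The crawler snippet uses the same schema, minus the "output" field. To try it outside the snippet runner, the script can be extracted to a file, the placeholder START_URL edited, and then run with bash (it relies on the [[ ... ]] test, so plain sh is not enough); deduplicated links accumulate in /crawler_results/collected.txt. A minimal sketch, with crawl.sh as a hypothetical filename:

# Hypothetical standalone run; requires jq, curl, and bash.
jq -r '.[0].value' snippet_examples/web_crawler.json > crawl.sh
# edit START_URL in crawl.sh to point at a real site before running
bash crawl.sh
# results: /crawler_results/collected.txt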
