diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf new file mode 100644 index 000000000..1746f3c2c --- /dev/null +++ b/devops/nginx-static-loadbalance.conf @@ -0,0 +1,34 @@ +# Define upstream servers +upstream extension_server { server localhost:8888; } +upstream dashboard_server { server localhost:8889; } + +map $arg_source $upstream_server { + default dashboard_server; # default if no match or query param is absent + "extension" extension_server; + "dashboard" dashboard_server; +} + +server { + listen 80; + + location / { + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Pass original host to the upstream server + proxy_set_header Host $host; + + # Other WebSocket headers + proxy_http_version 1.1; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Route the request to the selected upstream server + proxy_pass http://$upstream_server; + proxy_read_timeout 3600; + proxy_send_timeout 3600; + } +} + + diff --git a/docs/nginx.md b/docs/nginx.md new file mode 100644 index 000000000..f2dbe54ec --- /dev/null +++ b/docs/nginx.md @@ -0,0 +1,17 @@ +# NGINX Setup and Config + +This document contains basic info related to using nginx in the context of writing observer. + +## Static Routing Use Case Config + +In order to improve event processing efficiency, we will use nginx to route requests to different processes based on whether they are incoming data events (from the extension), or requests for data (from the dashboard). The associated config can be found at `/devops/nginx-static-loadbalance.conf` in this repository. + +At the top of that file, the `upstream`s indicate the servers that should be used for each type of request, `dashboard` or `extension`. 
+ +The next section, which includes the `map` directive, tells nginx to look for a url query parameter called source (`$arg_source`: `$arg` indicates that it is a url query param, and `source` gives the name). If `source=dashboard`, then it routes to the dashboard server, if `source=extension`, it routes to the extension server, and if no `source` is provided, then it defaults to the dashboard server. + +Below, in the `server` block, are general instructions for handling proxying. + +## Usage + +To deploy nginx, take the appropriate `.conf` file and put it in `/etc/nginx/sites-enabled`. Then, you can use `sudo nginx -t` to validate the config and ensure it is correct--this will print any detected syntax errors in the config files. Finally, restart nginx to apply the new config (for example, with `sudo systemctl restart nginx`) diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index 8b7be1430..30f05aafb 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -9,7 +9,7 @@ var RAW_DEBUG = false; /* This variable must be manually updated to specify the server that * the data will be sent to. */ -var WEBSOCKET_SERVER_URL = "wss://learning-observer.org/wsapi/in/" +var WEBSOCKET_SERVER_URL = "wss://learning-observer.org/wsapi/in?source=extension" import { googledocs_id_from_url } from './writing_common'; diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index bce8dd0ad..a48a028ed 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -50,13 +50,13 @@ # Run argparse args = settings.parse_and_validate_arguments() -# This will need to move but for the moment we hack with -# this to prefer the GPU where possible. +# This will need to move but for the moment we hack with +# this to prefer the GPU where possible. 
import spacy -#spacy.prefer_gpu() -#debug_log("Preferring GPU Use.") -spacy.require_gpu() -debug_log("Requiring GPU Use.") +# spacy.prefer_gpu() +# debug_log("Preferring GPU Use.") +# spacy.require_gpu() +# debug_log("Requiring GPU Use.") def configure_event_loop(): diff --git a/learning_observer/learning_observer/routes.py b/learning_observer/learning_observer/routes.py index b7adf3812..ee91a4cef 100644 --- a/learning_observer/learning_observer/routes.py +++ b/learning_observer/learning_observer/routes.py @@ -4,6 +4,7 @@ import getpass import os +import pmss import secrets import sys @@ -33,6 +34,20 @@ from learning_observer.utility_handlers import * +pmss.register_field( + name='disable_extension_routes', + type=pmss.pmsstypes.TYPES.boolean, + description='Whether to disable extension-related API routes', + default=False +) + +pmss.register_field( + name='disable_dashboard_routes', + type=pmss.pmsstypes.TYPES.boolean, + description='Whether to disable dashboard-related API routes', + default=False +) + + def add_routes(app): ''' @@ -62,9 +77,20 @@ def tracemalloc_handler(request): aiohttp.web.get('/debug/tracemalloc/', tracemalloc_handler), ]) - register_dashboard_api(app) + if not settings.pmss_settings.disable_dashboard_routes(types=['server']): + register_dashboard_api(app) + debug_log("Dashboard routes are enabled") + else: + debug_log("Dashboard routes are disabled") + register_static_routes(app) - register_incoming_event_views(app) + + if not settings.pmss_settings.disable_extension_routes(types=['server']): + register_incoming_event_views(app) + debug_log("Extension routes are enabled") + else: + debug_log("Extension routes are disabled") + register_debug_routes(app) learning_observer.google.initialize_and_register_routes(app) diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index 8e08e7a26..b5d02567c 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -12,7 
+12,7 @@ [--gpt3=type] Options: - --url=url URL to connect [default: http://localhost:8888/wsapi/in/] + --url=url URL to connect [default: http://localhost:80/wsapi/in/] --streams=N How many students typing in parallel? [default: 1] --users=user_id,uid,uid Supply the user ID --ici=secs,secs Mean intercharacter interval [default: 0.1] @@ -129,7 +129,7 @@ def argument_list(argument, default): elif ARGS['--fake-name']: USERS = [names.get_first_name() for i in range(STREAMS)] else: - USERS = ["test-user-{n}".format(n=i) for i in range(STREAMS)] + USERS = [f"test-user-{i}" for i in range(STREAMS)] assert len(TEXT) == STREAMS, "len(filenames) != STREAMS." assert len(ICI) == STREAMS, "len(ICIs) != STREAMS." @@ -184,7 +184,7 @@ async def stream_document(text, ici, user, doc_id): ''' retries_remaining = 5 done = False - url = ARGS["--url"] + url = ARGS["--url"] + "?source=extension" while not done: try: async with aiohttp.ClientSession() as session: diff --git a/servermanagement/RunTwoLO.sh b/servermanagement/RunTwoLO.sh new file mode 100644 index 000000000..600488341 --- /dev/null +++ b/servermanagement/RunTwoLO.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# =============================== +# This is a modified version of RunLearningObserver.sh that automatically starts two processes, +# each with a different creds.yaml ('creds-a.yaml' and 'creds-b.yaml')--this should be +# used for static routing, where one creds.yaml has only dashboard routes enabled, +# and the other has only extension routes enabled. Note that these creds.yaml should +# specify different ports. +# +# This bash script provides a simple wrapper to run the +# learning observer service and pipe the data to a logfile +# over time this should be integrated into the systemd +# service process. This uses static variables to specify +# the location of the virtualenv and the command and +# specifies the location for the running logfile. 
+ +# System Variables +# -------------------------------------- +VIRTUALENV_PATH="/usr/local/share/projects/WritingObserver/VirtualENVs/WOvenv" +#VIRTUALENV_PYTHON="/usr/local/share/Projects/WritingObserver/VirtualENVs/learning_observer/bin/python3.9" +LEARNING_OBSERVER_LOC="/usr/local/share/projects/WritingObserver/Repositories/ArgLab_writing_observer/learning_observer" +LOGFILE_DEST="/usr/local/share/projects/WritingObserver/Repositories/ArgLab_writing_observer/learning_observer/learning_observer/logs" + +# Make the logfile name +# --------------------------------------- +LOG_DATE=$(date "+%m-%d-%Y--%H-%M-%S") +LOGFILE_NAME="$LOGFILE_DEST/learning_observer_service_$LOG_DATE.log" +echo $LOGFILE_NAME; + + +# Run both processes +# -------------------------------------- +echo "Running Learning Observer Service..." +cd $LEARNING_OBSERVER_LOC +source $VIRTUALENV_PATH/bin/activate +nohup python learning_observer --config-file=creds-a.yaml > $LOGFILE_NAME 2>&1 & +PROCESS_ID=$! +echo $PROCESS_ID > $LOGFILE_DEST/run.pid + +# NOTE: if this should go to separate log file location, modify here +nohup python learning_observer --config-file=creds-b.yaml >> $LOGFILE_NAME 2>&1 & +PROCESS_ID=$! +echo $PROCESS_ID >> $LOGFILE_DEST/run.pid + +# Set the number of allowed open files to something large 8192 +prlimit --pid $PROCESS_ID --nofile=8192