From 5ffad2364d32c985252b256359a40dcd41436cca Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 17 Oct 2024 22:33:34 -0400 Subject: [PATCH 01/12] add functionality for static load balancing on the backend nginx config file, command line port option, load balancing header inclusion --- devops/nginx-static-loadbalance.conf | 52 +++++++++++++++++++ learning_observer/learning_observer/main.py | 16 +++--- .../learning_observer/settings.py | 33 ++++++++---- learning_observer/util/stream_writing.py | 22 +++++--- 4 files changed, 100 insertions(+), 23 deletions(-) create mode 100644 devops/nginx-static-loadbalance.conf diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf new file mode 100644 index 000000000..5f8dab5ab --- /dev/null +++ b/devops/nginx-static-loadbalance.conf @@ -0,0 +1,52 @@ +# Define upstream servers +upstream server0 { server localhost:8888; } +upstream server1 { server localhost:8889; } +upstream server2 { server localhost:8890; } + +# Extract first character of the student-id header +map $http_student $upstream_server { + default server0; # default if no match or header is absent + "~^[0]" server0; + "~^[1]" server1; + "~^[2]" server2; + "~^[3]" server0; + "~^[4]" server1; + "~^[5]" server2; + "~^[6]" server0; + "~^[7]" server1; + "~^[8]" server2; + "~^[9]" server0; + "~^[aA]" server1; + "~^[bB]" server2; + "~^[cC]" server0; + "~^[dD]" server1; + "~^[eE]" server2; + "~^[fF]" server0; +} + +server { + listen 80; + + location / { + proxy_set_header Student $http_student; + + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Pass original host to the upstream server + proxy_set_header Host $host; + + # Other WebSocket headers + proxy_http_version 1.1; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Route the request to the selected upstream server + proxy_pass 
http://$upstream_server; + proxy_read_timeout 3600; + proxy_send_timeout 3600; + } +} + + diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index bce8dd0ad..685c3aa46 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -9,6 +9,7 @@ assessment). ''' +import spacy import sys import asyncio @@ -50,13 +51,12 @@ # Run argparse args = settings.parse_and_validate_arguments() -# This will need to move but for the moment we hack with -# this to prefer the GPU where possible. -import spacy -#spacy.prefer_gpu() -#debug_log("Preferring GPU Use.") -spacy.require_gpu() -debug_log("Requiring GPU Use.") +# This will need to move but for the moment we hack with +# this to prefer the GPU where possible. +# spacy.prefer_gpu() +# debug_log("Preferring GPU Use.") +# spacy.require_gpu() +# debug_log("Requiring GPU Use.") def configure_event_loop(): @@ -96,6 +96,8 @@ def create_app(): if port is None and runmode == 'dev': port = learning_observer.webapp_helpers.find_open_port() + port = int(args.port) + # Check that everything is configured correctly, # and initialize anything which needs initialization learning_observer.prestartup.startup_checks_and_init() diff --git a/learning_observer/learning_observer/settings.py b/learning_observer/learning_observer/settings.py index f9140b3b5..bcab7341e 100644 --- a/learning_observer/learning_observer/settings.py +++ b/learning_observer/learning_observer/settings.py @@ -25,7 +25,8 @@ prog=__name__, description="A system for monitoring", epilog="For more information, see PMSS documentation.", - rulesets=[pmss.YAMLFileRuleset(filename=learning_observer.paths.config_file())] + rulesets=[pmss.YAMLFileRuleset( + filename=learning_observer.paths.config_file())] ) # If we e.g. 
`import settings` and `import learning_observer.settings`, we @@ -39,6 +40,7 @@ args = None parser = None + def str_to_bool(arg): if isinstance(arg, bool): return arg @@ -91,6 +93,11 @@ def parse_and_validate_arguments(): help='Launce the Learning Observer application. This can be used with `--ipython-console` and `--ipython-kernel`.', default=True, nargs='?', const=True, type=str_to_bool) + parser.add_argument( + '--port', + default=8888 + ) + args = parser.parse_args() if not os.path.exists(args.config_file): @@ -119,13 +126,14 @@ def parse_and_validate_arguments(): RUN_MODES = enum.Enum('RUN_MODES', 'DEV DEPLOY INTERACTIVE') RUN_MODE = None -pmss.parser('run_mode', parent='string', choices=['dev', 'deploy', 'interactive'], transform=None) +pmss.parser('run_mode', parent='string', choices=[ + 'dev', 'deploy', 'interactive'], transform=None) pmss.register_field( name='run_mode', type='run_mode', - description="Set which mode the server is running in.\n"\ - "`dev` for local development with full debugging\n"\ - "`deploy` for running on a server with better performance\n"\ + description="Set which mode the server is running in.\n" + "`dev` for local development with full debugging\n" + "`deploy` for running on a server with better performance\n" "`interactive` for processing data offline", required=True ) @@ -176,16 +184,19 @@ def load_settings(config): elif settings_run_mode == 'interactive': RUN_MODE = RUN_MODES.INTERACTIVE else: - raise ValueError("Configuration setting for run_mode must be either 'dev', 'deploy', or 'interactive'") + raise ValueError( + "Configuration setting for run_mode must be either 'dev', 'deploy', or 'interactive'") if 'repos' in settings: for repo in settings['repos']: # In the future, we might allow dicts if we e.g. 
want more metadata if isinstance(settings['repos'][repo], str): - learning_observer.paths.register_repo(repo, settings['repos'][repo]) + learning_observer.paths.register_repo( + repo, settings['repos'][repo]) elif isinstance(settings['repos'][repo], dict): # HACK. We should figure out where to stick this. This does not belong in paths - debug_working = settings['repos'][repo].get("debug_working", None) + debug_working = settings['repos'][repo].get( + "debug_working", None) learning_observer.paths.register_repo( repo, @@ -193,7 +204,8 @@ def load_settings(config): debug_working=debug_working ) else: - raise ValueError("settings.repos.{repo} should be a string or a dict. Please fix the settings file.".format(repo=repo)) + raise ValueError( + "settings.repos.{repo} should be a string or a dict. Please fix the settings file.".format(repo=repo)) return settings @@ -223,7 +235,8 @@ def initialized(): # Not all of these are guaranteed to work on every branch of the codebase. -AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', 'merkle', 'save_google_ajax', 'use_google_ajax'] +AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', + 'merkle', 'save_google_ajax', 'use_google_ajax'] def feature_flag(flag): diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index 8e08e7a26..f3c0c54d7 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -12,7 +12,7 @@ [--gpt3=type] Options: - --url=url URL to connect [default: http://localhost:8888/wsapi/in/] + --url=url URL to connect [default: http://localhost:80/wsapi/in/] --streams=N How many students typing in parallel? 
[default: 1] --users=user_id,uid,uid Supply the user ID --ici=secs,secs Mean intercharacter interval [default: 0.1] @@ -90,10 +90,12 @@ def argument_list(argument, default): if isinstance(list_string, str): list_string = [list_string] * STREAMS if len(list_string) != STREAMS: - print(f"Failure: {list_string}\nfrom {argument} should make {STREAMS} items") + print( + f"Failure: {list_string}\nfrom {argument} should make {STREAMS} items") sys.exit(-1) return list_string + # TODO what is `source_files` supposed to be? # when running this script for the workshop, we should either # 1) move gpt3 texts out of writing observer (dependency hell) OR @@ -103,9 +105,11 @@ def argument_list(argument, default): if ARGS["--gpt3"] is not None: import writing_observer.sample_essays TEXT = writing_observer.sample_essays.GPT3_TEXTS[ARGS["--gpt3"]] - STREAMS = len(TEXT) + text_to_use = TEXT[0] + TEXT = [text_to_use for _ in range(STREAMS)] elif source_files is None: - TEXT = ["\n".join(loremipsum.get_paragraphs(int(ARGS.get("--text-length", 5)))) for i in range(STREAMS)] + TEXT = ["\n".join(loremipsum.get_paragraphs( + int(ARGS.get("--text-length", 5)))) for i in range(STREAMS)] else: TEXT = [open(filename).read() for filename in source_files] @@ -124,18 +128,24 @@ def argument_list(argument, default): None ) + +def str_to_hex(s: str) -> str: + return hex(abs(hash(s))).replace("0x", "") + + if ARGS['--users'] is not None: USERS = argument_list('--users', None) elif ARGS['--fake-name']: USERS = [names.get_first_name() for i in range(STREAMS)] else: - USERS = ["test-user-{n}".format(n=i) for i in range(STREAMS)] + USERS = [str_to_hex(str(i)) for i in range(STREAMS)] assert len(TEXT) == STREAMS, "len(filenames) != STREAMS." assert len(ICI) == STREAMS, "len(ICIs) != STREAMS." assert len(USERS) == STREAMS, "len(users) != STREAMS." assert len(DOC_IDS) == STREAMS, "len(document IDs) != STREAMS." 
+ def current_millis(): return round(time.time() * 1000) @@ -188,7 +198,7 @@ async def stream_document(text, ici, user, doc_id): while not done: try: async with aiohttp.ClientSession() as session: - async with session.ws_connect(url) as web_socket: + async with session.ws_connect(url, headers={"student": user}) as web_socket: commands = identify(user) for command in commands: await web_socket.send_str(json.dumps(command)) From 7d9b1635824c7ec66d21b4100dbf0849e97c68a0 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Fri, 18 Oct 2024 09:21:18 -0400 Subject: [PATCH 02/12] switch from header to query param for load balancing --- devops/nginx-static-loadbalance.conf | 4 ++-- learning_observer/util/stream_writing.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf index 5f8dab5ab..445d252da 100644 --- a/devops/nginx-static-loadbalance.conf +++ b/devops/nginx-static-loadbalance.conf @@ -4,7 +4,7 @@ upstream server1 { server localhost:8889; } upstream server2 { server localhost:8890; } # Extract first character of the student-id header -map $http_student $upstream_server { +map $arg_student $upstream_server { default server0; # default if no match or header is absent "~^[0]" server0; "~^[1]" server1; @@ -28,7 +28,7 @@ server { listen 80; location / { - proxy_set_header Student $http_student; + # proxy_set_header Student $http_student; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index f3c0c54d7..a0513fe8f 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -194,11 +194,11 @@ async def stream_document(text, ici, user, doc_id): ''' retries_remaining = 5 done = False - url = ARGS["--url"] + url = ARGS["--url"] + "?student=" + user while not done: try: async with aiohttp.ClientSession() as 
session: - async with session.ws_connect(url, headers={"student": user}) as web_socket: + async with session.ws_connect(url) as web_socket: commands = identify(user) for command in commands: await web_socket.send_str(json.dumps(command)) From c14d569dc52e8b904876f1c627af2c45098e234e Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Fri, 18 Oct 2024 09:24:47 -0400 Subject: [PATCH 03/12] switch to sha256 for deterministic hashing --- learning_observer/util/stream_writing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index a0513fe8f..c3a696a8b 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -36,6 +36,7 @@ import random import sys import time +import hashlib ARGS = docopt.docopt(__doc__) @@ -130,7 +131,11 @@ def argument_list(argument, default): def str_to_hex(s: str) -> str: - return hex(abs(hash(s))).replace("0x", "") + # return hex(abs(hash(s))).replace("0x", "") + s = s.encode() + sha256_hasher = hashlib.sha256() + sha256_hasher.update(s) + return sha256_hasher.hexdigest() if ARGS['--users'] is not None: From 453f3f4567cbc7c19be8386774c3e9a56f71258e Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Fri, 18 Oct 2024 09:46:15 -0400 Subject: [PATCH 04/12] minor refactor to id hashing for load balancing --- learning_observer/util/stream_writing.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index c3a696a8b..0d40bcab7 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -130,20 +130,12 @@ def argument_list(argument, default): ) -def str_to_hex(s: str) -> str: - # return hex(abs(hash(s))).replace("0x", "") - s = s.encode() - sha256_hasher = hashlib.sha256() - sha256_hasher.update(s) - return sha256_hasher.hexdigest() - - 
if ARGS['--users'] is not None: USERS = argument_list('--users', None) elif ARGS['--fake-name']: USERS = [names.get_first_name() for i in range(STREAMS)] else: - USERS = [str_to_hex(str(i)) for i in range(STREAMS)] + USERS = [f"test-user-{i}" for i in range(STREAMS)] assert len(TEXT) == STREAMS, "len(filenames) != STREAMS." assert len(ICI) == STREAMS, "len(ICIs) != STREAMS." @@ -193,13 +185,20 @@ def identify(user): ] +def str_to_hex(s: str) -> str: + s = s.encode() + sha256_hasher = hashlib.sha256() + sha256_hasher.update(s) + return sha256_hasher.hexdigest() + + async def stream_document(text, ici, user, doc_id): ''' Send a document to the server. ''' retries_remaining = 5 done = False - url = ARGS["--url"] + "?student=" + user + url = ARGS["--url"] + "?student=" + str_to_hex(user) while not done: try: async with aiohttp.ClientSession() as session: From ef7ec2ba2c5d5395fd778e0858685cfa175dc840 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Tue, 5 Nov 2024 14:26:19 -0500 Subject: [PATCH 05/12] fix typo in load balancing nginx config --- devops/nginx-static-loadbalance.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf index 445d252da..a184cd69d 100644 --- a/devops/nginx-static-loadbalance.conf +++ b/devops/nginx-static-loadbalance.conf @@ -3,7 +3,7 @@ upstream server0 { server localhost:8888; } upstream server1 { server localhost:8889; } upstream server2 { server localhost:8890; } -# Extract first character of the student-id header +# Extract first character of the student header map $arg_student $upstream_server { default server0; # default if no match or header is absent "~^[0]" server0; From 095aa48df1b385d7d99a0f8159b9299c6eb4da90 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Fri, 8 Nov 2024 09:50:53 -0500 Subject: [PATCH 06/12] add hashed id to url as query param in extension --- extension/writing-process/src/background.js | 58 
++++++++++++++++----- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index 8b7be1430..c04af117e 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -9,7 +9,7 @@ var RAW_DEBUG = false; /* This variable must be manually updated to specify the server that * the data will be sent to. */ -var WEBSOCKET_SERVER_URL = "wss://learning-observer.org/wsapi/in/" +var WEBSOCKET_SERVER_URL = "ws://localhost:80/wsapi/in/" import { googledocs_id_from_url } from './writing_common'; @@ -26,16 +26,16 @@ import * as loEventUtils from 'lo_event/lo_event/util.js'; // } /* and other async logic */); // websocketLogger( callback ); -// We are not sure if this should be done within `websocketLogger()`'s `init` -// or one level up. -const loggers = [ - consoleLogger(), - websocketLogger(WEBSOCKET_SERVER_URL) -] +// // We are not sure if this should be done within `websocketLogger()`'s `init` +// // or one level up. +// const loggers = [ +// consoleLogger(), +// websocketLogger(WEBSOCKET_SERVER_URL) +// ] -loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); -loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); -loEvent.go() +// loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); +// loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); +// loEvent.go() // Function to serve as replacement for // chrome.extension.getBackgroundPage().console.log(event); because it is not allowed in V3 @@ -203,18 +203,50 @@ async function reinjectContentScripts() { } // Let the server know we've loaded. 
-loEvent.logEvent("extension_loaded", {}); +// loEvent.logEvent("extension_loaded", {}); + + +async function digest(message, algo = 'SHA-1') { + return Array.from( + new Uint8Array(await crypto.subtle.digest(algo, new TextEncoder().encode(message))), + (byte) => byte.toString(16).padStart(2, '0') + ).join(''); +} + // Send the server the user info. This might not always be available. // HACK: this code will be changed pending server side changes to how we // handle auth and metadata. -loEventUtils.profileInfoWrapper().then((result) => { +loEventUtils.profileInfoWrapper().then(result => { + return digest(result.id).then(hashedId => ({result, hashedId})) +}).then(({result, hashedId}) => { + WEBSOCKET_SERVER_URL += `?student=${hashedId}`; + + // further initialization logic goes here, since the id needs to be retrieved + // and appended to the websocket url before we can connect and send log events to + // the backend + const loggers = [ + consoleLogger(), + websocketLogger(WEBSOCKET_SERVER_URL) + ] + + loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); + loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); + loEvent.go() + + loEvent.logEvent("extension_loaded", {}); + if (Object.keys(result).length > 0) { loEvent.logEvent('chrome_identity', { chrome_identity: result }); loEvent.logEvent('metadata_finished', {}) } + + logFromServiceWorker("Loaded"); +}).catch(err => { + logFromServiceWorker(`Failed to connect to backend: ${err}`) }); + // And let the console know we've loaded // chrome.extension.getBackgroundPage().console.log("Loaded"); remove -logFromServiceWorker("Loaded"); +// logFromServiceWorker("Loaded"); From a96f5b0fad40524b5b2bccde82ea028bb0560b4c Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 14 Nov 2024 21:39:35 -0500 Subject: [PATCH 07/12] refactor load balancing to be between dashboard and extension requests --- devops/nginx-static-loadbalance.conf | 30 
++++--------------- extension/writing-process/src/background.js | 2 +- learning_observer/util/stream_writing.py | 2 +- .../src/lib/components/LOConnection.react.js | 2 +- 4 files changed, 9 insertions(+), 27 deletions(-) diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf index a184cd69d..5f3fc4c2b 100644 --- a/devops/nginx-static-loadbalance.conf +++ b/devops/nginx-static-loadbalance.conf @@ -1,35 +1,17 @@ # Define upstream servers -upstream server0 { server localhost:8888; } -upstream server1 { server localhost:8889; } -upstream server2 { server localhost:8890; } +upstream extension_server { server localhost:8888; } +upstream dashboard_server { server localhost:8889; } -# Extract first character of the student header -map $arg_student $upstream_server { - default server0; # default if no match or header is absent - "~^[0]" server0; - "~^[1]" server1; - "~^[2]" server2; - "~^[3]" server0; - "~^[4]" server1; - "~^[5]" server2; - "~^[6]" server0; - "~^[7]" server1; - "~^[8]" server2; - "~^[9]" server0; - "~^[aA]" server1; - "~^[bB]" server2; - "~^[cC]" server0; - "~^[dD]" server1; - "~^[eE]" server2; - "~^[fF]" server0; +map $arg_source $upstream_server { + default extension_server; # default if no match or query param is absent + "extension" extension_server; + "dashboard" dashboard_server; } server { listen 80; location / { - # proxy_set_header Student $http_student; - proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index c04af117e..13d1d19e1 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -9,7 +9,7 @@ var RAW_DEBUG = false; /* This variable must be manually updated to specify the server that * the data will be sent to. 
*/ -var WEBSOCKET_SERVER_URL = "ws://localhost:80/wsapi/in/" +var WEBSOCKET_SERVER_URL = "ws://localhost:80/wsapi/in?source=extension" import { googledocs_id_from_url } from './writing_common'; diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index 0d40bcab7..3bc590300 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -198,7 +198,7 @@ async def stream_document(text, ici, user, doc_id): ''' retries_remaining = 5 done = False - url = ARGS["--url"] + "?student=" + str_to_hex(user) + url = ARGS["--url"] + "?source=extension" while not done: try: async with aiohttp.ClientSession() as session: diff --git a/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js b/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js index b453640a3..20940fda9 100644 --- a/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js +++ b/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js @@ -36,7 +36,7 @@ export default class LOConnection extends Component { // Determine url const protocol = {"http:": "ws:", "https:": "wss:"}[window.location.protocol]; - url = url ? url : `${protocol}//${window.location.hostname}:${window.location.port}/wsapi/communication_protocol`; + url = url ? url : `${protocol}//${window.location.hostname}:${window.location.port}/wsapi/communication_protocol?source=dashboard`; this.client = new WebSocket(url); // Listen for events. 
this.client.onopen = (e) => { From 3a778c2117bdd89ed95c906cd1631499da0dc430 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 21 Nov 2024 20:34:35 -0500 Subject: [PATCH 08/12] implement configurable route disabling on the backend, and clean up static routing on the frontend --- devops/nginx-static-loadbalance.conf | 2 +- docs/nginx.md | 17 +++++ extension/writing-process/src/background.js | 62 +++++-------------- learning_observer/learning_observer/main.py | 2 - learning_observer/learning_observer/routes.py | 30 ++++++++- .../src/lib/components/LOConnection.react.js | 2 +- 6 files changed, 62 insertions(+), 53 deletions(-) create mode 100644 docs/nginx.md diff --git a/devops/nginx-static-loadbalance.conf b/devops/nginx-static-loadbalance.conf index 5f3fc4c2b..1746f3c2c 100644 --- a/devops/nginx-static-loadbalance.conf +++ b/devops/nginx-static-loadbalance.conf @@ -3,7 +3,7 @@ upstream extension_server { server localhost:8888; } upstream dashboard_server { server localhost:8889; } map $arg_source $upstream_server { - default extension_server; # default if no match or query param is absent + default dashboard_server; # default if no match or query param is absent "extension" extension_server; "dashboard" dashboard_server; } diff --git a/docs/nginx.md b/docs/nginx.md new file mode 100644 index 000000000..f2dbe54ec --- /dev/null +++ b/docs/nginx.md @@ -0,0 +1,17 @@ +# NGINX Setup and Config + +This document contains basic info related to using nginx in the context of writing observer. + +## Static Routing Use Case Config + +In order to improve event processing efficiency, we will use nginx to route requests to different processes based on whether they are incoming data events (from the extension), or requests for data (from the dashboard). The associated config can be found at `/devops/nginx-static-loadbalance.conf` in this repository. 
+ +At the top of that file, the `upstream`s indicate the servers that should be used for each type of request, `dashboard` or `extension`. + +The next section, which includes the `map` directive, tells nginx to look for a url query parameter called source (`$arg_source`: `$arg` indicates that it is a url query param, and `source` gives the name). If `source=dashboard`, then it routes to the dashboard server, if `source=extension`, it routes to the extension server, and if no `source` is provided, then it defaults to the dashboard server. + +Below, in the `server` block, are general instructions for handling proxying. + +## Usage + +To deploy nginx, take the appropriate `.conf` file and put it in `/etc/nginx/sites-enabled`. Then, you can use `sudo nginx -t` to validate the config and ensure it is correct--this will print any detected syntax errors in the config files. Finally, restart nginx to apply the new config (for example, with `sudo systemctl restart nginx`) diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index 13d1d19e1..3685b1961 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -26,16 +26,16 @@ import * as loEventUtils from 'lo_event/lo_event/util.js'; // } /* and other async logic */); // websocketLogger( callback ); -// // We are not sure if this should be done within `websocketLogger()`'s `init` -// // or one level up. -// const loggers = [ -// consoleLogger(), -// websocketLogger(WEBSOCKET_SERVER_URL) -// ] +// We are not sure if this should be done within `websocketLogger()`'s `init` +// or one level up. 
+const loggers = [ + consoleLogger(), + websocketLogger(WEBSOCKET_SERVER_URL) +] -// loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); -// loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); -// loEvent.go() +loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); +loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); +loEvent.go(); // Function to serve as replacement for // chrome.extension.getBackgroundPage().console.log(event); because it is not allowed in V3 @@ -157,7 +157,7 @@ chrome.webRequest.onBeforeRequest.addListener( 'bundles': JSON.parse(formdata.bundles), 'rev': formdata.rev, 'timestamp': parseInt(request.timeStamp, 10) - } + }; logFromServiceWorker(event); loEvent.logEvent('google_docs_save', event); } catch(err) { @@ -170,7 +170,7 @@ chrome.webRequest.onBeforeRequest.addListener( 'formdata': formdata, 'rev': formdata.rev, 'timestamp': parseInt(request.timeStamp, 10) - } + }; loEvent.logEvent('google_docs_save_extra', event); } } else if(this_a_google_docs_bind(request)) { @@ -181,7 +181,7 @@ chrome.webRequest.onBeforeRequest.addListener( }, { urls: ["*://docs.google.com/*"] }, ['requestBody'] -) +); // re-injected scripts when chrome extension is reloaded, upgraded or re-installed // https://stackoverflow.com/questions/10994324/chrome-extension-content-script-re-injection-after-upgrade-or-install @@ -203,50 +203,18 @@ async function reinjectContentScripts() { } // Let the server know we've loaded. -// loEvent.logEvent("extension_loaded", {}); - - -async function digest(message, algo = 'SHA-1') { - return Array.from( - new Uint8Array(await crypto.subtle.digest(algo, new TextEncoder().encode(message))), - (byte) => byte.toString(16).padStart(2, '0') - ).join(''); -} - +loEvent.logEvent("extension_loaded", {}); // Send the server the user info. This might not always be available. 
// HACK: this code will be changed pending server side changes to how we // handle auth and metadata. -loEventUtils.profileInfoWrapper().then(result => { - return digest(result.id).then(hashedId => ({result, hashedId})) -}).then(({result, hashedId}) => { - WEBSOCKET_SERVER_URL += `?student=${hashedId}`; - - // further initialization logic goes here, since the id needs to be retrieved - // and appended to the websocket url before we can connect and send log events to - // the backend - const loggers = [ - consoleLogger(), - websocketLogger(WEBSOCKET_SERVER_URL) - ] - - loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); - loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); - loEvent.go() - - loEvent.logEvent("extension_loaded", {}); - +loEventUtils.profileInfoWrapper().then((result) => { if (Object.keys(result).length > 0) { loEvent.logEvent('chrome_identity', { chrome_identity: result }); loEvent.logEvent('metadata_finished', {}) } - - logFromServiceWorker("Loaded"); -}).catch(err => { - logFromServiceWorker(`Failed to connect to backend: ${err}`) }); - // And let the console know we've loaded // chrome.extension.getBackgroundPage().console.log("Loaded"); remove -// logFromServiceWorker("Loaded"); +logFromServiceWorker("Loaded"); diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index 685c3aa46..c966a2c40 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -96,8 +96,6 @@ def create_app(): if port is None and runmode == 'dev': port = learning_observer.webapp_helpers.find_open_port() - port = int(args.port) - # Check that everything is configured correctly, # and initialize anything which needs initialization learning_observer.prestartup.startup_checks_and_init() diff --git a/learning_observer/learning_observer/routes.py b/learning_observer/learning_observer/routes.py index 
b7adf3812..ee91a4cef 100644 --- a/learning_observer/learning_observer/routes.py +++ b/learning_observer/learning_observer/routes.py @@ -4,6 +4,7 @@ import getpass import os +import pmss import secrets import sys @@ -33,6 +34,20 @@ from learning_observer.utility_handlers import * +pmss.register_field( + name='disable_extension_routes', + type=pmss.pmsstypes.TYPES.boolean, + description='Whether to disable extension-related API routes', + default=False +) + +pmss.register_field( + name='disable_dashboard_routes', + type=pmss.pmsstypes.TYPES.boolean, + description='Whether to disable dashboard-related API routes', + default=False +) + def add_routes(app): ''' @@ -62,9 +77,20 @@ def tracemalloc_handler(request): aiohttp.web.get('/debug/tracemalloc/', tracemalloc_handler), ]) - register_dashboard_api(app) + if not settings.pmss_settings.disable_dashboard_routes(types=['server']): + register_dashboard_api(app) + debug_log("Dashboard routes are enabled") + else: + debug_log("Dashboard routes are disabled") + register_static_routes(app) - register_incoming_event_views(app) + + if not settings.pmss_settings.disable_extension_routes(types=['server']): + register_incoming_event_views(app) + debug_log("Extension routes are enabled") + else: + debug_log("Extension routes are disabled") + register_debug_routes(app) learning_observer.google.initialize_and_register_routes(app) diff --git a/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js b/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js index 20940fda9..b453640a3 100644 --- a/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js +++ b/modules/lo_dash_react_components/src/lib/components/LOConnection.react.js @@ -36,7 +36,7 @@ export default class LOConnection extends Component { // Determine url const protocol = {"http:": "ws:", "https:": "wss:"}[window.location.protocol]; - url = url ?
url : `${protocol}//${window.location.hostname}:${window.location.port}/wsapi/communication_protocol?source=dashboard`; + url = url ? url : `${protocol}//${window.location.hostname}:${window.location.port}/wsapi/communication_protocol`; this.client = new WebSocket(url); // Listen for events. this.client.onopen = (e) => { From c20ff942c244f651216b69c35555d79d8159b47e Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 21 Nov 2024 20:53:36 -0500 Subject: [PATCH 09/12] revert irrelevant changes --- extension/writing-process/src/background.js | 10 +++---- learning_observer/learning_observer/main.py | 4 +-- .../learning_observer/settings.py | 28 +++++++------------ learning_observer/util/stream_writing.py | 20 ++----------- 4 files changed, 20 insertions(+), 42 deletions(-) diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index 3685b1961..30f05aafb 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -9,7 +9,7 @@ var RAW_DEBUG = false; /* This variable must be manually updated to specify the server that * the data will be sent to. 
*/ -var WEBSOCKET_SERVER_URL = "ws://localhost:80/wsapi/in?source=extension" +var WEBSOCKET_SERVER_URL = "wss://learning-observer.org/wsapi/in?source=extension" import { googledocs_id_from_url } from './writing_common'; @@ -35,7 +35,7 @@ const loggers = [ loEvent.init('org.mitros.writing_analytics', '0.01', loggers, loEventDebug.LEVEL.SIMPLE); loEvent.setFieldSet([loEventUtils.getBrowserInfo(), loEventUtils.fetchDebuggingIdentifier()]); -loEvent.go(); +loEvent.go() // Function to serve as replacement for // chrome.extension.getBackgroundPage().console.log(event); because it is not allowed in V3 @@ -157,7 +157,7 @@ chrome.webRequest.onBeforeRequest.addListener( 'bundles': JSON.parse(formdata.bundles), 'rev': formdata.rev, 'timestamp': parseInt(request.timeStamp, 10) - }; + } logFromServiceWorker(event); loEvent.logEvent('google_docs_save', event); } catch(err) { @@ -170,7 +170,7 @@ chrome.webRequest.onBeforeRequest.addListener( 'formdata': formdata, 'rev': formdata.rev, 'timestamp': parseInt(request.timeStamp, 10) - }; + } loEvent.logEvent('google_docs_save_extra', event); } } else if(this_a_google_docs_bind(request)) { @@ -181,7 +181,7 @@ chrome.webRequest.onBeforeRequest.addListener( }, { urls: ["*://docs.google.com/*"] }, ['requestBody'] -); +) // re-injected scripts when chrome extension is reloaded, upgraded or re-installed // https://stackoverflow.com/questions/10994324/chrome-extension-content-script-re-injection-after-upgrade-or-install diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index c966a2c40..faa8cf1a6 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -55,8 +55,8 @@ # this to prefer the GPU where possible. 
# spacy.prefer_gpu() # debug_log("Preferring GPU Use.") -# spacy.require_gpu() -# debug_log("Requiring GPU Use.") +spacy.require_gpu() +debug_log("Requiring GPU Use.") def configure_event_loop(): diff --git a/learning_observer/learning_observer/settings.py b/learning_observer/learning_observer/settings.py index bcab7341e..ba3434cf1 100644 --- a/learning_observer/learning_observer/settings.py +++ b/learning_observer/learning_observer/settings.py @@ -25,8 +25,7 @@ prog=__name__, description="A system for monitoring", epilog="For more information, see PMSS documentation.", - rulesets=[pmss.YAMLFileRuleset( - filename=learning_observer.paths.config_file())] + rulesets=[pmss.YAMLFileRuleset(filename=learning_observer.paths.config_file())] ) # If we e.g. `import settings` and `import learning_observer.settings`, we @@ -40,7 +39,6 @@ args = None parser = None - def str_to_bool(arg): if isinstance(arg, bool): return arg @@ -126,14 +124,13 @@ def parse_and_validate_arguments(): RUN_MODES = enum.Enum('RUN_MODES', 'DEV DEPLOY INTERACTIVE') RUN_MODE = None -pmss.parser('run_mode', parent='string', choices=[ - 'dev', 'deploy', 'interactive'], transform=None) +pmss.parser('run_mode', parent='string', choices=['dev', 'deploy', 'interactive'], transform=None) pmss.register_field( name='run_mode', type='run_mode', - description="Set which mode the server is running in.\n" - "`dev` for local development with full debugging\n" - "`deploy` for running on a server with better performance\n" + description="Set which mode the server is running in.\n"\ + "`dev` for local development with full debugging\n"\ + "`deploy` for running on a server with better performance\n"\ "`interactive` for processing data offline", required=True ) @@ -184,19 +181,16 @@ def load_settings(config): elif settings_run_mode == 'interactive': RUN_MODE = RUN_MODES.INTERACTIVE else: - raise ValueError( - "Configuration setting for run_mode must be either 'dev', 'deploy', or 'interactive'") + raise 
ValueError("Configuration setting for run_mode must be either 'dev', 'deploy', or 'interactive'") if 'repos' in settings: for repo in settings['repos']: # In the future, we might allow dicts if we e.g. want more metadata if isinstance(settings['repos'][repo], str): - learning_observer.paths.register_repo( - repo, settings['repos'][repo]) + learning_observer.paths.register_repo(repo, settings['repos'][repo]) elif isinstance(settings['repos'][repo], dict): # HACK. We should figure out where to stick this. This does not belong in paths - debug_working = settings['repos'][repo].get( - "debug_working", None) + debug_working = settings['repos'][repo].get("debug_working", None) learning_observer.paths.register_repo( repo, @@ -204,8 +198,7 @@ def load_settings(config): debug_working=debug_working ) else: - raise ValueError( - "settings.repos.{repo} should be a string or a dict. Please fix the settings file.".format(repo=repo)) + raise ValueError("settings.repos.{repo} should be a string or a dict. Please fix the settings file.".format(repo=repo)) return settings @@ -235,8 +228,7 @@ def initialized(): # Not all of these are guaranteed to work on every branch of the codebase. 
-AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', - 'merkle', 'save_google_ajax', 'use_google_ajax'] +AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', 'merkle', 'save_google_ajax', 'use_google_ajax'] def feature_flag(flag): diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py index 3bc590300..b5d02567c 100644 --- a/learning_observer/util/stream_writing.py +++ b/learning_observer/util/stream_writing.py @@ -36,7 +36,6 @@ import random import sys import time -import hashlib ARGS = docopt.docopt(__doc__) @@ -91,12 +90,10 @@ def argument_list(argument, default): if isinstance(list_string, str): list_string = [list_string] * STREAMS if len(list_string) != STREAMS: - print( - f"Failure: {list_string}\nfrom {argument} should make {STREAMS} items") + print(f"Failure: {list_string}\nfrom {argument} should make {STREAMS} items") sys.exit(-1) return list_string - # TODO what is `source_files` supposed to be? # when running this script for the workshop, we should either # 1) move gpt3 texts out of writing observer (dependency hell) OR @@ -106,11 +103,9 @@ def argument_list(argument, default): if ARGS["--gpt3"] is not None: import writing_observer.sample_essays TEXT = writing_observer.sample_essays.GPT3_TEXTS[ARGS["--gpt3"]] - text_to_use = TEXT[0] - TEXT = [text_to_use for _ in range(STREAMS)] + STREAMS = len(TEXT) elif source_files is None: - TEXT = ["\n".join(loremipsum.get_paragraphs( - int(ARGS.get("--text-length", 5)))) for i in range(STREAMS)] + TEXT = ["\n".join(loremipsum.get_paragraphs(int(ARGS.get("--text-length", 5)))) for i in range(STREAMS)] else: TEXT = [open(filename).read() for filename in source_files] @@ -129,7 +124,6 @@ def argument_list(argument, default): None ) - if ARGS['--users'] is not None: USERS = argument_list('--users', None) elif ARGS['--fake-name']: @@ -142,7 +136,6 @@ def argument_list(argument, default): assert len(USERS) == STREAMS, "len(users) != STREAMS." 
assert len(DOC_IDS) == STREAMS, "len(document IDs) != STREAMS." - def current_millis(): return round(time.time() * 1000) @@ -185,13 +178,6 @@ def identify(user): ] -def str_to_hex(s: str) -> str: - s = s.encode() - sha256_hasher = hashlib.sha256() - sha256_hasher.update(s) - return sha256_hasher.hexdigest() - - async def stream_document(text, ici, user, doc_id): ''' Send a document to the server. From 7511bbe521bee789b59266bcc8a46f66037093f3 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 21 Nov 2024 20:54:57 -0500 Subject: [PATCH 10/12] revert another minor but unncessary change (related to importing spacy in main) --- learning_observer/learning_observer/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index faa8cf1a6..ef42ca552 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -9,7 +9,6 @@ assessment). ''' -import spacy import sys import asyncio @@ -53,6 +52,7 @@ # This will need to move but for the moment we hack with # this to prefer the GPU where possible. 
+import spacy # spacy.prefer_gpu() # debug_log("Preferring GPU Use.") spacy.require_gpu() From b8eb4822739134db02eff1b147321ba2a64f588f Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 5 Dec 2024 20:12:34 -0500 Subject: [PATCH 11/12] remove unused port cli argument --- learning_observer/learning_observer/main.py | 4 ++-- learning_observer/learning_observer/settings.py | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py index ef42ca552..a48a028ed 100644 --- a/learning_observer/learning_observer/main.py +++ b/learning_observer/learning_observer/main.py @@ -55,8 +55,8 @@ import spacy # spacy.prefer_gpu() # debug_log("Preferring GPU Use.") -spacy.require_gpu() -debug_log("Requiring GPU Use.") +# spacy.require_gpu() +# debug_log("Requiring GPU Use.") def configure_event_loop(): diff --git a/learning_observer/learning_observer/settings.py b/learning_observer/learning_observer/settings.py index ba3434cf1..f9140b3b5 100644 --- a/learning_observer/learning_observer/settings.py +++ b/learning_observer/learning_observer/settings.py @@ -91,11 +91,6 @@ def parse_and_validate_arguments(): help='Launce the Learning Observer application. 
This can be used with `--ipython-console` and `--ipython-kernel`.', default=True, nargs='?', const=True, type=str_to_bool) - parser.add_argument( - '--port', - default=8888 - ) - args = parser.parse_args() if not os.path.exists(args.config_file): From 23be1c952e01e806841da972dca0bc04999de074 Mon Sep 17 00:00:00 2001 From: Brendon Hablutzel Date: Thu, 5 Dec 2024 20:18:28 -0500 Subject: [PATCH 12/12] create a bash script for running two instances of learning observer --- servermanagement/RunTwoLO.sh | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 servermanagement/RunTwoLO.sh diff --git a/servermanagement/RunTwoLO.sh b/servermanagement/RunTwoLO.sh new file mode 100644 index 000000000..600488341 --- /dev/null +++ b/servermanagement/RunTwoLO.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# =============================== +# This is a modified version of RunLearningObserver.sh that automatically starts two processes, +# each with a different creds.yaml ('creds-a.yaml' and 'creds-b.yaml')--this should be +# used for static routing, where one creds.yaml has only dashboard routes enabled, +# and the other has only extension routes enabled. Note that these creds.yaml should +# specify different ports. +# +# This bash script provides a simple wrapper to run the +# learning observer service and pipe the data to a logfile +# over time this should be integrated into the systemd +# service process. This uses static variables to specify +# the location of the virtualenv and the command and +# specifies the location for the running logfile.
+ +# System Variables +# -------------------------------------- +VIRTUALENV_PATH="/usr/local/share/projects/WritingObserver/VirtualENVs/WOvenv" +#VIRTUALENV_PYTHON="/usr/local/share/Projects/WritingObserver/VirtualENVs/learning_observer/bin/python3.9" +LEARNING_OBSERVER_LOC="/usr/local/share/projects/WritingObserver/Repositories/ArgLab_writing_observer/learning_observer" +LOGFILE_DEST="/usr/local/share/projects/WritingObserver/Repositories/ArgLab_writing_observer/learning_observer/learning_observer/logs" + +# Make the logfile name +# --------------------------------------- +LOG_DATE=$(date "+%m-%d-%Y--%H-%M-%S") +LOGFILE_NAME="$LOGFILE_DEST/learning_observer_service_$LOG_DATE.log" +echo $LOGFILE_NAME; + + +# Run both processes +# -------------------------------------- +echo "Running Learning Observer Service..." +cd $LEARNING_OBSERVER_LOC +source $VIRTUALENV_PATH/bin/activate +nohup python learning_observer --config-file=creds-a.yaml > $LOGFILE_NAME 2>&1 & +PROCESS_ID=$! +echo $PROCESS_ID > $LOGFILE_DEST/run.pid + +# NOTE: if this should go to separate log file location, modify here +nohup python learning_observer --config-file=creds-b.yaml > $LOGFILE_NAME 2>&1 & +PROCESS_ID=$! +echo $PROCESS_ID > $LOGFILE_DEST/run.pid + +# Set the number of allowed open files to something large 8192 +prlimit --pid $PROCESS_ID --nofile=8192