diff --git a/.gitignore b/.gitignore index d951ebd57..1ad3eaf7a 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,5 @@ LanguageTool-5.4 package-lock.json learning_observer/learning_observer/static_data/google/ learning_observer/learning_observer/static_data/admins.yaml -.ipynb_checkpoints/ \ No newline at end of file +.ipynb_checkpoints/ + diff --git a/docs/lms_integrations/canvas.md b/docs/lms_integrations/canvas.md new file mode 100644 index 000000000..2f15f3038 --- /dev/null +++ b/docs/lms_integrations/canvas.md @@ -0,0 +1,107 @@ +## Canvas LMS Documentation: +Reference: https://canvas.instructure.com/doc/api/file.oauth.html + +This guide will walk you through the process of obtaining the `client_id`, `client_secret`, and `refresh_token` for interacting with the Canvas LMS API. These credentials are essential for making authenticated API requests to Canvas. + +### Prerequisites + +- You need to have administrator access to the Canvas LMS instance. + +### Steps to Obtain the `client_id` and `client_secret` + +1. **Log in to Your Canvas LMS Account**: + - Go to your Canvas LMS instance and log in with your administrator credentials. + +2. **Navigate to the Developer Keys Section**: + - From the Canvas dashboard, click on the **Admin** panel located on the left-hand side. + - Select the specific account (usually your institution's name) where you want to manage developer keys. + - Scroll down and click on **Developer Keys** in the left-hand menu under the **Settings** section. + +3. **Create a New Developer Key**: + - In the Developer Keys section, click the **+ Developer Key** button at the top-right corner. + - Choose **API Key** from the dropdown menu. + +4. **Fill Out the Developer Key Details**: + - **Name**: Enter a name for the Developer Key (e.g., "My Canvas API Integration"). + - **Owner's Email**: Enter administrator's email. + - **Redirect URIs**: Provide the redirect URI that will handle OAuth callbacks. 
This is typically a URL on your institution's server where you handle OAuth responses. + +5. **Save and Enable the Developer Key**: + - After filling out the required information, click **Save Key**. + - Ensure the key is **enabled** by toggling the switch next to your newly created key. + +6. **Obtain the `client_id` and `client_secret`**: + - After saving, your `client_id` and `client_secret` will be displayed in the list of developer keys. + - **Client ID**: This is usually displayed as a numeric value in the details column. + - **Client Secret**: Click the `show key` button to display the `client_secret`. + +### Steps to Obtain the `refresh_token` + +1. **Redirect User to Canvas Authorization Endpoint**: + - To obtain the `refresh_token`, you need to perform an OAuth flow. + - Direct the user to the Canvas OAuth authorization endpoint (replace `canvas.instructure.com` with your institution's Canvas domain if it differs): + ``` + https://canvas.instructure.com/login/oauth2/auth?client_id=YOUR_CLIENT_ID&response_type=code&redirect_uri=YOUR_REDIRECT_URI + ``` + - Replace `YOUR_CLIENT_ID` with the `client_id` obtained earlier and `YOUR_REDIRECT_URI` with the redirect URI you configured. + +2. **User Authorizes the Application**: + - The user will be prompted to log in (if not already logged in) and authorize the application to access their Canvas data. + +3. **Handle the Authorization Code**: + - After the user authorizes the application, they will be redirected to the `redirect_uri` you provided, with an authorization `code` appended as a query parameter. + - Example: `https://your-redirect-uri.com?code=AUTHORIZATION_CODE` + +4.
**Exchange the Authorization Code for a Refresh Token**: + - Use the authorization `code` to request an access token and refresh token by making a POST request to the Canvas token endpoint (on the same Canvas domain used for the authorization request): + ``` + POST https://canvas.instructure.com/login/oauth2/token + ``` + - Include the following parameters in the request body: + - `client_id`: Your Canvas `client_id` + - `client_secret`: Your Canvas `client_secret` + - `redirect_uri`: The same `redirect_uri` used in the authorization request + - `code`: The authorization code you received + - `grant_type`: Set this to `authorization_code` + + - Example of the POST request in `curl`: + ```bash + curl -X POST https://canvas.instructure.com/login/oauth2/token \ + -F 'client_id=YOUR_CLIENT_ID' \ + -F 'client_secret=YOUR_CLIENT_SECRET' \ + -F 'redirect_uri=YOUR_REDIRECT_URI' \ + -F 'code=AUTHORIZATION_CODE' \ + -F 'grant_type=authorization_code' + ``` + +5. **Extract the Refresh Token**: + - The response to the token request will include an `access_token`, a `refresh_token`, and other token information. + - **Refresh Token**: This token can be used to obtain new access tokens without requiring the user to re-authorize. + +### Example JSON Response from Token Request + +```json +{ + "access_token": "ACCESS_TOKEN", + "token_type": "Bearer", + "expires_in": 3600, + "refresh_token": "REFRESH_TOKEN", + "user": { + "id": 12345, + "name": "John Doe", + "sortable_name": "Doe, John", + "short_name": "John" + } +} +``` + +- **`refresh_token`**: This is the value you need to store securely for future use (for example, as the `lms.canvas_oauth.refresh_token` setting in `creds.yaml`). + +### Important Notes + +- **Security**: The `client_id`, `client_secret`, and `refresh_token` should be stored securely. Do not expose them in client-side code or public repositories. +- **Token Expiration**: The `access_token` typically expires after a short period (e.g., 1 hour). The `refresh_token` is longer-lived and is used to obtain a new `access_token` whenever the current one expires.
+ +### Conclusion + +By following these steps, you will obtain the necessary credentials (`client_id`, `client_secret`, and `refresh_token`) to interact with the Canvas LMS API programmatically. These credentials are essential for making authenticated requests to access and manage Canvas resources through the API. \ No newline at end of file diff --git a/learning_observer/learning_observer/auth/handlers.py b/learning_observer/learning_observer/auth/handlers.py index 4506afdaa..11575ed6e 100644 --- a/learning_observer/learning_observer/auth/handlers.py +++ b/learning_observer/learning_observer/auth/handlers.py @@ -55,8 +55,11 @@ async def user_from_session(request): ''' session = await aiohttp_session.get_session(request) session_user = session.get(constants.USER, None) - if constants.AUTH_HEADERS in session: - request[constants.AUTH_HEADERS] = session[constants.AUTH_HEADERS] + header_keys = [constants.AUTH_HEADERS, constants.CANVAS_AUTH_HEADERS] + # Set headers in the request if they exist in the session + for key in header_keys: + if key in session: + request[key] = session[key] return session_user diff --git a/learning_observer/learning_observer/auth/social_sso.py b/learning_observer/learning_observer/auth/social_sso.py index 6d47d7b5a..f2d79bda1 100644 --- a/learning_observer/learning_observer/auth/social_sso.py +++ b/learning_observer/learning_observer/auth/social_sso.py @@ -67,22 +67,33 @@ pmss.register_field( name="client_id", type=pmss.pmsstypes.TYPES.string, - description="The Google OAuth client ID", + description="The Google/Canvas OAuth client ID", required=True ) pmss.register_field( name="client_secret", type=pmss.pmsstypes.TYPES.string, - description="The Google OAuth client secret", + description="The Google/Canvas OAuth client secret", required=True ) pmss.register_field( name='fetch_additional_info_from_teacher_on_login', type=pmss.pmsstypes.TYPES.boolean, - description='Whether we should start an additional task that will '\ - 'fetch all text from 
current rosters.', + description='Whether we should start an additional task that will fetch all text from current rosters.', default=False ) +pmss.register_field( + name="token_uri", + type=pmss.pmsstypes.TYPES.string, + description="The Canvas OAuth token uri", + required=True +) +pmss.register_field( + name="refresh_token", + type=pmss.pmsstypes.TYPES.string, + description="The Canvas OAuth refresh token", + required=True +) DEFAULT_GOOGLE_SCOPES = [ @@ -128,7 +139,11 @@ async def social_handler(request): "We only handle Google logins. Non-google Provider" ) - user = await _google(request) + user = await _handle_google_authorization(request) + + roster_source = settings.pmss_settings.source(types=['roster_data']) + + await _set_lms_header_information(request, roster_source) if constants.USER_ID in user: await learning_observer.auth.utils.update_session_user_info(request, user) @@ -143,6 +158,21 @@ async def social_handler(request): return aiohttp.web.HTTPFound(url) +async def _set_lms_header_information(request, roster_source): + """ + Handles the authorization of the specified Learning Management System (LMS) + based on the roster data source and delegating the request to the appropriate handler + based on the data source type. + """ + lms_map = { + constants.CANVAS: _handle_canvas_authorization + } + + # Handle the request depending on the roster source + if roster_source in lms_map: + return await lms_map[roster_source](request) + + async def _store_teacher_info_for_background_process(id, request): '''HACK this code stores 2 pieces of information when teacher logs in with a social handlers. @@ -211,7 +241,37 @@ async def _process_student_documents(student): # TODO saved skipped doc ids somewhere? 
-async def _google(request): +async def _handle_canvas_authorization(request): + ''' + Handle Canvas authorization + ''' + if 'error' in request.query: + return {} + + token_uri = settings.pmss_settings.token_uri(types=['lms', 'canvas_oauth']) + url = token_uri + + params = { + "grant_type": "refresh_token", + 'client_id': settings.pmss_settings.client_id(types=['lms', 'canvas_oauth']), + 'client_secret': settings.pmss_settings.client_secret(types=['lms', 'canvas_oauth']), + "refresh_token": settings.pmss_settings.refresh_token(types=['lms', 'canvas_oauth']) + } + async with aiohttp.ClientSession(loop=request.app.loop) as client: + async with client.post(url, data=params) as resp: + data = await resp.json() + assert 'access_token' in data, data + + # get user profile + canvas_headers = {'Authorization': 'Bearer ' + data['access_token']} + session = await aiohttp_session.get_session(request) + session[constants.CANVAS_AUTH_HEADERS] = canvas_headers + request[constants.CANVAS_AUTH_HEADERS] = canvas_headers + + return data + + +async def _handle_google_authorization(request): ''' Handle Google login ''' diff --git a/learning_observer/learning_observer/canvas.py b/learning_observer/learning_observer/canvas.py new file mode 100755 index 000000000..de94161ee --- /dev/null +++ b/learning_observer/learning_observer/canvas.py @@ -0,0 +1,67 @@ +import functools + +import learning_observer.auth +import learning_observer.lms_integration +import learning_observer.constants as constants + + +LMS_NAME = constants.CANVAS + +CANVAS_ENDPOINTS = list(map(lambda x: learning_observer.lms_integration.Endpoint(*x, "", None, LMS_NAME), [ + ("course_list", "/courses"), + ("course_roster", "/courses/{courseId}/students"), + ("course_assignments", "/courses/{courseId}/assignments"), + ("course_assignments_submissions", "/courses/{courseId}/assignments/{assignmentId}/submissions"), +])) + +register_cleaner_with_endpoints = functools.partial(learning_observer.lms_integration.register_cleaner, 
endpoints=CANVAS_ENDPOINTS) + + +class CanvasLMS(learning_observer.lms_integration.LMS): + def __init__(self): + super().__init__(lms_name=LMS_NAME, endpoints=CANVAS_ENDPOINTS) + + @register_cleaner_with_endpoints("course_roster", "roster") + def clean_course_roster(canvas_json): + students = canvas_json + students_updated = [] + for student_json in students: + canvas_id = student_json['id'] + integration_id = student_json['integration_id'] + local_id = learning_observer.auth.google_id_to_user_id(integration_id) + student = { + "course_id": "1", + "user_id": local_id, + "profile": { + "id": canvas_id, + "name": { + "given_name": student_json['name'], + "family_name": student_json['name'], + "full_name": student_json['name'] + } + } + } + if 'external_ids' not in student_json: + student_json['external_ids'] = [] + student_json['external_ids'].append({"source": constants.CANVAS, "id": integration_id}) + students_updated.append(student) + return students_updated + + @register_cleaner_with_endpoints("course_list", "courses") + def clean_course_list(canvas_json): + courses = canvas_json + courses.sort(key=lambda x: x.get('name', 'ZZ')) + return courses + + @register_cleaner_with_endpoints("course_assignments", "assignments") + def clean_course_assignment_list(canvas_json): + assignments = canvas_json + assignments.sort(key=lambda x: x.get('name', 'ZZ')) + return assignments + + +canvas_lms = CanvasLMS() + + +def initialize_canvas_routes(app): + canvas_lms.initialize_routes(app) diff --git a/learning_observer/learning_observer/constants.py b/learning_observer/learning_observer/constants.py index 6a923793c..070c29b20 100644 --- a/learning_observer/learning_observer/constants.py +++ b/learning_observer/learning_observer/constants.py @@ -10,6 +10,7 @@ ''' # used in request headers to hold auth information AUTH_HEADERS = 'auth_headers' +CANVAS_AUTH_HEADERS = 'canvas_auth_headers' # used for storing impersonation information in session IMPERSONATING_AS = 'impersonating_as' @@ 
-17,3 +18,7 @@ USER = 'user' # common user id reference for user object USER_ID = 'user_id' + +# used to identify LMSes +GOOGLE = 'google' +CANVAS = 'canvas' diff --git a/learning_observer/learning_observer/creds.yaml.example b/learning_observer/learning_observer/creds.yaml.example index 33d06c722..0bc3e4070 100644 --- a/learning_observer/learning_observer/creds.yaml.example +++ b/learning_observer/learning_observer/creds.yaml.example @@ -101,3 +101,10 @@ modules: writing_observer: use_nlp: false openai_api_key: '' # can also be set with OPENAI_API_KEY environment variable +lms: + canvas_oauth: + lms_api: {canvas-lms-api} + token_uri: {canvas-token-uri} + client_id: {canvas-client-id} + client_secret: {canvas-client-secret} + refresh_token: {canvas-refresh-token} \ No newline at end of file diff --git a/learning_observer/learning_observer/google.py b/learning_observer/learning_observer/google.py index 35df5dd99..831298244 100644 --- a/learning_observer/learning_observer/google.py +++ b/learning_observer/learning_observer/google.py @@ -21,16 +21,10 @@ analysis. ''' -import collections import itertools import json -import recordclass -import string import re - -import aiohttp -import aiohttp.web -import aiohttp_session +import functools import learning_observer.constants as constants import learning_observer.settings as settings @@ -39,9 +33,11 @@ import learning_observer.auth import learning_observer.runtime import learning_observer.prestartup +import learning_observer.lms_integration cache = None +LMS_NAME = constants.GOOGLE GOOGLE_FIELDS = [ @@ -59,38 +55,7 @@ GOOGLE_TO_SNAKE = {field: camel_to_snake.sub('_', field).lower() for field in GOOGLE_FIELDS} -# These took a while to find, but many are documented here: -# https://developers.google.com/drive/api/v3/reference/ -# This list might change. Many of these contain additional (optional) parameters -# which we might add later. This is here for debugging, mostly. We'll stabilize -# APIs later. 
-class Endpoint(recordclass.make_dataclass("Endpoint", ["name", "remote_url", "doc", "cleaners"], defaults=["", None])): - def arguments(self): - return extract_parameters_from_format_string(self.remote_url) - - def _local_url(self): - parameters = "}/{".join(self.arguments()) - base_url = f"/google/{self.name}" - if len(parameters) == 0: - return base_url - else: - return base_url + "/{" + parameters + "}" - - def _add_cleaner(self, name, cleaner): - if self.cleaners is None: - self.cleaners = dict() - self.cleaners[name] = cleaner - if 'local_url' not in cleaner: - cleaner['local_url'] = self._local_url + "/" + name - - def _cleaners(self): - if self.cleaners is None: - return [] - else: - return self.cleaners - - -ENDPOINTS = list(map(lambda x: Endpoint(*x), [ +GOOGLE_ENDPOINTS = list(map(lambda x: learning_observer.lms_integration.Endpoint(*x, "", None, LMS_NAME), [ ("document", "https://docs.googleapis.com/v1/documents/{documentId}"), ("course_list", "https://classroom.googleapis.com/v1/courses"), ("course_roster", "https://classroom.googleapis.com/v1/courses/{courseId}/students"), @@ -104,281 +69,7 @@ def _cleaners(self): ("drive_revisions", "https://www.googleapis.com/drive/v3/files/{documentId}/revisions") ])) - -def extract_parameters_from_format_string(format_string): - ''' - Extracts parameters from a format string. E.g. - - >>> ("hello {hi} my {bye}")] - ['hi', 'bye'] - ''' - # The parse returns a lot of context, which we discard. In particular, the - # last item is often about the suffix after the last parameter and may be - # `None` - return [f[1] for f in string.Formatter().parse(format_string) if f[1] is not None] - - -async def raw_google_ajax(runtime, target_url, **kwargs): - ''' - Make an AJAX call to Google, managing auth + auth. - - * runtime is a Runtime class containing request information. - * default_url is typically grabbed from ENDPOINTS - * ... 
and we pass the named parameters - ''' - request = runtime.get_request() - url = target_url.format(**kwargs) - user = await learning_observer.auth.get_active_user(request) - if constants.AUTH_HEADERS not in request: - raise aiohttp.web.HTTPUnauthorized(text="Please log in") # TODO: Consistent way to flag this - - cache_key = "raw_google/" + learning_observer.auth.encode_id('session', user[constants.USER_ID]) + '/' + learning_observer.util.url_pathname(url) - if settings.feature_flag('use_google_ajax') is not None: - value = await cache[cache_key] - if value is not None: - return learning_observer.util.translate_json_keys( - json.loads(value), - GOOGLE_TO_SNAKE - ) - async with aiohttp.ClientSession(loop=request.app.loop) as client: - async with client.get(url, headers=request[constants.AUTH_HEADERS]) as resp: - response = await resp.json() - learning_observer.log_event.log_ajax(target_url, response, request) - if settings.feature_flag('use_google_ajax') is not None: - await cache.set(cache_key, json.dumps(response, indent=2)) - return learning_observer.util.translate_json_keys( - response, - GOOGLE_TO_SNAKE - ) - - -def raw_access_partial(remote_url, name=None): - ''' - This is a helper which allows us to create a function which calls specific - Google APIs. - - To test this, try: - - print(await raw_document(request, documentId="some_google_doc_id")) - ''' - async def caller(request, **kwargs): - ''' - Make an AJAX request to Google - ''' - return await raw_google_ajax(request, remote_url, **kwargs) - setattr(caller, "__qualname__", name) - - return caller - - -@learning_observer.prestartup.register_startup_check -def connect_to_google_cache(): - '''Setup cache for requests to the Google API. - The cache is currently only used with the `use_google_ajax` - feature flag. 
- ''' - if 'google_routes' not in settings.settings['feature_flags']: - return - - for key in ['save_google_ajax', 'use_google_ajax', 'save_clean_ajax', 'use_clean_ajax']: - if key in settings.settings['feature_flags']: - global cache - try: - cache = learning_observer.kvs.KVS.google_cache() - except AttributeError: - error_text = 'The google_cache KVS is not configured.\n'\ - 'Please add a `google_cache` kvs item to the `kvs` '\ - 'key in `creds.yaml`.\n'\ - '```\ngoogle_cache:\n type: filesystem\n path: ./learning_observer/static_data/google\n'\ - ' subdirs: true\n```\nOR\n'\ - '```\ngoogle_cache:\n type: redis_ephemeral\n expiry: 600\n```' - raise learning_observer.prestartup.StartupCheck("Google KVS: " + error_text) - - -def initialize_and_register_routes(app): - ''' - This is a big 'ol function which might be broken into smaller ones at some - point. We: - - - Created debug routes to pass through AJAX requests to Google - - Created production APIs to have access to cleaned versions of said data - - Create local function calls to call from other pieces of code - within process - - We probably don't need all of this in production, but a lot of this is - very important for debugging. Having APIs is more useful than it looks, since - making use of Google APIs requires a lot of infrastructure (registering - apps, auth/auth, etc.) which we already have in place on dev / debug servers. - ''' - # # For now, all of this is behind one big feature flag. In the future, - # # we'll want seperate ones for the debugging tools and the production - # # staff - # if 'google_routes' not in settings.settings['feature_flags']: - # return - - # Provide documentation on what we're doing - app.add_routes([ - aiohttp.web.get("/google", api_docs_handler) - ]) - - def make_ajax_raw_handler(remote_url): - ''' - This creates a handler to forward Google requests to the client. It's used - for debugging right now. We should think through APIs before relying on this. 
- ''' - async def ajax_passthrough(request): - ''' - And the actual handler.... - ''' - runtime = learning_observer.runtime.Runtime(request) - response = await raw_google_ajax( - runtime, - remote_url, - **request.match_info - ) - - return aiohttp.web.json_response(response) - return ajax_passthrough - - def make_cleaner_handler(raw_function, cleaner_function, name=None): - async def cleaner_handler(request): - ''' - ''' - response = cleaner_function( - await raw_function(request, **request.match_info) - ) - if isinstance(response, dict) or isinstance(response, list): - return aiohttp.web.json_response( - response - ) - elif isinstance(response, str): - return aiohttp.web.Response( - text=response - ) - else: - raise AttributeError(f"Invalid response type: {type(response)}") - if name is not None: - setattr(cleaner_handler, "__qualname__", name + "_handler") - - return cleaner_handler - - def make_cleaner_function(raw_function, cleaner_function, name=None): - async def cleaner_local(request, **kwargs): - google_response = await raw_function(request, **kwargs) - clean = cleaner_function(google_response) - return clean - if name is not None: - setattr(cleaner_local, "__qualname__", name) - return cleaner_local - - for e in ENDPOINTS: - function_name = f"raw_{e.name}" - raw_function = raw_access_partial(remote_url=e.remote_url, name=e.name) - globals()[function_name] = raw_function - cleaners = e._cleaners() - for c in cleaners: - app.add_routes([ - aiohttp.web.get( - cleaners[c]['local_url'], - make_cleaner_handler( - raw_function, - cleaners[c]['function'], - name=cleaners[c]['name'] - ) - ) - ]) - globals()[cleaners[c]['name']] = make_cleaner_function( - raw_function, - cleaners[c]['function'], - name=cleaners[c]['name'] - ) - app.add_routes([ - aiohttp.web.get( - e._local_url(), - make_ajax_raw_handler(e.remote_url) - ) - ]) - - -def api_docs_handler(request): - ''' - Return a list of available endpoints. 
- - Eventually, we should also document available function calls - ''' - response = "URL Endpoints:\n\n" - for endpoint in ENDPOINTS: - response += f"{endpoint._local_url()}\n" - cleaners = endpoint._cleaners() - for c in cleaners: - response += f" {cleaners[c]['local_url']}\n" - response += "\n\n Globals:" - if False: - response += str(globals()) - return aiohttp.web.Response(text=response) - - -def register_cleaner(data_source, cleaner_name): - ''' - This will register a cleaner function, for export both as a web service - and as a local function call. - ''' - def decorator(f): - found = False - for endpoint in ENDPOINTS: - if endpoint.name == data_source: - found = True - endpoint._add_cleaner( - cleaner_name, - { - 'function': f, - 'local_url': f'{endpoint._local_url()}/{cleaner_name}', - 'name': cleaner_name - } - ) - - if not found: - raise AttributeError(f"Data source {data_source} invalid; not found in endpoints.") - return f - - return decorator - - -# Rosters -@register_cleaner("course_roster", "roster") -def clean_course_roster(google_json): - ''' - Retrieve the roster for a course, alphabetically - ''' - students = google_json.get('students', []) - students.sort( - key=lambda x: x.get('name', {}).get('fullName', 'ZZ'), - ) - # Convert Google IDs to internal ideas (which are the same, but with a gc- prefix) - for student_json in students: - google_id = student_json['profile']['id'] - local_id = learning_observer.auth.google_id_to_user_id(google_id) - student_json[constants.USER_ID] = local_id - del student_json['profile']['id'] - - # For the present there is only one external id so we will add that directly. 
- if 'external_ids' not in student_json['profile']: - student_json['profile']['external_ids'] = [] - student_json['profile']['external_ids'].append({"source": "google", "id": google_id}) - return students - - -@register_cleaner("course_list", "courses") -def clean_course_list(google_json): - ''' - Google's course list is one object deeper than we'd like, and alphabetic - sort order is nicer. This will clean it up a bit - ''' - courses = google_json.get('courses', []) - courses.sort( - key=lambda x: x.get('name', 'ZZ'), - ) - return courses +register_cleaner_with_endpoints = functools.partial(learning_observer.lms_integration.register_cleaner, endpoints=GOOGLE_ENDPOINTS) # Google Docs @@ -405,72 +96,146 @@ def get_error_details(error): return {'error': {'code': code, 'message': message}} -@register_cleaner("document", "doctext") -def extract_text_from_google_doc_json( - j, align=True, - EXTRACT_DEBUG_CHECKS=False): - ''' - Extract text from a Google Docs JSON object, ignoring formatting. +class GoogleLMS(learning_observer.lms_integration.LMS): + def __init__(self): + super().__init__(lms_name=LMS_NAME, endpoints=GOOGLE_ENDPOINTS) - There is an alignment issue between Google's and Python's handling - of Unicode. 
We can either: - * extract text faithfully (align=False) - * extract text with aligned indexes by cutting text / adding - spaces (align=True) + # Rosters + @register_cleaner_with_endpoints("course_roster", "roster") + def clean_course_roster(google_json): + ''' + Retrieve the roster for a course, alphabetically + ''' + students = google_json.get('students', []) + students.sort( + key=lambda x: x.get('name', {}).get('fullName', 'ZZ'), + ) + # Convert Google IDs to internal ideas (which are the same, but with a gc- prefix) + for student_json in students: + google_id = student_json['profile']['id'] + local_id = learning_observer.auth.google_id_to_user_id(google_id) + student_json[constants.USER_ID] = local_id + del student_json['profile']['id'] + + # For the present there is only one external id so we will add that directly. + if 'external_ids' not in student_json['profile']: + student_json['profile']['external_ids'] = [] + student_json['profile']['external_ids'].append({"source": constants.GOOGLE, "id": google_id}) + return students + + @register_cleaner_with_endpoints("course_list", "courses") + def clean_course_list(google_json): + ''' + Google's course list is one object deeper than we'd like, and alphabetic + sort order is nicer. This will clean it up a bit + ''' + courses = google_json.get('courses', []) + courses.sort( + key=lambda x: x.get('name', 'ZZ'), + ) + return courses + + @register_cleaner_with_endpoints("document", "doctext") + def extract_text_from_google_doc_json( + j, align=True, + EXTRACT_DEBUG_CHECKS=False): + ''' + Extract text from a Google Docs JSON object, ignoring formatting. - This issue came up in text with a Russian flag unicode symbol - (referencing the current conflict). I tried various encodings, - and none quite matched Google 100%. + There is an alignment issue between Google's and Python's handling + of Unicode. 
We can either: + * extract text faithfully (align=False) + * extract text with aligned indexes by cutting text / adding + spaces (align=True) - Note that align=True doesn't necessarily give perfect local alignment - within text chunks, since we do have different lengths for something like - this flag. It does work okay globally. - ''' - # return error message for text - if 'error' in j: - return get_error_details(j['error']) - length = j['body']['content'][-1]['endIndex'] - elements = [a.get('paragraph', {}).get('elements', []) for a in j['body']['content']] - flat = sum(elements, []) - text_chunks = [f['textRun']['content'] for f in flat] - if align: - lengths = [f['endIndex'] - f['startIndex'] for f in flat] - text_chunks = [_force_text_length(chunk, length) for chunk, length in zip(text_chunks, lengths)] - text = ''.join(text_chunks) - - if EXTRACT_DEBUG_CHECKS: - print("Text length versus Google length:") - print(len(text), length) - print("We expect these to be off by one, since Google seems to starts at 1 (and Python at 0)") + This issue came up in text with a Russian flag unicode symbol + (referencing the current conflict). I tried various encodings, + and none quite matched Google 100%. + + Note that align=True doesn't necessarily give perfect local alignment + within text chunks, since we do have different lengths for something like + this flag. It does work okay globally. 
+ ''' + # return error message for text + if 'error' in j: + return get_error_details(j['error']) + length = j['body']['content'][-1]['endIndex'] + elements = [a.get('paragraph', {}).get('elements', []) for a in j['body']['content']] + flat = sum(elements, []) + text_chunks = [f.get('textRun', {}).get('content', '') for f in flat] if align: - print - print("Offsets (these should match):") - print(list(zip(itertools.accumulate(map(len, text_chunks)), itertools.accumulate(lengths)))) + for f in flat: + text = f.get('textRun', {}).get('content', None) + if text is not None: + length = f['endIndex'] - f['startIndex'] + text_chunks.append(_force_text_length(text, length)) + else: + for f in flat: + text = f.get('textRun', {}).get('content', None) + if text is not None: + text_chunks.append(text) + text = ''.join(text_chunks) + + if EXTRACT_DEBUG_CHECKS: + print("Text length versus Google length:") + print(len(text), length) + print("We expect these to be off by one, since Google seems to starts at 1 (and Python at 0)") + if align: + print + print("Offsets (these should match):") + print(list(zip(itertools.accumulate(map(len, text_chunks)), itertools.accumulate(lengths)))) + + return {'text': text} + + @register_cleaner_with_endpoints("coursework_submissions", "assigned_docs") + def clean_assignment_docs(google_json): + ''' + Retrieve set of documents per student associated with an assignment + ''' + student_submissions = google_json.get('studentSubmissions', []) + for student_json in student_submissions: + google_id = student_json[constants.USER_ID] + local_id = learning_observer.auth.google_id_to_user_id(google_id) + student_json[constants.USER_ID] = local_id + docs = [d['driveFile'] for d in learning_observer.util.get_nested_dict_value(student_json, 'assignmentSubmission.attachments', []) if 'driveFile' in d] + student_json['documents'] = docs + # TODO we should probably remove some of the keys provided + return student_submissions + + 
@learning_observer.prestartup.register_startup_check + def connect_to_google_cache(): + '''Setup cache for requests to the Google API. + The cache is currently only used with the `use_google_ajax` + feature flag. + ''' + if 'google_routes' not in settings.settings['feature_flags']: + return - return {'text': text} + for key in ['save_google_ajax', 'use_google_ajax', 'save_clean_ajax', 'use_clean_ajax']: + if key in settings.settings['feature_flags']: + global cache + try: + cache = learning_observer.kvs.KVS.google_cache() + except AttributeError: + error_text = 'The google_cache KVS is not configured.\n'\ + 'Please add a `google_cache` kvs item to the `kvs` '\ + 'key in `creds.yaml`.\n'\ + '```\ngoogle_cache:\n type: filesystem\n path: ./learning_observer/static_data/google\n'\ + ' subdirs: true\n```\nOR\n'\ + '```\ngoogle_cache:\n type: redis_ephemeral\n expiry: 600\n```' + raise learning_observer.prestartup.StartupCheck("Google KVS: " + error_text) -@register_cleaner("coursework_submissions", "assigned_docs") -def clean_assignment_docs(google_json): - ''' - Retrieve set of documents per student associated with an assignment - ''' - student_submissions = google_json.get('studentSubmissions', []) - for student_json in student_submissions: - google_id = student_json[constants.USER_ID] - local_id = learning_observer.auth.google_id_to_user_id(google_id) - student_json[constants.USER_ID] = local_id - docs = [d['driveFile'] for d in learning_observer.util.get_nested_dict_value(student_json, 'assignmentSubmission.attachments', []) if 'driveFile' in d] - student_json['documents'] = docs - # TODO we should probably remove some of the keys provided - return student_submissions +google_lms = GoogleLMS() + + +def initialize_google_routes(app): + google_lms.initialize_routes(app) if __name__ == '__main__': import json import sys j = json.load(open(sys.argv[1])) - # extract_text_from_google_doc_json(j, align=False, EXTRACT_DEBUG_CHECKS=True) - # 
extract_text_from_google_doc_json(j, align=True, EXTRACT_DEBUG_CHECKS=True) - output = clean_assignment_docs(j) + output = google_lms.clean_assignment_docs(j) print(json.dumps(output, indent=2)) diff --git a/learning_observer/learning_observer/lms_integration.py b/learning_observer/learning_observer/lms_integration.py new file mode 100644 index 000000000..4a8fbef68 --- /dev/null +++ b/learning_observer/learning_observer/lms_integration.py @@ -0,0 +1,487 @@ +import json +import recordclass +import string +import aiohttp +import aiohttp.web + +import learning_observer +import learning_observer.runtime +import learning_observer.google +import learning_observer.constants as constants +import learning_observer.settings as settings +import pmss + +pmss.register_field( + name="lms_api", + type=pmss.pmsstypes.TYPES.string, + description="The Canvas Base API URL", + required=True +) + +cache = None + + +class Endpoint(recordclass.make_dataclass("Endpoint", ["name", "remote_url", "doc", "cleaners", "lms"], defaults=["", None])): + """ + The Endpoint class represents an API endpoint, allowing for parameter extraction, + URL construction, and cleaner (function) management. + + Attributes: + name (str): The name of the endpoint. + remote_url (str): The remote URL of the endpoint, which may contain parameters. + doc (str): Documentation or description of the endpoint. + cleaners (dict): A dictionary of cleaner functions associated with the endpoint. + lms (str): The learning management system (LMS) that the endpoint belongs to. + """ + + def arguments(self): + """ + Extracts the parameters from the remote URL. + + Returns: + list: A list of parameters extracted from the remote_url. + """ + return extract_parameters_from_format_string(self.remote_url) + + def _local_url(self): + """ + Constructs the local URL based on the LMS, endpoint name, and any parameters. + + Returns: + str: The constructed local URL in the format "/{lms}/{name}/{parameters}". 
+ If there are no parameters, the URL will be "/{lms}/{name}". + """ + parameters = "}/{".join(self.arguments()) + base_url = f"/{self.lms}/{self.name}" + if len(parameters) == 0: + return base_url + else: + return base_url + "/{" + parameters + "}" + + def _add_cleaner(self, name, cleaner): + """ + Adds a cleaner function to the endpoint, assigning it a name. If the cleaner + doesn't have a local URL, one is generated. + + Args: + name (str): The name to associate with the cleaner. + cleaner (dict): The cleaner function to be added, optionally containing + additional metadata such as its local URL. + """ + if self.cleaners is None: + self.cleaners = dict() + self.cleaners[name] = cleaner + if 'local_url' not in cleaner: + cleaner['local_url'] = self._local_url + "/" + name + + def _cleaners(self): + """ + Retrieves the list of cleaner functions associated with the endpoint. + + Returns: + list: A list of cleaner functions, or an empty list if no cleaners exist. + """ + if self.cleaners is None: + return [] + else: + return self.cleaners + + +def extract_parameters_from_format_string(format_string): + ''' + Extracts parameters from a format string. E.g. + >>> extract_parameters_from_format_string("hello {hi} my {bye}") + ['hi', 'bye'] + + Args: + format_string (str): The format string containing parameters enclosed in braces. + + Returns: + list: A list of parameter names extracted from the format string. + ''' + return [f[1] for f in string.Formatter().parse(format_string) if f[1] is not None] + + +def raw_access_partial(raw_ajax_function, target_url, name=None): + ''' + Creates an asynchronous function that calls a specific LMS API. + + This helper function allows you to wrap an AJAX function to easily + call a specific API endpoint. + + Args: + raw_ajax_function (callable): The function to be called for making the AJAX request. + target_url (str): The target URL for the API call. + name (str, optional): The name to assign to the created function.
+ + Returns: + callable: An asynchronous function that can be called to perform the AJAX request. + ''' + async def ajax_caller(request, **kwargs): + ''' + Make an AJAX request to LMS + + Args: + request: The incoming request object. + **kwargs: Additional keyword arguments to pass to the raw AJAX function. + + Returns: + The response from the raw AJAX function. + ''' + return await raw_ajax_function(request, target_url, **kwargs) + setattr(ajax_caller, "__qualname__", name) + + return ajax_caller + + +def api_docs_handler(endpoints): + ''' + Returns a list of available endpoints in a human-readable format. + + Eventually, we should also document available function calls + + Args: + endpoints (list): A list of Endpoint objects to document. + + Returns: + aiohttp.web.Response: A response object containing the documentation of endpoints. + ''' + + response = "URL Endpoints:\n\n" + for endpoint in endpoints: + response += f"{endpoint._local_url()}\n" + cleaners = endpoint._cleaners() + for c in cleaners: + response += f" {cleaners[c]['local_url']}\n" + response += "\n\n Globals:" + return aiohttp.web.Response(text=response) + + +def register_cleaner(data_source, cleaner_name, endpoints): + ''' + Registers a cleaner function, allowing it to be exported both as a web service + and as a local function call. + + Args: + data_source (str): The name of the data source to associate with the cleaner. + cleaner_name (str): The name of the cleaner function to register. + endpoints (list): A list of Endpoint objects to search for the data source. + + Returns: + callable: A decorator for registering the cleaner function. + + Raises: + AttributeError: If the data source is not found in the endpoints. 
+ ''' + def add_cleaner(f): + found = False + for endpoint in endpoints: + if endpoint.name == data_source: + found = True + endpoint._add_cleaner( + cleaner_name, + { + 'function': f, + 'local_url': f'{endpoint._local_url()}/{cleaner_name}', + 'name': cleaner_name + } + ) + + if not found: + raise AttributeError(f"Data source {data_source} invalid; not found in endpoints.") + return f + + return add_cleaner + + +def make_ajax_raw_handler(raw_ajax_function, remote_url): + ''' + Creates an AJAX passthrough handler that calls a raw AJAX function. + + This function handles requests and passes them to the specified raw AJAX function, + returning the response as a JSON response. + + Args: + raw_ajax_function (callable): The raw AJAX function to call. + remote_url (str): The URL to which the AJAX request is sent. + + Returns: + callable: An asynchronous function that handles AJAX requests. + ''' + async def ajax_passthrough(request): + ''' + Handle the AJAX request by calling the raw AJAX function. + + Args: + request: The incoming request object. + + Returns: + aiohttp.web.json_response: A JSON response containing the result of the AJAX function. + ''' + runtime = learning_observer.runtime.Runtime(request) + response = await raw_ajax_function(runtime, remote_url, retry=True, **request.match_info) + return aiohttp.web.json_response(response) + return ajax_passthrough + + +def make_cleaner_handler(raw_function, cleaner_function, name=None): + ''' + Creates a handler for the cleaner function. + + This function will process the input from the raw function, apply the cleaner, + and return the cleaned response. + + Args: + raw_function (callable): The raw function to call. + cleaner_function (callable): The function to clean the response from the raw function. + name (str, optional): The name to assign to the created function. + + Returns: + callable: An asynchronous function that handles requests and cleans the responses. 
+ ''' + async def cleaner_handler(request): + ''' + Handle the request by applying the cleaner function to the raw function's response. + + Args: + request: The incoming request object. + + Returns: + aiohttp.web.json_response: A JSON response containing the cleaned data. + ''' + # Call the raw function with the request and match_info as parameters + response = cleaner_function(await raw_function(request, **request.match_info)) + + # Determine the response type and return appropriately + if isinstance(response, dict) or isinstance(response, list): + return aiohttp.web.json_response(response) # Return JSON response for dict or list + elif isinstance(response, str): + return aiohttp.web.Response(text=response) # Return plain text response if it's a string + else: + raise AttributeError(f"Invalid response type: {type(response)}") # Handle unexpected response types + if name is not None: + setattr(cleaner_handler, "__qualname__", name + "_handler") + + return cleaner_handler + + +def make_cleaner_function(raw_function, cleaner_function, name=None): + """ + Creates a cleaner function that processes the output of a raw function. + + This function wraps a raw function and a cleaner function, allowing the cleaner + to be applied to the response of the raw function. + + Args: + raw_function (callable): The function that makes the raw API call. + cleaner_function (callable): The function that cleans the response. + name (str, optional): The name to assign to the created cleaner function. + + Returns: + callable: An asynchronous cleaner function that calls the raw function + and processes its output with the cleaner function. + """ + async def cleaner_local(request, **kwargs): + """ + Handles the request, calls the raw function, and applies the cleaner function. + + Args: + request: The incoming request object. + **kwargs: Additional keyword arguments for the raw function. + + Returns: + The cleaned response from the cleaner function. 
+ """ + lms_response = await raw_function(request, **kwargs) + clean = cleaner_function(lms_response) + return clean + + if name is not None: + setattr(cleaner_local, "__qualname__", name) + return cleaner_local + + +async def raw_ajax(runtime, target_url, lms_name, base_url=None, **kwargs): + """ + Make an authenticated AJAX call to a specified service (e.g., Google, Canvas), handling + authorization, caching, and retries. + + Parameters: + - runtime: An instance of the Runtime class containing request information. + - lms_name: A string indicating the name of the service ('google' or 'canvas'). + - target_url: The URL endpoint to be called, with optional formatting using kwargs. + - base_url: An optional base URL for the service. If provided, it will be prefixed + to target_url. + - kwargs: Additional keyword arguments to format the target_url or control behavior + (e.g., retry). + + Returns: + - A JSON response from the requested service, with key translation if necessary. + + Raises: + - aiohttp.web.HTTPUnauthorized: If the request lacks necessary authorization. + - aiohttp.ClientResponseError: If the request fails, with special handling for 401 errors on Canvas. 
+ """ + # Retrieve the incoming request and active user + request = runtime.get_request() + user = await learning_observer.auth.get_active_user(request) + + # Extract 'retry' flag from kwargs (defaults to False) + retry = kwargs.pop('retry', False) + + # mapping to determine the appropriate headers based on the service + headers = { + constants.GOOGLE: request.get(constants.AUTH_HEADERS), + constants.CANVAS: request.get(constants.CANVAS_AUTH_HEADERS) + } + + # Ensure Google requests are authenticated + if lms_name == constants.GOOGLE and constants.AUTH_HEADERS not in request: + raise aiohttp.web.HTTPUnauthorized(text="Please log in") + + # Construct the full URL using the base URL if provided, otherwise use the target URL directly + if base_url: + url = base_url + target_url.format(**kwargs) + else: + url = target_url.format(**kwargs) + + # Generate a unique cache key based on the service, user, and request URL + cache_key = f"raw_{lms_name}/" + learning_observer.auth.encode_id('session', user[constants.USER_ID]) + '/' + learning_observer.util.url_pathname(url) + + cache_flag = f"use_{lms_name}_ajax" + # Check cache and return cached response if available + if settings.feature_flag(cache_flag) is not None: + value = await cache[cache_key] + if value is not None: + # Translate keys if the service is Google, otherwise return raw JSON + if lms_name == constants.GOOGLE: + return learning_observer.util.translate_json_keys( + json.loads(value), + learning_observer.google.GOOGLE_TO_SNAKE + ) + else: + return json.loads(value) + + # Make the actual AJAX call to the service + async with aiohttp.ClientSession(loop=request.app.loop) as client: + try: + async with client.get(url, headers=headers[lms_name]) as resp: + response = await resp.json() + + # Log the AJAX request and response + learning_observer.log_event.log_ajax(target_url, response, request) + # Cache the response if the feature flag is enabled + if settings.feature_flag(cache_flag) is not None: + await 
cache.set(cache_key, json.dumps(response, indent=2)) + # Translate keys if the service is Google, otherwise return raw JSON + if lms_name == constants.GOOGLE: + return learning_observer.util.translate_json_keys( + response, + learning_observer.google.GOOGLE_TO_SNAKE + ) + # Return response for other LMSes + else: + # Raise an exception for non-successful HTTP responses + resp.raise_for_status() + return response + # Handle 401 errors for Canvas with an optional retry + except aiohttp.ClientResponseError as e: + if lms_name == constants.CANVAS and e.status == 401 and retry: + new_tokens = await learning_observer.auth.social_sso._handle_canvas_authorization(request) + if 'access_token' in new_tokens: + return await raw_ajax(runtime, target_url, lms_name, base_url, **kwargs) + raise + + +# Abstract raw_ajax for each LMS to specify their different arguments + +async def raw_google_ajax(runtime, target_url, **kwargs): + """Make an authenticated AJAX call to the Google API.""" + return await raw_ajax(runtime, target_url, constants.GOOGLE, **kwargs) + + +async def raw_canvas_ajax(runtime, target_url, **kwargs): + """Make an authenticated AJAX call to the Canvas API.""" + base_url = settings.pmss_settings.lms_api(types=['lms', 'canvas_oauth']) + # This is used to request the access token again in order to retry the ajax call one more time + kwargs.setdefault('retry', True) + return await raw_ajax(runtime, target_url, constants.CANVAS, base_url, **kwargs) + + +class LMS: + """ + The LMS class represents a Learning Management System, encapsulating + the necessary information and methods for API interactions. + + Attributes: + lms_name (str): The name of the LMS (e.g., 'google', 'canvas'). + endpoints (list): A list of Endpoint objects that represent the API endpoints. + raw_ajax_function (dict): A dictionary mapping LMS names to their respective AJAX functions. 
+ """ + def __init__(self, lms_name, endpoints): + """ + Initializes the LMS instance with the specified name and endpoints. + + Args: + lms_name (str): The name of the LMS. + endpoints (list): A list of Endpoint objects. + """ + self.lms_name = lms_name + self.endpoints = endpoints + self.raw_ajax_function = { + constants.GOOGLE: raw_google_ajax, + constants.CANVAS: raw_canvas_ajax + } + + def initialize_routes(self, app): + """ + Initializes the API routes for the specified LMS within the given web application. + + This method sets up the endpoint routes and associates them with their corresponding + handler functions. + + Args: + app: An instance of the aiohttp web application to which routes will be added. + """ + + # Add the main API documentation route + app.add_routes([ + aiohttp.web.get(f"/{self.lms_name}", lambda _: api_docs_handler(self.endpoints)) + ]) + + # Iterate through the endpoints to set up routes for each one + for e in self.endpoints: + function_name = f"raw_{e.name}" # Construct the function name for the raw AJAX function + raw_function = raw_access_partial( + raw_ajax_function=self.raw_ajax_function[self.lms_name], # Get the appropriate raw AJAX function + target_url=e.remote_url, # Use the endpoint's remote URL + name=e.name # Set the name for the function + ) + globals()[function_name] = raw_function # Register the raw function globally + + # Add routes for each cleaner associated with the endpoint + cleaners = e._cleaners() + for c in cleaners: + app.add_routes([ + aiohttp.web.get( + cleaners[c]['local_url'], # The local URL for the cleaner + make_cleaner_handler(raw_function, cleaners[c]['function'], name=cleaners[c]['name']) # Handler for the cleaner + ) + ]) + lms_module = getattr(learning_observer, self.lms_name) # Get the module for the LMS + + # Create the cleaner function and set it in the LMS module + cleaner_function = make_cleaner_function( + raw_function, + cleaners[c]['function'], + name=cleaners[c]['name'] + ) + 
setattr(lms_module, cleaners[c]['name'], cleaner_function) + + # Add the main route for the endpoint + app.add_routes([ + aiohttp.web.get(e._local_url(), make_ajax_raw_handler( + self.raw_ajax_function[self.lms_name], # The raw AJAX function for the LMS + e.remote_url # The endpoint's remote URL + )) + ]) diff --git a/learning_observer/learning_observer/rosters.py b/learning_observer/learning_observer/rosters.py index 0f233f92a..92face58b 100644 --- a/learning_observer/learning_observer/rosters.py +++ b/learning_observer/learning_observer/rosters.py @@ -10,6 +10,7 @@ We can either retrieve class rosters from: - Google Classroom (config setting: 'google') +- Canvas (config setting: 'canvas') - Text files on the disk for testing. (config setting: 'test') We have two files: - courses.json @@ -27,14 +28,14 @@ As well as the option for several sources in the same system, perhaps. This file could be cleaned up a lot. Right now, we do a lot of this by -mock calls to Google AJAX. It also contains a large number of hacks which +mock calls to Google or Canvas AJAX. It also contains a large number of hacks which we use to manage the data and to address variations in the roster sources -whether we are taking them from google or from our own backup data. +whether we are taking them from google or canvas or from our own backup data. As of now this partially implements a separation between the internal ID which shows up in our rosters as id or `user_id` and the id used for the external sources of data. We store external ids on student data under -external_ids and keep space for ids from google etc. However as of now +external_ids and keep space for ids from google, canvas etc. However as of now we do not make use of it. 
Ultimately it would be ideal to move so that remote data retreival and raw document storage are done under an internal id with this translation taking place at event storage time *or* that the @@ -44,7 +45,7 @@ the potential to create some extra, though probably manageable, queries. In either case we get around it now by also adding in a cheap hack that -makes the internal ID for google-sourced users match the google ID. This +makes the internal ID for google/canvas-sourced users match the google ID. This will need to change in a stable way for future use. Note that these APIs and file locations aren't finished. In the future, @@ -73,6 +74,7 @@ import learning_observer.cache import learning_observer.constants as constants import learning_observer.google +import learning_observer.canvas import learning_observer.kvs import learning_observer.log_event as log_event from learning_observer.log_event import debug_log @@ -86,22 +88,23 @@ COURSE_URL = 'https://classroom.googleapis.com/v1/courses' ROSTER_URL = 'https://classroom.googleapis.com/v1/courses/{courseid}/students' -pmss.parser('roster_source', parent='string', choices=['google_api', 'all', 'test', 'filesystem'], transform=None) +pmss.parser('roster_source', parent='string', choices=['google_api', 'all', 'test', 'canvas', 'filesystem'], transform=None) pmss.register_field( name='source', type='roster_source', - description='Source to use for student class rosters. This can be\n'\ - '`all`: aggregate all available students into a single class\n'\ - '`test`: use sample course and student files\n'\ - '`filesystem`: read rosters defined on filesystem\n'\ - '`google_api`: fetch from Google API', + description='Source to use for student class rosters. 
This can be\n' + '`all`: aggregate all available students into a single class\n' + '`test`: use sample course and student files\n' + '`filesystem`: read rosters defined on filesystem\n' + '`google_api`: fetch from Google API\n' + '`canvas`: fetch from Canvas API', required=True ) -def clean_google_ajax_data(resp_json, key, sort_key, default=None, source=None): +def clean_combined_ajax_data(resp_json, key, sort_key, default=None, source=None): ''' - This cleans up / standardizes Google AJAX data. In particular: + This cleans up / standardizes Google/Canvas AJAX data. In particular: - We want to handle errors and empty lists better - We often don't want the whole response, but just one field (`key`) @@ -176,7 +179,7 @@ def adjust_external_gc_ids(resp_json): student_json[constants.USER_ID] = google_id # For the present there is only one external id so we will add that directly. - ext_ids = [{"source": "google", "id": google_id}] + ext_ids = [{"source": constants.GOOGLE, "id": google_id}] student_profile['external_ids'] = ext_ids @@ -256,7 +259,7 @@ async def synthetic_ajax( request, url, parameters=None, key=None, sort_key=None, default=None): ''' - Stub similar to google_ajax, but grabbing data from local files. + Stub similar to combined_ajax, but grabbing data from local files. This is helpful for testing, but it's even more helpful since Google is an amazingly unreliable B2B company, and this lets us @@ -301,11 +304,11 @@ async def synthetic_ajax( return data -async def google_ajax( +async def combined_ajax( request, url, parameters=None, key=None, sort_key=None, default=None): ''' - Request information through Google's API + Request information through the specified API Most requests return a dictionary with one key. 
If we just want that element, set `key` to be the element of the dictionary we want @@ -329,7 +332,7 @@ async def google_ajax( async with client.get(url.format(**parameters), headers=request[constants.AUTH_HEADERS]) as resp: resp_json = await resp.json() log_event.log_ajax(url, resp_json, request) - return clean_google_ajax_data( + return clean_combined_ajax_data( resp_json, key, sort_key, default=default ) @@ -359,8 +362,8 @@ def init(): ) elif roster_source in ['test', 'filesystem']: ajax = synthetic_ajax - elif roster_source in ["google_api"]: - ajax = google_ajax + elif roster_source in ['google_api', constants.CANVAS]: + ajax = combined_ajax elif roster_source in ["all"]: ajax = all_ajax else: @@ -369,6 +372,7 @@ def init(): "set to either:\n" " test (retrieve from files courses.json and students.json)\n" " google_api (retrieve roster data from Google)\n" + " canvas (retrieve roster data from Canvas)\n" " filesystem (retrieve roster data from file system hierarchy\n" " all (retrieve roster data as all students)" ) @@ -409,10 +413,18 @@ async def courselist(request): ''' List all of the courses a teacher manages: Helper ''' - # New code - if settings.pmss_settings.source(types=['roster_data']) in ["google_api"]: - runtime = learning_observer.runtime.Runtime(request) - return await learning_observer.google.courses(runtime) + + # A map of LMSes to their respective handler functions + lms_map = { + constants.GOOGLE: learning_observer.google.courses, + constants.CANVAS: learning_observer.canvas.courses + } + + runtime = learning_observer.runtime.Runtime(request) + + roster_source = settings.pmss_settings.source(types=['roster_data']) + if roster_source in lms_map: + return await lms_map[roster_source](runtime) # Legacy code course_list = await ajax( @@ -454,9 +466,18 @@ async def courseroster(request, course_id): ''' List all of the students in a course: Helper ''' - if settings.pmss_settings.source(types=['roster_data']) in ["google_api"]: - runtime = 
learning_observer.runtime.Runtime(request) - return await learning_observer.google.roster(runtime, courseId=course_id) + + # A map of LMSes to their respective handler functions + lms_map = { + constants.GOOGLE: learning_observer.google.roster, + constants.CANVAS: learning_observer.canvas.roster + } + + runtime = learning_observer.runtime.Runtime(request) + + roster_source = settings.pmss_settings.source(types=['roster_data']) + if roster_source in lms_map: + return await lms_map[roster_source](runtime, courseId=course_id) roster = await ajax( request, diff --git a/learning_observer/learning_observer/routes.py b/learning_observer/learning_observer/routes.py index b7adf3812..b3c193141 100644 --- a/learning_observer/learning_observer/routes.py +++ b/learning_observer/learning_observer/routes.py @@ -23,11 +23,13 @@ import learning_observer.incoming_student_event as incoming_student_event import learning_observer.dashboard import learning_observer.google +import learning_observer.canvas import learning_observer.rosters as rosters import learning_observer.module_loader import learning_observer.paths as paths import learning_observer.settings as settings +import learning_observer.constants as constants from learning_observer.log_event import debug_log, startup_state @@ -66,7 +68,7 @@ def tracemalloc_handler(request): register_static_routes(app) register_incoming_event_views(app) register_debug_routes(app) - learning_observer.google.initialize_and_register_routes(app) + register_lms_routes(app) app.add_routes([ aiohttp.web.get( @@ -165,6 +167,17 @@ def tracemalloc_handler(request): register_wsgi_routes(app) +def register_lms_routes(app): + """ + Register routes for the various Learning Management Systems (LMS). + + Parameters: + - app: An instance of aiohttp.web.Application where the routes will be registered. 
+ """ + learning_observer.google.initialize_google_routes(app) + learning_observer.canvas.initialize_canvas_routes(app) + + def register_debug_routes(app): ''' Handy-dandy information views, useful for debugging and development. diff --git a/learning_observer/learning_observer/settings.py b/learning_observer/learning_observer/settings.py index f9140b3b5..8582507a0 100644 --- a/learning_observer/learning_observer/settings.py +++ b/learning_observer/learning_observer/settings.py @@ -39,6 +39,7 @@ args = None parser = None + def str_to_bool(arg): if isinstance(arg, bool): return arg @@ -123,9 +124,9 @@ def parse_and_validate_arguments(): pmss.register_field( name='run_mode', type='run_mode', - description="Set which mode the server is running in.\n"\ - "`dev` for local development with full debugging\n"\ - "`deploy` for running on a server with better performance\n"\ + description="Set which mode the server is running in.\n" + "`dev` for local development with full debugging\n" + "`deploy` for running on a server with better performance\n" "`interactive` for processing data offline", required=True ) @@ -223,7 +224,7 @@ def initialized(): # Not all of these are guaranteed to work on every branch of the codebase. -AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', 'merkle', 'save_google_ajax', 'use_google_ajax'] +AVAILABLE_FEATURE_FLAGS = ['uvloop', 'watchdog', 'auth_headers_page', 'merkle', 'save_google_ajax', 'use_google_ajax', 'use_canvas_ajax'] def feature_flag(flag):