diff --git a/VERSION b/VERSION
index bc02b868..c959dfbb 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.6.11
+2.6.12
diff --git a/docker/docker-compose.deployment.yml b/docker/docker-compose.deployment.yml
index 32c52058..83992daf 100644
--- a/docker/docker-compose.deployment.yml
+++ b/docker/docker-compose.deployment.yml
@@ -10,5 +10,6 @@ services:
         limits:
           cpus: '8' # 50%
           memory: 19.2G # 60%
-
-
+    volumes:
+      # Mount conf.d on host machine to the nginx conf.d on container
+      - "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
diff --git a/docker/docker-compose.development.yml b/docker/docker-compose.development.yml
index c89b1284..57e6a8f1 100644
--- a/docker/docker-compose.development.yml
+++ b/docker/docker-compose.development.yml
@@ -3,7 +3,7 @@ services:
   entity-api:
     build:
       context: ./entity-api
-      # Uncomment if tesitng against a specific branch of commons other than the PyPI package
+      # Uncomment if testing against a specific branch of commons other than the PyPI package
       # Will also need to use the 'git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons'
       # in src/requirements.txt accordingly
       args:
@@ -25,3 +25,5 @@ services:
       - "../BUILD:/usr/src/app/BUILD"
       # Mount the source code to container
       - "../src:/usr/src/app/src"
+      # Mount conf.d on host machine to the nginx conf.d on container
+      - "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
diff --git a/docker/docker-compose.localhost.yml b/docker/docker-compose.localhost.yml
new file mode 100644
index 00000000..80e23a51
--- /dev/null
+++ b/docker/docker-compose.localhost.yml
@@ -0,0 +1,33 @@
+services:
+
+  entity-api:
+    build:
+      context: ./entity-api
+      args:
+        - COMMONS_BRANCH=${COMMONS_BRANCH:-main}
+    image: hubmap/entity-api:${ENTITY_API_VERSION:?err}
+    environment:
+      - DEPLOY_MODE=localhost
+    volumes:
+      # Mount VERSION and BUILD files
+      - "../VERSION:/usr/src/app/VERSION"
+      - "../BUILD:/usr/src/app/BUILD"
+      ## Mount source code for live development
+      #- "../src:/usr/src/app/src"
+      # Mount localhost-specific nginx config
+      - "${PWD}/entity-api/nginx/conf.d-localhost:/etc/nginx/conf.d"
+    healthcheck:
+      # Replaces base healthcheck - Check port 8080 inside container (nginx listening port)
+      test: [ "CMD", "curl", "--fail", "http://localhost:8080/status" ]
+      interval: 12h # Set long on localhost to avoid logging that interferes with development
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    logging:
+      # Override CloudWatch logging - use local json-file driver for localhost
+      driver: json-file
+      options:
+        max-size: "10m"
+        max-file: "3" # Keep 3 files, rotating oldest out
+    networks:
+      - gateway_hubmap # Same network as hubmap-auth for communication
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 8b36e89d..df022337 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -27,8 +27,6 @@ services:
       - "../log:/usr/src/app/log"
       # Mount the schema yaml file
       - "../src/schema/provenance_schema.yaml:/usr/src/app/src/schema/provenance_schema.yaml"
-      # Mount conf.d on host machine to the nginx conf.d on container
-      - "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
     networks:
       - gateway_hubmap
     # Send docker logs to AWS CloudWatch
@@ -40,6 +38,6 @@ services:
         awslogs-stream: ${LOG_STREAM}
 
 networks:
-  # This is the network created by gateway to enable communicaton between multiple docker-compose projects
+  # This is the network created by gateway to enable communication between multiple docker-compose projects
   gateway_hubmap:
     external: true
diff --git a/docker/docker-localhost.sh b/docker/docker-localhost.sh
new file mode 100755
index 00000000..a734c166
--- /dev/null
+++ b/docker/docker-localhost.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+# Print a new line and the banner
+echo
+echo "==================== Entity-API ===================="
+
+function tier_check() {
+    # Get the script name and extract DEPLOY_TIER
+    SCRIPT_NAME=$(basename "${0}")
+
+    # Extract deploy tier from script name (docker-*.sh pattern)
+    if [[ ${SCRIPT_NAME} =~ docker-(.*)\.sh ]]; then
+        DEPLOY_TIER="${BASH_REMATCH[1]}"
+    else
+        echo "Error: Script name doesn't match pattern 'docker-*.sh'"
+        exit 1
+    fi
+    echo "Executing ${SCRIPT_NAME} to deploy in Docker on ${DEPLOY_TIER}"
+}
+
+# Chances are localhost development is not being done on an RHEL server with
+# the environment variables set. Unset HOST_UID and HOST_GID to ensure
+# docker-compose defaults (1001:1001) are used.
+function export_host_ids() {
+    if [ -n "${HOST_UID}" ] || [ -n "${HOST_GID}" ]; then
+        echo "WARNING: HOST_UID and HOST_GID are set in your environment but will be ignored for localhost."
+        echo " Localhost development uses docker-compose.yml defaults."
+    fi
+    # Unset to ensure docker-compose defaults are used
+    unset HOST_UID
+    unset HOST_GID
+}
+
+# The `absent_or_newer` checks if the copied src at docker/some-api/src directory exists
+# and if the source src directory is newer.
+# If both conditions are true `absent_or_newer` writes an error message
+# and causes script to exit with an error code.
+function absent_or_newer() {
+    if [ -e "${1}" ] && [ "${2}" -nt "${1}" ]; then
+        echo "${1} is out of date"
+        exit 1
+    fi
+}
+
+function get_dir_of_this_script() {
+    # This function sets DIR to the directory in which this script itself is found.
+    # Thank you https://stackoverflow.com/questions/59895/how-to-get-the-source-directory-of-a-bash-script-from-within-the-script-itself
+    SCRIPT_SOURCE="${BASH_SOURCE[0]}"
+    while [ -h "${SCRIPT_SOURCE}" ]; do # resolve $SCRIPT_SOURCE until the file is no longer a symlink
+        DIR="$( cd -P "$( dirname "${SCRIPT_SOURCE}" )" >/dev/null 2>&1 && pwd )"
+        SCRIPT_SOURCE="$(readlink "${SCRIPT_SOURCE}")"
+        [[ ${SCRIPT_SOURCE} != /* ]] && SCRIPT_SOURCE="${DIR}/${SCRIPT_SOURCE}" # if $SCRIPT_SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
+    done
+    DIR="$( cd -P "$( dirname "${SCRIPT_SOURCE}" )" >/dev/null 2>&1 && pwd )"
+    echo "DIR of script: ${DIR}"
+}
+
+# Generate the build version based on git branch name and short commit hash and write into BUILD file
+function generate_build_version() {
+    GIT_BRANCH_NAME=$(git branch | sed -n -e 's/^\* \(.*\)/\1/p')
+    GIT_SHORT_COMMIT_HASH=$(git rev-parse --short HEAD)
+    # Clear the old BUILD version and write the new one
+    truncate -s 0 ../BUILD
+    # Note: echo to file appends newline
+    echo "${GIT_BRANCH_NAME}:${GIT_SHORT_COMMIT_HASH}" >> ../BUILD
+    # Remove the trailing newline character
+    truncate -s -1 ../BUILD
+    echo "BUILD(git branch name:short commit hash): ${GIT_BRANCH_NAME}:${GIT_SHORT_COMMIT_HASH}"
+}
+
+# Set the version environment variable for the docker build
+# Version number is from the VERSION file
+# Also remove newlines (tr) and leading/trailing whitespace (xargs) if present in that VERSION file
+function export_version() {
+    export ENTITY_API_VERSION=$(tr -d "\n\r" < ../VERSION | xargs)
+    echo "ENTITY_API_VERSION: ${ENTITY_API_VERSION}"
+}
+
+if [[ "${1}" != "check" && "${1}" != "config" && "${1}" != "build" && "${1}" != "start" && "${1}" != "stop" && "${1}" != "down" ]]; then
+    echo "Unknown command '${1}', specify one of the following: check|config|build|start|stop|down"
+    exit 1
+else
+    # Echo this script name and the tier expected for Docker deployment
+    tier_check
+
+    # Always show the script dir
+    get_dir_of_this_script
+
+    # Always export and show the version
+    export_version
+
+    # Unset HOST_UID/HOST_GID for localhost to use defaults
+    export_host_ids
+
+    # Always show the build in case branch changed or new commits
+    generate_build_version
+
+    # Print empty line
+    echo
+
+    if [ "${1}" = "check" ]; then
+        # Bash array
+        config_paths=(
+            '../src/instance/app.cfg'
+        )
+
+        for pth in "${config_paths[@]}"; do
+            if [ ! -e "${pth}" ]; then
+                echo "Missing file (relative path to DIR of script): ${pth}"
+                exit 1
+            fi
+        done
+
+        absent_or_newer entity-api/src ../src
+
+        echo 'Checks complete, all good :)'
+    elif [ "${1}" = "config" ]; then
+        docker compose -f docker-compose.yml -f docker-compose.${DEPLOY_TIER}.yml -p entity-api config
+    elif [ "${1}" = "build" ]; then
+        # Delete the copied source code dir if exists
+        if [ -d "entity-api/src" ]; then
+            rm -rf entity-api/src
+        fi
+
+        # Copy over the src folder
+        cp -r ../src entity-api/
+
+        # Delete old VERSION and BUILD files if found
+        if [ -f "entity-api/VERSION" ]; then
+            rm -rf entity-api/VERSION
+        fi
+
+        if [ -f "entity-api/BUILD" ]; then
+            rm -rf entity-api/BUILD
+        fi
+
+        # Copy over the VERSION and BUILD files
+        cp ../VERSION entity-api
+        cp ../BUILD entity-api
+
+        docker compose -f docker-compose.yml -f docker-compose.${DEPLOY_TIER}.yml -p entity-api build --no-cache
+    elif [ "${1}" = "start" ]; then
+        docker compose -f docker-compose.yml -f docker-compose.${DEPLOY_TIER}.yml -p entity-api up -d
+    elif [ "${1}" = "stop" ]; then
+        docker compose -f docker-compose.yml -f docker-compose.${DEPLOY_TIER}.yml -p entity-api stop
+    elif [ "${1}" = "down" ]; then
+        docker compose -f docker-compose.yml -f docker-compose.${DEPLOY_TIER}.yml -p entity-api down
+    fi
+fi
\ No newline at end of file
diff --git a/docker/entity-api/nginx/conf.d-localhost/entity-api.conf b/docker/entity-api/nginx/conf.d-localhost/entity-api.conf
new file mode 100644
index 00000000..408fcdba
--- /dev/null
+++ 
b/docker/entity-api/nginx/conf.d-localhost/entity-api.conf
@@ -0,0 +1,82 @@
+server {
+    # Only root can listen on ports below 1024, we use higher-numbered ports
+    # since nginx is running under non-root user hubmap
+    listen 8080;
+    server_name localhost;
+    root /usr/share/nginx/html;
+
+    # Docker's internal DNS resolver
+    resolver 127.0.0.11 valid=10s;
+    resolver_timeout 5s;
+
+    # We need this logging for inspecting auth requests from other internal services
+    # Logging to the mounted volume for outside container access
+    access_log /usr/src/app/log/nginx_access_entity-api.log;
+    error_log /usr/src/app/log/nginx_error_entity-api.log warn;
+
+    # Set payload size limit to 10M, default is 1M.
+    client_max_body_size 10M;
+
+    # Pass requests to the uWSGI server using the "uwsgi" protocol on port 5000
+    location / {
+        # Always enable CORS
+        # Response to preflight requests
+        if ($request_method = 'OPTIONS') {
+            add_header 'Access-Control-Allow-Origin' '*' always;
+            add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, OPTIONS' always;
+            # These are permitted headers to be used with the actual request
+            add_header 'Access-Control-Allow-Headers' 'Authorization, Cache-Control, Content-Type, X-Hubmap-Application' always;
+            # Cache the response to this preflight request in browser for the max age 86400 seconds (= 24 hours)
+            add_header 'Access-Control-Max-Age' 86400 always;
+            # No Content
+            return 204;
+        }
+
+        # Response to the original requests (HTTP methods are case-sensitive) with CORS enabled
+        if ($request_method ~ (POST|GET|PUT)) {
+            add_header 'Access-Control-Allow-Origin' '*' always;
+            add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, OPTIONS' always;
+            add_header 'Access-Control-Allow-Headers' 'Authorization, Cache-Control, Content-Type, X-Hubmap-Application' always;
+        }
+
+        # Capture original request details BEFORE auth_request subrequest
+        set $original_uri $request_uri;
+        set $original_method $request_method;
+
+        # Call hubmap-auth for authorization before passing to entity-api
+        auth_request /api_auth;
+
+        # Pass hubmap-auth response headers to Flask as HTTP_* params so request.headers exposes them
+        auth_request_set $auth_user $upstream_http_x_hubmap_user;
+        auth_request_set $auth_groups $upstream_http_x_hubmap_groups;
+        uwsgi_param HTTP_X_HUBMAP_USER $auth_user if_not_empty;
+        uwsgi_param HTTP_X_HUBMAP_GROUPS $auth_groups if_not_empty;
+
+        include uwsgi_params;
+        uwsgi_pass uwsgi://localhost:5000;
+    }
+
+    # Internal location for auth requests - calls hubmap-auth container
+    location = /api_auth {
+        internal;
+        # Use variable to enable runtime DNS resolution via Docker's internal DNS.
+        # This allows hubmap-auth to restart without requiring entity-api nginx reload.
+        set $hubmap_auth_backend "hubmap-auth:7777";
+        # Call hubmap-auth via Docker network using container hostname and port 7777
+        proxy_pass http://$hubmap_auth_backend/api_auth;
+        proxy_pass_request_body off;
+        proxy_set_header Content-Length "";
+        #
+        # These headers match what hubmap-auth app.py expects.
+        #
+        # We need to hard-code the Host to the Docker service name on the
+        # Docker network gateway_hubmap, rather than $http_host, so that
+        # a value like localhost:3333 is not passed. The value must be a
+        # JSON Object key in the gateway repository's api_endpoints.localhost.json.
+ proxy_set_header Host "entity-api"; + #proxy_set_header X-Original-URI $request_uri; + #proxy_set_header X-Original-Request-Method $request_method; + proxy_set_header X-Original-URI $original_uri; + proxy_set_header X-Original-Request-Method $original_method; + } +} \ No newline at end of file diff --git a/docker/entity-api/nginx/nginx.conf b/docker/entity-api/nginx/nginx.conf index 85bb1e4f..ff4445c0 100644 --- a/docker/entity-api/nginx/nginx.conf +++ b/docker/entity-api/nginx/nginx.conf @@ -18,6 +18,7 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; + # Extend nginx's default Combined Log Format with $http_x_forwarded_for log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; diff --git a/src/app.py b/src/app.py index ab832ddc..b9e7616b 100644 --- a/src/app.py +++ b/src/app.py @@ -38,6 +38,7 @@ from schema.schema_constants import TriggerTypeEnum from metadata_constraints import get_constraints, constraints_json_is_valid # from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK +from setup_lifecycle_hooks import setup_flask_lifecycle_hooks # HuBMAP commons from hubmap_commons import string_helper @@ -64,6 +65,9 @@ # will be inherited by the sub-module loggers logger = logging.getLogger() +# Add in Flask lifecycle hooks which rely on the logger being instantiated +setup_flask_lifecycle_hooks(app) + # Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/') app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/') @@ -390,7 +394,17 @@ def get_status(): @app.route('/usergroups', methods = ['GET']) def get_user_groups(): token = get_user_token(request) - groups_list = auth_helper_instance.get_user_groups_deprecated(token) + try: + groups_list = auth_helper_instance.get_user_groups_deprecated(token) + 
except HTTPException as he: + # Retrieve the appropriate error handling function based upon the + # code, or the HTTP 500 handling function if nothing is mapped. + handler = error_handlers.get(he.status_code + , error_handlers[500]) + # Execute the error handler, using the description from the HTTPException + handler(he.description) + except Exception as e: + internal_server_error(e) return jsonify(groups_list) @@ -4409,6 +4423,15 @@ def not_found_error(err_msg): def internal_server_error(err_msg): abort(500, description = err_msg) +# Create a dict which allows preceding error handling functions to be invoked with +# less if-then-else coding. +error_handlers = { + 400: bad_request_error, + 401: unauthorized_error, + 403: forbidden_error, + 404: not_found_error, + 500: internal_server_error +} """ Parse the token from Authorization header diff --git a/src/setup_lifecycle_hooks.py b/src/setup_lifecycle_hooks.py new file mode 100644 index 00000000..794fdd8b --- /dev/null +++ b/src/setup_lifecycle_hooks.py @@ -0,0 +1,141 @@ +""" +Flask lifecycle hooks for API request/response logging. Uses the existing global logger configured in app.py. + +Provides before_request and after_request hooks that log API usage in using +Common Log Format, as previously used for API Gateway custom access log format on AWS. 
+https://en.wikipedia.org/wiki/Common_Log_Format#Combined_Log_Format
+
+Log format:
+    $sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId
+replacement for AWS API Gateway custom access log format:
+    $context.identity.sourceIp $context.identity.caller $context.identity.user [$context.requestTime]
+    "$context.httpMethod $context.resourcePath $context.protocol"
+    $context.status $context.responseLength $context.requestId
+
+Example log output:
+    [2026-03-18 18:52:25] API_USAGE in setup_lifecycle_hooks: Request started: DELETE /flush-cache/12345678901234567890123456789012 from 172.19.0.1 [ID: req-1773859945850-1262]
+    [2026-03-18 18:52:25] API_USAGE in setup_lifecycle_hooks: 172.19.0.1 - - [18/Mar/2026:18:52:25 +0000] "DELETE /flush-cache/12345678901234567890123456789012 HTTP/1.1" 200 69 req-1773859945850-1262
+"""
+
+import logging
+import time
+from flask import request, g
+from datetime import datetime, timezone
+
+# Use the same logger configuration as app.py
+logger = logging.getLogger(__name__)
+
+# For the hooks used to log endpoint usage, set the level to use while
+# logging these events, and to be used to return quickly when the
+# logger is not enabled for that level.
+ENDPOINT_LOG_LEVEL = logging.INFO - 1
+logging.addLevelName(ENDPOINT_LOG_LEVEL, "API_USAGE")
+
+def setup_flask_lifecycle_hooks(app):
+    """
+    Register Flask lifecycle hooks for request/response logging.
+
+    Sets up before_request and after_request handlers that log all API calls
+    using the existing logger configured in app.py.
+
+    Args:
+        app: Flask application instance
+
+    Usage:
+        from setup_lifecycle_hooks import setup_flask_lifecycle_hooks
+
+        app = Flask(__name__)
+        # ... existing logger configuration ...
+        setup_flask_lifecycle_hooks(app)
+    """
+
+    @app.before_request
+    def log_endpoint_request():
+        """
+        Log basic request information at ENDPOINT_LOG_LEVEL level when request starts.
+
+        Runs BEFORE any route function executes.
+        Captures request start time and generates a per-request ID.
+        """
+        # Bail out on this hook method immediately if the logger statement at
+        # the end of the method would not be logged.
+        if not logger.isEnabledFor(ENDPOINT_LOG_LEVEL):
+            return
+
+        # Store request start time for potential duration calculation
+        g.request_start_time = time.time()
+
+        # Request ID: millisecond timestamp plus a nanosecond-counter suffix that differs per call
+        g.request_id = f"req-{int(g.request_start_time * 1000)}-{time.perf_counter_ns() % 10000}"
+
+        logger.log(ENDPOINT_LOG_LEVEL,
+                   "Request started: %s %s from %s [ID: %s]",
+                   request.method, request.path, request.remote_addr, g.request_id)
+
+    @app.after_request
+    def log_endpoint_response(response):
+        """
+        Log complete API usage in AWS API Gateway format at ENDPOINT_LOG_LEVEL.
+
+        Runs AFTER route function executes (or after error handler if route failed).
+        Has access to both request and response data.
+
+        Format matches AWS API Gateway custom access logs:
+        $sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId
+
+        Args:
+            response: Flask response object
+
+        Returns:
+            response: Must return the response unchanged
+        """
+        # Bail out on this hook method immediately if the logger statement at
+        # the end of the method would not be logged.
+        if not logger.isEnabledFor(ENDPOINT_LOG_LEVEL):
+            return response
+
+        # Extract request details
+        source_ip = request.remote_addr or '-'
+
+        # Caller - not available without AWS IAM, use '-'
+        caller = '-'
+
+        # User from X-Hubmap-User header (set by hubmap-auth after authorization)
+        # Falls back to '-' if not authenticated
+        user = request.headers.get('X-Hubmap-User', '-')
+
+        # Request time in AWS/Apache format: [DD/MMM/YYYY:HH:MM:SS +0000]
+        request_time = datetime.now(timezone.utc).strftime('%d/%b/%Y:%H:%M:%S +0000')
+
+        # HTTP method, path, and protocol
+        method = request.method
+        resource_path = request.path
+        protocol = request.environ.get('SERVER_PROTOCOL', 'HTTP/1.1')
+
+        # Response status code
+        status = response.status_code
+
+        # Response length in bytes. Only measure a body that is already buffered:
+        # reading a streamed or direct-passthrough body would consume it or raise.
+        response_length = response.content_length
+        if response_length is None:
+            unbuffered = response.is_streamed or response.direct_passthrough
+            response_length = '-' if unbuffered else len(response.get_data())
+
+        # Request ID (generated in before_request, or '-' if not available)
+        request_id = getattr(g, 'request_id', '-')
+
+        # Format log message matching AWS API Gateway custom access log format:
+        # $sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId
+        log_message = (
+            f'{source_ip} {caller} {user} '
+            f'[{request_time}] '
+            f'"{method} {resource_path} {protocol}" '
+            f'{status} {response_length} {request_id}'
+        )
+
+        logger.log(level=ENDPOINT_LOG_LEVEL,
+                   msg=log_message)
+
+        # Must return response unchanged for Flask
+        return response
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 00000000..181205fa
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,177 @@
+# Entity-API Test Suite
+
+This directory contains all tests for the entity-api service, organized by test type and deployment environment.
+ +## Directory Structure + +``` +test/ +├── README.md # This file - test suite overview +├── localhost/ # Tests for localhost Docker deployment +│ ├── integration/ # Integration tests with hubmap-auth +│ └── performance/ # Performance benchmarks (future) +└── [existing test files] # Other test types +``` + +## Test Categories + +### Localhost Tests (`localhost/`) + +Tests for entity-api running in Docker Desktop for local development and proof-of-concept deployments. + +**When to run:** Before pushing changes that affect localhost deployment, Docker configuration, or hubmap-auth integration. + +**See:** [localhost/README.md](localhost/README.md) + +### Integration Tests (`localhost/integration/`) + +End-to-end tests verifying entity-api integrates correctly with hubmap-auth for authorization over the `gateway_hubmap` Docker network. + +**See:** [localhost/integration/README.md](localhost/integration/README.md) + +### Performance Tests (`localhost/performance/`) - Future + +Load testing and performance benchmarks for localhost deployment. + +## Quick Start + +### Run All Tests + +```bash +# Activate virtual environment +source .venv/bin/activate + +# Run all tests +python -m unittest discover -s test -v +``` + +### Run Localhost Integration Tests Only + +```bash +source .venv/bin/activate +python -m unittest discover -s test/localhost/integration -v +``` + +### Prerequisites + +1. **Docker containers running:** + ```bash + # Start hubmap-auth first + cd gateway + ./docker-localhost.sh start + + # Then start entity-api + cd entity-api/docker + ./docker-localhost.sh start + + # Verify both are healthy + docker ps | grep -E "hubmap-auth|entity-api" + ``` + +2. 
**Python virtual environment:** + + Tests use the same dependencies as the main application: + + ```bash + # Create virtual environment (first time only) + python3 -m venv .venv + + # Activate virtual environment + source .venv/bin/activate + + # Install application dependencies (includes requests) + pip install -r src/requirements.txt + ``` + +## CI/CD Integration + +These tests are designed to run in GitHub Actions or similar CI/CD systems. Example workflow: + +```yaml +name: Entity-API Localhost Integration Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Checkout gateway repo + uses: actions/checkout@v3 + with: + repository: hubmapconsortium/gateway + path: gateway + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.13' + + - name: Create Docker network + run: docker network create gateway_hubmap + + - name: Start hubmap-auth + run: | + cd gateway + ./docker-localhost.sh build + ./docker-localhost.sh start + + - name: Wait for hubmap-auth healthy + run: timeout 60 bash -c 'until docker ps | grep hubmap-auth | grep healthy; do sleep 2; done' + + - name: Start entity-api + run: | + cd docker + ./docker-localhost.sh build + ./docker-localhost.sh start + + - name: Wait for entity-api healthy + run: timeout 60 bash -c 'until docker ps | grep entity-api | grep healthy; do sleep 2; done' + + - name: Install test dependencies + run: | + python -m venv .venv + source .venv/bin/activate + pip install -r src/requirements.txt + + - name: Run integration tests + run: | + source .venv/bin/activate + python -m unittest discover -s test/localhost/integration -v +``` + +## Contributing + +When adding new tests: + +1. **Choose the right directory** - Place tests in the appropriate subdirectory based on type +2. **Follow existing patterns** - Match the style and structure of existing tests +3. **Add documentation** - Update relevant README files +4. 
**Keep tests independent** - Each test should run in isolation +5. **Use descriptive names** - Test names should clearly indicate what they verify +6. **Handle errors gracefully** - Provide actionable error messages + +## Test Execution Order + +Tests are discovered and run alphabetically by default. If execution order matters: + +1. Use `setUpClass` and `tearDownClass` for class-level setup +2. Use `setUp` and `tearDown` for test-level setup +3. Name test files to control discovery order if needed + +## Getting Help + +- **Test failures:** Check container logs with `docker logs entity-api` +- **Connection errors:** Verify containers are running with `docker ps` +- **Import errors:** Ensure virtual environment is activated +- **Docker issues:** Check Docker Desktop is running +- **Auth failures:** Verify hubmap-auth is running and healthy + +## Related Documentation + +- [Entity-API Deployment Guide](../README.md) +- [Gateway API Endpoints Configuration](../../gateway/api_endpoints.localhost.json) +- [Docker Compose Configuration](../docker/docker-compose.localhost.yml) +- [Gateway Test Suite](../../gateway/test/README.md) diff --git a/test/localhost/README.md b/test/localhost/README.md new file mode 100644 index 00000000..d9aafac6 --- /dev/null +++ b/test/localhost/README.md @@ -0,0 +1,202 @@ +# Localhost Testing for Entity-API + +This directory contains tests for entity-api running in Docker Desktop on localhost. These tests verify the service works correctly in a local development/proof-of-concept environment and properly integrates with hubmap-auth for authorization. + +## Purpose + +Localhost tests serve multiple purposes: + +1. **Pre-deployment verification** - Validate configuration changes before pushing to DEV +2. **Authorization integration** - Verify entity-api correctly uses hubmap-auth +3. **Proof-of-concept** - Demonstrate entity-api deployment without AWS infrastructure +4. 
**Regression testing** - Ensure changes don't break existing functionality + +## Test Types + +### Integration Tests (`integration/`) + +End-to-end tests that verify entity-api integrates correctly with hubmap-auth over Docker networking. + +**What they test:** +- Container startup and health +- nginx auth_request integration with hubmap-auth +- Public endpoints accessible without auth +- Protected endpoints require proper authorization +- Flask application responses +- Docker network connectivity + +**See:** [integration/README.md](integration/README.md) + +### Performance Tests (`performance/`) - Future + +Benchmarks and load tests for localhost deployment. + +**What they will test:** +- Response time under load +- Concurrent request handling +- Database query performance +- Memory usage patterns + +## Prerequisites + +### 1. Docker Setup + +Create the shared Docker network (one-time setup): +```bash +docker network create gateway_hubmap +``` + +### 2. Build and Start Containers + +```bash +# Start hubmap-auth first (entity-api depends on it) +cd gateway +./docker-localhost.sh build +./docker-localhost.sh start + +# Wait for healthy status +docker ps | grep hubmap-auth # Should show "healthy" + +# Then start entity-api +cd entity-api/docker +./docker-localhost.sh build +./docker-localhost.sh start + +# Verify both containers are healthy +docker ps | grep -E "hubmap-auth|entity-api" +``` + +### 3. 
Python Environment + +Tests use the same dependencies as the main application: + +```bash +# Create virtual environment (first time only, from entity-api repo root) +python3 -m venv .venv + +# Activate virtual environment +source .venv/bin/activate + +# Install application dependencies (includes requests) +pip install -r src/requirements.txt +``` + +## Running Tests + +### All Localhost Tests + +```bash +source .venv/bin/activate +python -m unittest discover -s test/localhost -v +``` + +### Integration Tests Only + +```bash +source .venv/bin/activate +python -m unittest discover -s test/localhost/integration -v +``` + +### Specific Test File + +```bash +source .venv/bin/activate +python -m unittest test.localhost.integration.test_endpoints_public -v +``` + +## Environment Differences + +Localhost deployment differs from higher tiers in several ways: + +| Aspect | Localhost | DEV/TEST/PROD | +|--------|-----------|---------------| +| Authorization | hubmap-auth (Docker) | AWS API Gateway + Lambda | +| SSL/TLS | Disabled | Let's Encrypt certificates | +| Ports | 3333 (custom) | 8080 (standard) | +| Logging | Local files + Docker logs | CloudWatch Logs | +| Network | `gateway_hubmap` (Docker) | AWS VPC | +| Database | Local Neo4j or remote | AWS-hosted Neo4j | + +Tests in this directory account for these differences. 
+ +## Debugging Failed Tests + +### Container Not Running + +```bash +# Check container status +docker ps -a | grep entity-api + +# Check logs +docker logs entity-api + +# Restart if needed +cd docker +./docker-localhost.sh down +./docker-localhost.sh start +``` + +### Container Not Healthy + +```bash +# Check health status +docker inspect entity-api | grep -A 10 Health + +# Common causes: +# - Port 3333 already in use +# - nginx configuration error +# - Cannot reach hubmap-auth +# - Flask app.cfg missing +``` + +### Authorization Failures + +```bash +# Verify hubmap-auth is running and healthy +docker ps | grep hubmap-auth + +# Test entity-api can reach hubmap-auth +docker exec entity-api curl http://hubmap-auth:7777/status.json + +# Check entity-api nginx logs for auth requests +docker exec entity-api cat /usr/src/app/log/nginx_access_entity-api.log | tail -20 +``` + +### Connection Refused + +```bash +# Verify port mapping +docker port entity-api + +# Test from host +curl http://localhost:3333/status + +# Test from inside container +docker exec entity-api curl http://localhost:8080/status +``` + +### Docker Network Issues + +```bash +# Inspect network +docker network inspect gateway_hubmap + +# Verify both containers are on the network +docker network inspect gateway_hubmap | grep -E "hubmap-auth|entity-api" +``` + +## Adding New Test Types + +When adding new test categories: + +1. **Create subdirectory** under `test/localhost/` +2. **Add README.md** explaining the test type and how to run +3. **Update this README** to document the new test type +4. 
**Follow best practices** from existing integration tests + +## Related Documentation + +- [Parent Test Suite Overview](../README.md) +- [Docker Localhost Deployment](../../docker/README.md) +- [Gateway API Endpoints Configuration](../../../gateway/api_endpoints.localhost.json) +- [Gateway Test Suite](../../../gateway/test/README.md) diff --git a/test/localhost/integration/README.md b/test/localhost/integration/README.md new file mode 100644 index 00000000..b9f27386 --- /dev/null +++ b/test/localhost/integration/README.md @@ -0,0 +1,284 @@ +# Entity-API Localhost Integration Tests + +Integration tests for entity-api localhost deployment with hubmap-auth. These tests verify that entity-api correctly integrates with hubmap-auth for authorization and properly serves requests. + +## Test Files + +This directory contains tests organized by functionality: + +- **test_endpoints_public.py** - Public endpoints (no auth required) +- **test_endpoints_protected.py** - Protected endpoints (auth required) +- **test_authorization.py** - Authorization integration with hubmap-auth +- **test_configuration.py** - nginx and app configuration validation +- **test_cors.py** - CORS headers and preflight +- **test_flask_app.py** - Flask application behavior + +Files are named to group together alphabetically by purpose (all `test_endpoints_*` files group together, etc.). 
+ +## Prerequisites + +### Running Containers + +The tests require both hubmap-auth and entity-api to be running: + +```bash +# Start hubmap-auth first +cd gateway +./docker-localhost.sh build +./docker-localhost.sh start + +# Verify it's healthy +docker ps | grep hubmap-auth # Should show "healthy" + +# Start entity-api +cd entity-api/docker +./docker-localhost.sh build +./docker-localhost.sh start + +# Verify it's healthy +docker ps | grep entity-api # Should show "healthy" +``` + +### Python Environment + +Tests use the same dependencies as the main application: + +```bash +# Create virtual environment (first time only, from entity-api repo root) +python3 -m venv .venv + +# Activate virtual environment +source .venv/bin/activate + +# Install application dependencies (includes requests) +pip install -r src/requirements.txt + +# Suppress pip upgrade notices (optional) +export PIP_DISABLE_PIP_VERSION_CHECK=1 +``` + +## Running the Tests + +### From entity-api repository root + +```bash +# Activate virtual environment +source .venv/bin/activate + +# Run all localhost integration tests +python -m unittest discover -s test/localhost/integration -p "test_*.py" -v +``` + +### Run specific test file + +```bash +source .venv/bin/activate + +# Run all public endpoint tests +python -m unittest test.localhost.integration.test_endpoints_public -v + +# Run all protected endpoint tests +python -m unittest test.localhost.integration.test_endpoints_protected -v + +# Run all authorization tests +python -m unittest test.localhost.integration.test_authorization_integration -v +``` + +### Run Specific Test Classes + +```bash +source .venv/bin/activate + +# Run just public GET endpoint tests +python -m unittest test.localhost.integration.test_endpoints_public.EndpointsGETPublicTests -v + +# Run just protected POST endpoint tests +python -m unittest test.localhost.integration.test_endpoints_protected.EndpointsPOSTProtectedTests -v + +# Run just nginx integration tests +python -m unittest 
test.localhost.integration.test_authorization_integration.NginxAuthRequestIntegrationTests -v +``` + +### Run Individual Tests + +```bash +source .venv/bin/activate +python -m unittest test.localhost.integration.test_endpoints_public.EndpointsGETPublicTests.test_status_endpoint -v +``` + +### Run with Verbose Output + +Add `-v` flag for detailed output showing each test: + +```bash +python -m unittest discover -s test/localhost/integration -p "test_*.py" -v +``` + +### Run with Summary Output + +Remove `-v` flag for just pass/fail summary: + +```bash +python -m unittest discover -s test/localhost/integration -p "test_*.py" +``` + +Output will be: +``` +...................... +---------------------------------------------------------------------- +Ran 22 tests in 3.456s + +OK +``` + +## Test Structure + +### Test Classes + +**EndpointsGETPublicTests** +- Public GET endpoints accessible without authentication +- Status, entity lookups, provenance, etc. + +**EndpointsGETProtectedTests** +- Protected GET endpoints requiring authentication +- Usergroups, unpublished datasets, etc. + +**EndpointsPOSTProtectedTests** +- Protected POST endpoints requiring authentication +- Entity creation, dataset components, etc. + +**EndpointsPUTProtectedTests** +- Protected PUT endpoints requiring authentication +- Entity updates, dataset retraction, etc. + +**EndpointsDELETEProtectedTests** +- Protected DELETE endpoints requiring authentication +- Cache management, etc. 
+ +**NginxAuthRequestIntegrationTests** +- Verifies nginx correctly calls hubmap-auth +- Tests header passing to authorization service + +**FlaskApplicationTests** +- Tests Flask app responses after authorization +- Validates 404 handling for undefined routes + +## Best Practices Used + +### Code Quality +- **Type hints** - All parameters and return types annotated for clarity +- **Docstrings** - Every test has descriptive documentation +- **Descriptive names** - Test names clearly describe what they verify +- **Proper assertions** - Meaningful assertion messages for failures + +### Test Organization +- **Class-level constants** - `BASE_URL`, `TIMEOUT` defined once and reused +- **setUpClass** - Expensive setup (container checks) run once per class +- **subTest** - Parameterized tests provide clear failure reporting per endpoint +- **Focused tests** - Each test validates one specific behavior + +### Robustness +- **Timeout handling** - All requests have explicit timeouts +- **Connection error handling** - Graceful failure with helpful messages +- **Conditional skipping** - Tests skip gracefully when containers unavailable +- **Clear error messages** - Failures indicate exactly what went wrong and how to fix + +### CI/CD Ready +- **No extra dependencies** - Uses application's existing requirements.txt +- **Subprocess isolation** - Docker commands use subprocess with timeout +- **Exit codes** - Proper test success/failure reporting +- **Environment agnostic** - Works in local development and CI pipelines + +## Test Coverage + +### What These Tests Verify + +✅ entity-api container starts and becomes healthy +✅ nginx integrates with hubmap-auth via auth_request +✅ Public endpoints accessible without authentication +✅ Protected endpoints block access without authentication +✅ Flask application handles authorized requests +✅ 404 returned for undefined routes +✅ CORS headers properly configured +✅ Docker network communication works +✅ Authorization headers passed correctly + +### 
What These Tests Don't Cover + +❌ Token validation with real Globus tokens (requires valid credentials) +❌ Group membership validation (requires test users in specific groups) +❌ Database operations (Neo4j integration) +❌ Load testing / performance under stress +❌ Security penetration testing + +## Troubleshooting + +### "Cannot connect to entity-api" +**Cause:** Container not running or not accessible +**Solution:** +```bash +cd docker +./docker-localhost.sh start +docker ps | grep entity-api +``` + +### "entity-api not ready: status returned 401" +**Cause:** hubmap-auth not running or misconfigured +**Solution:** Start hubmap-auth first +```bash +cd gateway +./docker-localhost.sh start +docker ps | grep hubmap-auth +``` + +### "Authorization Required" on public endpoints +**Cause:** api_endpoints.localhost.json misconfigured or nginx not passing correct Host header +**Solution:** Check configuration +```bash +docker exec hubmap-auth cat /usr/src/app/api_endpoints.json | grep entity-api +docker exec entity-api cat /etc/nginx/conf.d/entity-api.conf | grep "proxy_set_header Host" +``` + +### Tests hang or timeout +**Cause:** Network connectivity issues between containers +**Solution:** Verify both containers on same network +```bash +docker network inspect gateway_hubmap | grep -E "hubmap-auth|entity-api" +``` + +## Future Enhancements + +### Pytest Migration (Optional) + +While these tests use Python's built-in `unittest`, you can optionally migrate to pytest for additional features: + +**Benefits of pytest:** +- More concise syntax with simple `assert` statements +- Better parameterized testing with `@pytest.mark.parametrize` +- Richer output formatting and failure reporting +- Extensive plugin ecosystem (coverage, parallel execution, etc.) +- Fixture system for complex setup/teardown + +**Recommendation:** Stick with unittest for now unless you need pytest-specific features. Unittest is part of Python's standard library and sufficient for these integration tests. 
+ +## Contributing + +When adding new tests: + +1. **Follow existing patterns** - Use the same class structure and naming conventions +2. **Add docstrings** - Every test should explain what it validates +3. **Use subTest for parameters** - When testing multiple similar cases +4. **Handle failures gracefully** - Provide actionable error messages +5. **Keep tests independent** - Each test should work in isolation +6. **Update this README** - Document new test files or significant changes + +## CI/CD Integration + +These tests are designed to run in GitHub Actions. See the parent [test/README.md](../../README.md) for example workflow configuration. + +## Related Documentation + +- [Parent Test Suite Overview](../README.md) +- [Entity-API Deployment Guide](../../README.md) +- [Gateway API Endpoints Configuration](../../../../gateway/api_endpoints.localhost.json) +- [Gateway Test Suite](../../../../gateway/test/README.md) diff --git a/test/localhost/integration/test_authorization_integration.py b/test/localhost/integration/test_authorization_integration.py new file mode 100644 index 00000000..93d53c84 --- /dev/null +++ b/test/localhost/integration/test_authorization_integration.py @@ -0,0 +1,236 @@ +""" +Tests for entity-api authorization integration with hubmap-auth. + +These tests verify the nginx ↔ hubmap-auth integration mechanism, +Docker networking, and configuration. Tests here have knowledge of +the authorization infrastructure. 
+ +Run all authorization integration tests: + python -m unittest test.localhost.integration.test_authorization_integration -v +""" + +import subprocess +import unittest +import requests + + +class NginxAuthRequestIntegrationTests(unittest.TestCase): + """Test nginx auth_request integration with hubmap-auth.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_nginx_config_has_auth_request(self): + """Test that nginx config includes auth_request directive.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "cat", "/etc/nginx/conf.d/entity-api.conf"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + self.assertIn("auth_request /api_auth", result.stdout) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect nginx configuration") + + def test_nginx_config_calls_hubmap_auth(self): + """Test that nginx config proxies to hubmap-auth:7777.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "grep", "-A", "10", + "location = /api_auth", "/etc/nginx/conf.d/entity-api.conf"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + # Should proxy to hubmap-auth + self.assertIn("hubmap-auth:7777", result.stdout) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect nginx configuration") + + def test_nginx_sends_correct_host_header(self): + """Test that nginx sends Host: entity-api to hubmap-auth.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "grep", "proxy_set_header Host", + "/etc/nginx/conf.d/entity-api.conf"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + # Should set Host to "entity-api" not $http_host + self.assertIn('proxy_set_header Host "entity-api"', result.stdout) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect nginx 
configuration") + + def test_nginx_sends_original_uri_header(self): + """Test that nginx sends X-Original-URI header.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "grep", "X-Original-URI", + "/etc/nginx/conf.d/entity-api.conf"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + self.assertIn("X-Original-URI", result.stdout) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect nginx configuration") + + def test_nginx_sends_original_method_header(self): + """Test that nginx sends X-Original-Request-Method header.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "grep", "X-Original-Request-Method", + "/etc/nginx/conf.d/entity-api.conf"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + self.assertIn("X-Original-Request-Method", result.stdout) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect nginx configuration") + + +class DockerNetworkConnectivityTests(unittest.TestCase): + """Test Docker network connectivity between containers.""" + + def test_entity_api_can_reach_hubmap_auth(self): + """Test that entity-api can communicate with hubmap-auth.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "curl", "-f", + "http://hubmap-auth:7777/status.json"], + capture_output=True, + text=True, + timeout=10, + check=True + ) + + self.assertEqual(result.returncode, 0) + + except subprocess.CalledProcessError: + self.fail("entity-api cannot reach hubmap-auth on Docker network") + except (subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot test Docker network connectivity") + + def test_containers_on_gateway_hubmap_network(self): + """Test that both containers are on gateway_hubmap network.""" + try: + result = subprocess.run( + ["docker", "network", "inspect", "gateway_hubmap", + "--format", "{{range 
.Containers}}{{.Name}} {{end}}"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + container_names = result.stdout + self.assertIn("hubmap-auth", container_names) + self.assertIn("entity-api", container_names) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect Docker network") + + def test_docker_dns_resolves_hubmap_auth(self): + """Test that Docker DNS resolves hubmap-auth hostname.""" + try: + result = subprocess.run( + ["docker", "exec", "entity-api", "getent", "hosts", "hubmap-auth"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + # Should resolve to an IP address + self.assertIn("hubmap-auth", result.stdout) + # Output format: "172.19.0.2 hubmap-auth" + self.assertRegex(result.stdout, r'\d+\.\d+\.\d+\.\d+') + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot test DNS resolution") + + +class ContainerHealthTests(unittest.TestCase): + """Test container health and startup.""" + + def test_entity_api_container_healthy(self): + """Test that entity-api container reports healthy status.""" + try: + result = subprocess.run( + ["docker", "inspect", "entity-api", + "--format", "{{.State.Health.Status}}"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + health_status = result.stdout.strip() + self.assertEqual(health_status, "healthy") + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect container health") + + def test_hubmap_auth_container_healthy(self): + """Test that hubmap-auth container is healthy (prerequisite).""" + try: + result = subprocess.run( + ["docker", "inspect", "hubmap-auth", + "--format", "{{.State.Health.Status}}"], + capture_output=True, + text=True, + timeout=5, + check=True + ) + + health_status = result.stdout.strip() + self.assertEqual( + health_status, + "healthy", + "hubmap-auth must 
be healthy for entity-api tests to work" + ) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect container health") + + def test_flask_app_loaded_successfully(self): + """Test that Flask app loaded without configuration errors.""" + try: + result = subprocess.run( + ["docker", "logs", "entity-api"], + capture_output=True, + text=True, + timeout=5 + ) + + # Should show WSGI app ready + self.assertIn("WSGI app 0", result.stdout) + self.assertIn("ready", result.stdout) + + # Should NOT show critical errors + self.assertNotIn("Unable to load configuration file", result.stdout) + + except (subprocess.TimeoutExpired, FileNotFoundError): + self.skipTest("Cannot inspect container logs") + + +if __name__ == "__main__": + unittest.main() diff --git a/test/localhost/integration/test_cors.py b/test/localhost/integration/test_cors.py new file mode 100644 index 00000000..bc31348e --- /dev/null +++ b/test/localhost/integration/test_cors.py @@ -0,0 +1,94 @@ +""" +Tests for CORS (Cross-Origin Resource Sharing) configuration in entity-api. + +CORS headers enable web browsers to make requests to the API from +different origins. These tests verify proper CORS configuration in nginx. 
+ +Run all CORS tests: + python -m unittest test.localhost.integration.test_cors -v +""" + +import unittest +import requests + + +class CORSHeaderTests(unittest.TestCase): + """Test CORS headers on entity-api responses.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_cors_allow_origin_header(self): + """Test that Access-Control-Allow-Origin header is set to *.""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 200) + self.assertIn("Access-Control-Allow-Origin", response.headers) + self.assertEqual(response.headers["Access-Control-Allow-Origin"], "*") + + def test_cors_allow_methods_header(self): + """Test that Access-Control-Allow-Methods header is present.""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertIn("Access-Control-Allow-Methods", response.headers) + allowed_methods = response.headers["Access-Control-Allow-Methods"] + + # Should include common methods + self.assertIn("GET", allowed_methods) + self.assertIn("POST", allowed_methods) + + def test_cors_allow_headers(self): + """Test that Access-Control-Allow-Headers includes required headers.""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertIn("Access-Control-Allow-Headers", response.headers) + allowed_headers = response.headers["Access-Control-Allow-Headers"] + + # Should include Authorization header for token-based auth + self.assertIn("Authorization", allowed_headers) + + def test_cors_headers_on_protected_endpoints(self): + """Test that CORS headers are present even on 401 responses.""" + response = requests.get(f"{self.BASE_URL}/usergroups", timeout=self.TIMEOUT) + + # Should return 401 but still have CORS headers + self.assertEqual(response.status_code, 401) + self.assertIn("Access-Control-Allow-Origin", response.headers) + + +class CORSPreflightTests(unittest.TestCase): + """Test CORS preflight OPTIONS requests.""" + + 
BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_options_request_returns_204(self): + """Test that OPTIONS requests return 204 No Content.""" + response = requests.options(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 204) + + def test_options_includes_allow_methods(self): + """Test that OPTIONS response includes allowed methods.""" + response = requests.options(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 204) + self.assertIn("Access-Control-Allow-Methods", response.headers) + + def test_options_includes_allow_headers(self): + """Test that OPTIONS response includes allowed headers.""" + response = requests.options(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertIn("Access-Control-Allow-Headers", response.headers) + + def test_options_includes_max_age(self): + """Test that OPTIONS response includes max age for caching.""" + response = requests.options(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertIn("Access-Control-Max-Age", response.headers) + # Should be 86400 (24 hours) per nginx config + self.assertEqual(response.headers["Access-Control-Max-Age"], "86400") + + +if __name__ == "__main__": + unittest.main() diff --git a/test/localhost/integration/test_endpoints_protected.py b/test/localhost/integration/test_endpoints_protected.py new file mode 100644 index 00000000..83852a2f --- /dev/null +++ b/test/localhost/integration/test_endpoints_protected.py @@ -0,0 +1,171 @@ +""" +Tests for protected entity-api endpoints requiring authentication. + +These tests call entity-api endpoints directly and verify they require +proper authentication. No knowledge of hubmap-auth /api_auth internals. 
+ +Run all protected endpoint tests: + python -m unittest test.localhost.integration.test_endpoints_protected -v + +Run specific HTTP method tests: + python -m unittest test.localhost.integration.test_endpoints_protected.EndpointsGETProtectedTests -v +""" + +import unittest +import requests + + +class EndpointsGETProtectedTests(unittest.TestCase): + """Test protected GET endpoints - authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_usergroups_requires_auth(self): + """Test GET /usergroups returns 401 without token.""" + response = requests.get(f"{self.BASE_URL}/usergroups", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 401) + + def test_datasets_unpublished_requires_auth(self): + """Test GET /datasets/unpublished returns 401 without token.""" + response = requests.get( + f"{self.BASE_URL}/datasets/unpublished", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_descendants_requires_auth(self): + """Test GET /descendants/ returns 401 without token.""" + response = requests.get( + f"{self.BASE_URL}/descendants/test-id", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_children_requires_auth(self): + """Test GET /children/ returns 401 without token.""" + response = requests.get( + f"{self.BASE_URL}/children/test-id", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_previous_revisions_requires_auth(self): + """Test GET /previous_revisions/ returns 401 without token.""" + response = requests.get( + f"{self.BASE_URL}/previous_revisions/test-id", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_next_revisions_requires_auth(self): + """Test GET /next_revisions/ returns 401 without token.""" + response = requests.get( + f"{self.BASE_URL}/next_revisions/test-id", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + +class 
EndpointsPOSTProtectedTests(unittest.TestCase): + """Test protected POST endpoints - authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_entities_create_requires_auth(self): + """Test POST /entities/ returns 401 without token.""" + response = requests.post( + f"{self.BASE_URL}/entities/sample", + json={"direct_ancestor_uuid": "test-uuid"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_datasets_components_requires_auth(self): + """Test POST /datasets/components returns 401 without token.""" + response = requests.post( + f"{self.BASE_URL}/datasets/components", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_entities_multiple_samples_requires_auth(self): + """Test POST /entities/multiple-samples/ returns 401 without token.""" + response = requests.post( + f"{self.BASE_URL}/entities/multiple-samples/5", + json={"direct_ancestor_uuid": "test-uuid"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + +class EndpointsPUTProtectedTests(unittest.TestCase): + """Test protected PUT endpoints - authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_entities_update_requires_auth(self): + """Test PUT /entities/ returns 401 without token.""" + response = requests.put( + f"{self.BASE_URL}/entities/test-uuid", + json={"description": "updated"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_datasets_retract_requires_admin_auth(self): + """Test PUT /datasets//retract returns 401 without admin token.""" + response = requests.put( + f"{self.BASE_URL}/datasets/test-id/retract", + json={"retraction_reason": "test reason"}, + 
headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + +class EndpointsDELETEProtectedTests(unittest.TestCase): + """Test protected DELETE endpoints - authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_flush_cache_requires_auth(self): + """Test DELETE /flush-cache/ returns 401 without token.""" + response = requests.delete( + f"{self.BASE_URL}/flush-cache/test-id", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + def test_flush_all_cache_requires_admin_auth(self): + """Test DELETE /flush-all-cache returns 401 without admin token.""" + response = requests.delete( + f"{self.BASE_URL}/flush-all-cache", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 401) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/localhost/integration/test_endpoints_public.py b/test/localhost/integration/test_endpoints_public.py new file mode 100644 index 00000000..bd9b6949 --- /dev/null +++ b/test/localhost/integration/test_endpoints_public.py @@ -0,0 +1,297 @@ +""" +Tests for public entity-api endpoints accessible without authentication. + +These tests call entity-api endpoints directly and verify responses. +No knowledge of hubmap-auth internal mechanisms. 
+ +Run all public endpoint tests: + python -m unittest test.localhost.integration.test_endpoints_public -v + +Run specific HTTP method tests: + python -m unittest test.localhost.integration.test_endpoints_public.EndpointsGETPublicTests -v +""" + +import unittest +import requests +from requests.exceptions import ConnectionError + + +class EndpointsGETPublicTests(unittest.TestCase): + """Test public GET endpoints - no authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + @classmethod + def setUpClass(cls): + """Verify entity-api is accessible before running tests (GET /status must answer 200 or 401; RuntimeError on connection failure or any other status).""" + try: + response = requests.get(f"{cls.BASE_URL}/status", timeout=cls.TIMEOUT) + if response.status_code not in [200, 401]: + raise RuntimeError( + f"entity-api not responding: /status returned {response.status_code}" + ) + except ConnectionError as e: + raise RuntimeError( + f"Cannot connect to entity-api at {cls.BASE_URL}. " + "Ensure containers are running:\n" + " cd gateway && ./docker-localhost.sh start\n" + " cd entity-api/docker && ./docker-localhost.sh start" + ) from e + + def test_status_endpoint(self): + """Test GET /status returns valid status.""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 200) + data = response.json() + self.assertIsInstance(data, dict) + + def test_root_endpoint(self): + """Test that GET / is publicly accessible.""" + response = requests.get(f"{self.BASE_URL}/", timeout=self.TIMEOUT) + + # Root may require auth based on your config - adjust if needed + # Currently configured as auth: false in api_endpoints.localhost.json + self.assertEqual(response.status_code, 200) + + def test_entity_types_endpoint(self): + """Test GET /entity-types returns entity type information.""" + response = requests.get(f"{self.BASE_URL}/entity-types", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 200) + data = response.json() + self.assertIsInstance(data, (list, dict)) + + 
def test_entities_lookup(self): + """Test GET /entities/{id} for entity lookup.""" + test_uuid = "00000000000000000000000000000000" + response = requests.get( + f"{self.BASE_URL}/entities/{test_uuid}", + timeout=self.TIMEOUT + ) + + # Should NOT return 401 (endpoint is public) + # Likely returns 400 or 404 for invalid/non-existent UUID + self.assertNotEqual(response.status_code, 401) + + def test_provenance_endpoint(self): + """Test GET /entities/{id}/provenance.""" + test_id = "test-entity-id" + response = requests.get( + f"{self.BASE_URL}/entities/{test_id}/provenance", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_revisions_endpoint(self): + """Test GET /entities/{id}/revisions.""" + test_id = "test-entity-id" + response = requests.get( + f"{self.BASE_URL}/entities/{test_id}/revisions", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_datasets_sankey_data(self): + """Test GET /datasets/sankey_data.""" + response = requests.get( + f"{self.BASE_URL}/datasets/sankey_data", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_datasets_prov_info(self): + """Test GET /datasets/{id}/prov-info.""" + test_id = "test-dataset-id" + response = requests.get( + f"{self.BASE_URL}/datasets/{test_id}/prov-info", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_datasets_prov_metadata(self): + """Test GET /datasets/{id}/prov-metadata.""" + test_id = "test-dataset-id" + response = requests.get( + f"{self.BASE_URL}/datasets/{test_id}/prov-metadata", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_redirect_endpoints(self): + """Test redirect endpoints are public.""" + test_id = "test-id" + + redirect_endpoints = [ + f"/redirect/{test_id}", + f"/doi/redirect/{test_id}", + f"/collection/redirect/{test_id}" + ] + + for endpoint in redirect_endpoints: + with self.subTest(endpoint=endpoint): + response 
= requests.get( + f"{self.BASE_URL}{endpoint}", + timeout=self.TIMEOUT, + allow_redirects=False + ) + + self.assertNotEqual(response.status_code, 401) + + def test_globus_url_endpoints(self): + """Test Globus URL endpoints are public.""" + test_id = "test-id" + + globus_endpoints = [ + f"/entities/{test_id}/globus-url", + f"/dataset/globus-url/{test_id}", + f"/entities/dataset/globus-url/{test_id}" + ] + + for endpoint in globus_endpoints: + with self.subTest(endpoint=endpoint): + response = requests.get( + f"{self.BASE_URL}{endpoint}", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_relationship_endpoints(self): + """Test entity relationship endpoints are public.""" + test_id = "test-entity-id" + + relationship_endpoints = [ + f"/entities/{test_id}/tuplets", + f"/entities/{test_id}/collections", + f"/entities/{test_id}/uploads", + f"/entities/{test_id}/siblings", + f"/entities/{test_id}/ancestor-organs", + f"/ancestors/{test_id}", + f"/parents/{test_id}" + ] + + for endpoint in relationship_endpoints: + with self.subTest(endpoint=endpoint): + response = requests.get( + f"{self.BASE_URL}{endpoint}", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_dataset_relationship_endpoints(self): + """Test dataset relationship endpoints are public.""" + test_id = "test-dataset-id" + + dataset_endpoints = [ + f"/datasets/{test_id}/revisions", + f"/datasets/{test_id}/revision", + f"/datasets/{test_id}/latest-revision", + f"/datasets/{test_id}/donors", + f"/datasets/{test_id}/samples", + f"/datasets/{test_id}/organs", + f"/datasets/{test_id}/paired-dataset" + ] + + for endpoint in dataset_endpoints: + with self.subTest(endpoint=endpoint): + response = requests.get( + f"{self.BASE_URL}{endpoint}", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_instanceof_endpoints(self): + """Test type checking endpoints are public.""" + endpoints = [ + 
"/entities/type/Sample/instanceof/Entity", + "/entities/test-id/instanceof/Sample" + ] + + for endpoint in endpoints: + with self.subTest(endpoint=endpoint): + response = requests.get( + f"{self.BASE_URL}{endpoint}", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_documents_endpoint(self): + """Test GET /documents/{id} is public.""" + response = requests.get( + f"{self.BASE_URL}/documents/test-doc-id", + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + +class EndpointsPOSTPublicTests(unittest.TestCase): + """Test public POST endpoints - no authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_entities_batch_ids(self): + """Test POST /entities/batch-ids is public.""" + response = requests.post( + f"{self.BASE_URL}/entities/batch-ids", + json={"ids": ["id1", "id2"]}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_constraints_endpoint(self): + """Test POST /constraints is public.""" + response = requests.post( + f"{self.BASE_URL}/constraints", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + +class EndpointsPUTPublicTests(unittest.TestCase): + """Test public PUT endpoints - no authentication required.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_datasets_bulk_update(self): + """Test PUT /datasets is public.""" + response = requests.put( + f"{self.BASE_URL}/datasets", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertNotEqual(response.status_code, 401) + + def test_uploads_update(self): + """Test PUT /uploads is public.""" + response = requests.put( + f"{self.BASE_URL}/uploads", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) 
+ + self.assertNotEqual(response.status_code, 401) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/localhost/integration/test_flask_app.py b/test/localhost/integration/test_flask_app.py new file mode 100644 index 00000000..05fbb9f4 --- /dev/null +++ b/test/localhost/integration/test_flask_app.py @@ -0,0 +1,142 @@ +""" +Tests for entity-api Flask application behavior. + +These tests verify Flask-specific functionality including error handling, +404 responses for undefined routes, and application-level logic. + +Run all Flask app tests: + python -m unittest test.localhost.integration.test_flask_app -v +""" + +import unittest +import requests + + +class FlaskErrorHandlingTests(unittest.TestCase): + """Test Flask application error handling.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_undefined_endpoint_returns_404(self): + """Test that undefined endpoints return 404 from Flask.""" + response = requests.get( + f"{self.BASE_URL}/this-endpoint-does-not-exist", + timeout=self.TIMEOUT + ) + + # Catch-all in api_endpoints.json should let the request through to Flask + # Flask should return 404 for undefined routes + self.assertEqual(response.status_code, 404) + + def test_undefined_post_endpoint_returns_404(self): + """Test that undefined POST endpoints return 404.""" + response = requests.post( + f"{self.BASE_URL}/undefined-post-endpoint", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 404) + + def test_undefined_put_endpoint_returns_404(self): + """Test that undefined PUT endpoints return 404.""" + response = requests.put( + f"{self.BASE_URL}/undefined-put-endpoint", + json={"test": "data"}, + headers={"Content-Type": "application/json"}, + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 404) + + def test_undefined_delete_endpoint_returns_404(self): + """Test that undefined DELETE endpoints return 404.""" + response = 
requests.delete( + f"{self.BASE_URL}/undefined-delete-endpoint", + timeout=self.TIMEOUT + ) + + self.assertEqual(response.status_code, 404) + + def test_malformed_uuid_handled_gracefully(self): + """Test that malformed UUIDs are handled with proper error codes.""" + # Send request with clearly invalid UUID format + response = requests.get( + f"{self.BASE_URL}/entities/not-a-valid-uuid", + timeout=self.TIMEOUT + ) + + # Should return 400 (bad request) or 404 (not found), not crash + self.assertIn(response.status_code, [400, 404]) + + +class FlaskResponseTests(unittest.TestCase): + """Test Flask application responses.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_status_returns_valid_json(self): + """Test that /status returns valid JSON structure.""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 200) + + # Should be valid JSON + data = response.json() + self.assertIsInstance(data, dict) + + def test_entity_types_returns_list_or_dict(self): + """Test that /entity-types returns proper data structure.""" + response = requests.get(f"{self.BASE_URL}/entity-types", timeout=self.TIMEOUT) + + self.assertEqual(response.status_code, 200) + + data = response.json() + # Entity types can be returned as list or dict depending on implementation + self.assertIsInstance(data, (list, dict)) + + def test_flask_handles_large_payloads(self): + """Test that Flask handles large request payloads.""" + # Send moderately large payload (under nginx 10M limit) + large_payload = {"data": "x" * 100000} # ~100KB + + response = requests.post( + f"{self.BASE_URL}/constraints", + json=large_payload, + timeout=self.TIMEOUT + ) + + # Should not return 413 (payload too large) for reasonable sizes + # May return 400 (bad request) or other application errors + self.assertNotEqual(response.status_code, 413) + + +class FlaskPerformanceTests(unittest.TestCase): + """Test Flask application performance 
characteristics.""" + + BASE_URL = "http://localhost:3333" + TIMEOUT = 10 + + def test_status_endpoint_fast_response(self): + """Test that status endpoint responds quickly (asserts elapsed time only, not the status code).""" + response = requests.get(f"{self.BASE_URL}/status", timeout=self.TIMEOUT) + + # Status endpoint should be fast (< 1 second) + self.assertLess(response.elapsed.total_seconds(), 1.0) + + def test_simple_lookup_reasonable_time(self): + """Test that simple lookups complete in reasonable time.""" + response = requests.get( + f"{self.BASE_URL}/entity-types", + timeout=self.TIMEOUT + ) + + # Should complete in under 2 seconds + self.assertLess(response.elapsed.total_seconds(), 2.0) + + +if __name__ == "__main__": + unittest.main()