diff --git a/PolyLingua/.env.example b/PolyLingua/.env.example new file mode 100644 index 0000000000..a48a3de7cd --- /dev/null +++ b/PolyLingua/.env.example @@ -0,0 +1,119 @@ +# ================================================ +# PolyLingua Environment Configuration +# ================================================ +# Copy this file to .env and update with your values +# Run: cp .env.example .env +# Then edit .env with your actual configuration + +# ================================================ +# HuggingFace Configuration +# ================================================ +# Required: Get your token from https://huggingface.co/settings/tokens +# This is needed to download models from HuggingFace Hub +HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + +# ================================================ +# Model Configuration +# ================================================ +# LLM model ID from HuggingFace +# Default model supports multilingual translation +LLM_MODEL_ID=swiss-ai/Apertus-8B-Instruct-2509 + +# Directory to cache downloaded models +# Models can be large (several GB), ensure sufficient disk space +MODEL_CACHE=./data + +# ================================================ +# Host Configuration +# ================================================ +# Your server/machine IP address +# Use 'localhost' for local development +# Use actual IP (e.g., 192.168.1.100) for network access +host_ip=localhost + +# ================================================ +# Backend Service Configuration +# ================================================ +# vLLM (vLLM Inference) endpoint +# This is the LLM inference service endpoint +VLLM_ENDPOINT=http://localhost:8028 + +# LLM microservice configuration +# Host and port for the LLM microservice +LLM_SERVICE_HOST_IP=localhost +LLM_SERVICE_PORT=9000 + +# PolyLingua megaservice configuration +# Main translation service host and port +MEGA_SERVICE_HOST_IP=localhost +MEGA_SERVICE_PORT=8888 + +# Backend service details 
+BACKEND_SERVICE_NAME=polylingua +BACKEND_SERVICE_IP=localhost +BACKEND_SERVICE_PORT=8888 + +# ================================================ +# Frontend Configuration +# ================================================ +# Backend endpoint URL for the frontend +# This is what the UI uses to connect to the backend +BACKEND_SERVICE_ENDPOINT=http://localhost:8888 + +# Frontend service configuration +# Next.js development server configuration +FRONTEND_SERVICE_IP=localhost +FRONTEND_SERVICE_PORT=5173 + +# ================================================ +# Docker Configuration +# ================================================ +# Docker registry for pulling images +# Use 'opea' for official OPEA images +REGISTRY=opea + +# Docker image tag +# Use 'latest' for most recent version +TAG=latest + +# ================================================ +# Nginx Configuration +# ================================================ +# Nginx reverse proxy port +# Default HTTP port +NGINX_PORT=80 + +# ================================================ +# Proxy Settings (Optional) +# ================================================ +# Configure if behind a corporate proxy +# Leave empty if not using a proxy + +# HTTP proxy URL (e.g., http://proxy.company.com:8080) +http_proxy= + +# HTTPS proxy URL (e.g., http://proxy.company.com:8080) +https_proxy= + +# Comma-separated list of hosts to bypass proxy +no_proxy=localhost,127.0.0.1 + +# ================================================ +# Quick Start Guide +# ================================================ +# +# 1. Copy this file: +# cp .env.example .env +# +# 2. Edit .env and set your HF_TOKEN +# +# 3. Update host_ip if deploying to network +# (use actual IP instead of localhost) +# +# 4. Start services: +# docker compose up -d +# +# 5. 
Access UI at: +# http://localhost:5173 (or http://:5173) +# +# ================================================ diff --git a/PolyLingua/.gitignore b/PolyLingua/.gitignore new file mode 100644 index 0000000000..cb3baf35fa --- /dev/null +++ b/PolyLingua/.gitignore @@ -0,0 +1,68 @@ +# Environment variables +.env +.env.local + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +# Python library directories (but allow ui/lib) +/lib/ +/lib64/ +!ui/lib/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# Model cache +data/ +models/ +*.bin +*.safetensors + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +tmp/ +temp/ +*.tmp + +# Node modules (for UI) +ui/node_modules/ +ui/.next/ +ui/out/ +ui/build/ + +# Docker +docker-compose.override.yml diff --git a/PolyLingua/Dockerfile b/PolyLingua/Dockerfile new file mode 100644 index 0000000000..5bdedaf097 --- /dev/null +++ b/PolyLingua/Dockerfile @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +WORKDIR /home/user + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements and install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# Copy polylingua service +COPY polylingua.py . 
+ +# Expose service port +EXPOSE 8888 + +# Run the polylingua service +ENTRYPOINT ["python", "polylingua.py"] diff --git a/PolyLingua/README.md b/PolyLingua/README.md new file mode 100644 index 0000000000..2e770ec48e --- /dev/null +++ b/PolyLingua/README.md @@ -0,0 +1,287 @@ +# PolyLingua + +A production-ready translation service built with **OPEA (Open Platform for Enterprise AI)** components, featuring a modern Next.js UI and microservices architecture. + +## Components + +1. **vLLM Service** - High-performance LLM inference engine for model serving +2. **LLM Microservice** - OPEA wrapper providing standardized API +3. **PolyLingua Megaservice** - Orchestrator that formats prompts and routes requests +4. **UI Service** - Next.js 14 frontend with React and TypeScript +5. **Nginx** - Reverse proxy for unified access + +## 🚀 Quick Start + +### Prerequisites + +- Docker and Docker Compose +- Git +- HuggingFace Account (for model access) +- 8GB+ RAM recommended +- ~10GB disk space for models + +### 1. Clone and Setup + +```bash +cd PolyLingua + +# Configure environment variables +./set_env.sh +``` + +You'll be prompted for: + +- **HuggingFace API Token** - Get from https://huggingface.co/settings/tokens +- **Model ID** - Default: `swiss-ai/Apertus-8B-Instruct-2509` (translation-optimized model) +- **Host IP** - Your server's IP address +- **Ports and proxy settings** + +### 2. Build Images + +```bash +./deploy/build.sh +``` + +This builds: + +- Translation backend service +- Next.js UI service + +### 3. Start Services + +```bash +./deploy/start.sh +``` + +Wait for services to initialize (~2-5 minutes for first run as models download). + +### 4. Access the Application + +- **Web UI**: http://localhost:80 +- **API Endpoint**: http://localhost:8888/v1/translation + +### 5. 
Test the Service + +```bash +./deploy/test.sh +``` + +Or test manually: + +```bash +curl -X POST http://localhost:8888/v1/translation \ + -H "Content-Type: application/json" \ + -d '{ + "language_from": "English", + "language_to": "Spanish", + "source_language": "Hello, how are you today?" + }' +``` + +## 📋 Configuration + +### Environment Variables + +Key variables in `.env`: + +| Variable | Description | Default | +| -------------- | ---------------------------- | ----------------------------------- | +| `HF_TOKEN` | HuggingFace API token | Required | +| `LLM_MODEL_ID` | Model to use for translation | `swiss-ai/Apertus-8B-Instruct-2509` | +| `MODEL_CACHE` | Directory for model storage | `./data` | +| `host_ip` | Server IP address | `localhost` | +| `NGINX_PORT` | External port for web access | `80` | + +See `.env.example` for full configuration options. + +### Supported Models + +The service works with any HuggingFace text generation model. Recommended models: + +- **swiss-ai/Apertus-8B-Instruct-2509** - Multilingual translation (default) +- **haoranxu/ALMA-7B** - Specialized translation model + +## 🛠️ Development + +### Project Structure + +``` +PolyLingua/ +├── polylingua.py # Backend polylingua service +├── requirements.txt # Python dependencies +├── Dockerfile # Backend container definition +├── docker-compose.yaml # Multi-service orchestration +├── set_env.sh # Environment setup script +├── .env.example # Environment template +├── ui/ # Next.js frontend +│ ├── app/ # Next.js app directory +│ ├── components/ # React components +│ ├── Dockerfile # UI container definition +│ └── package.json # Node dependencies +└── deploy/ # Deployment scripts + ├── nginx.conf # Nginx configuration + ├── build.sh # Image build script + ├── start.sh # Service startup script + ├── stop.sh # Service shutdown script + └── test.sh # API testing script +``` + +### Running Locally (Development) + +**Backend:** + +```bash +# Install dependencies +pip install -r requirements.txt + +# 
Set environment variables +export LLM_SERVICE_HOST_IP=localhost +export LLM_SERVICE_PORT=9000 +export MEGA_SERVICE_PORT=8888 + +# Run service +python polylingua.py +``` + +**Frontend:** + +```bash +cd ui +npm install +npm run dev +``` + +### API Reference + +#### POST /v1/translation + +Translate text between languages. + +**Request:** + +```json +{ + "language_from": "English", + "language_to": "Spanish", + "source_language": "Your text to translate" +} +``` + +**Response:** + +```json +{ + "model": "polylingua", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Translated text here" + }, + "finish_reason": "stop" + } + ], + "usage": {} +} +``` + +## 🔧 Operations + +### View Logs + +```bash +# All services +docker compose logs -f + +# Specific service +docker compose logs -f polylingua-backend-server +docker compose logs -f polylingua-ui-server +``` + +### Stop Services + +```bash +./deploy/stop.sh +``` + +### Update Services + +```bash +# Rebuild images +./deploy/build.sh + +# Restart services +docker compose down +./deploy/start.sh +``` + +### Clean Up + +```bash +# Stop and remove containers +docker compose down + +# Remove volumes (including model cache) +docker compose down -v +``` + +## 🐛 Troubleshooting + +### Service won't start + +1. Check if ports are available: + + ```bash + sudo lsof -i :80,8888,9000,8028,5173 + ``` + +2. Verify environment variables: + + ```bash + cat .env + ``` + +3. 
Check service health: + ```bash + docker compose ps + docker compose logs + ``` + +### Model download fails + +- Ensure `HF_TOKEN` is set correctly +- Check internet connection +- Verify model ID exists on HuggingFace +- Check disk space in `MODEL_CACHE` directory + +### Translation errors + +- Wait for vLLM service to fully initialize (check logs) +- Verify LLM service is healthy: `curl http://localhost:9000/v1/health` +- Check vLLM service: `curl http://localhost:8028/health` + +### UI can't connect to backend + +- Verify `BACKEND_SERVICE_ENDPOINT` in `.env` +- Check if backend is running: `docker compose ps` +- Test API directly: `curl http://localhost:8888/v1/translation` + +## 🔗 Resources + +- [OPEA Project](https://github.com/opea-project) +- [GenAIComps](https://github.com/opea-project/GenAIComps) +- [GenAIExamples](https://github.com/opea-project/GenAIExamples) +- [vLLM](https://github.com/vllm-project/vllm) + +## 📧 Support + +For issues and questions: + +- Open an issue on GitHub +- Check existing issues for solutions +- Review OPEA documentation + +--- + +**Built with OPEA - Open Platform for Enterprise AI** 🚀 diff --git a/PolyLingua/deploy/build.sh b/PolyLingua/deploy/build.sh new file mode 100755 index 0000000000..143786a9ac --- /dev/null +++ b/PolyLingua/deploy/build.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +set -e + +echo "======================================" +echo "Building OPEA PolyLingua Service Images" +echo "======================================" + +# Source environment variables +if [ -f .env ]; then + echo "Loading environment from .env file..." + export $(cat .env | grep -v '^#' | xargs) +else + echo "Warning: .env file not found. Using default values." + echo "Run './set_env.sh' to configure environment variables." +fi + +# Build polylingua backend +echo "" +echo "Building polylingua backend service..." 
+docker build --no-cache -t ${REGISTRY:-opea}/polylingua:${TAG:-latest} -f Dockerfile . + +# Build polylingua UI +echo "" +echo "Building polylingua UI service..." +docker build --no-cache \ + --build-arg BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} \ + -t ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest} \ + -f ui/Dockerfile ./ui + +echo "" +echo "======================================" +echo "Build completed successfully!" +echo "======================================" +echo "" +echo "Images built:" +echo " - ${REGISTRY:-opea}/polylingua:${TAG:-latest}" +echo " - ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest}" +echo "" +echo "To start the services, run:" +echo " ./deploy/start.sh" +echo "" diff --git a/PolyLingua/deploy/nginx.conf b/PolyLingua/deploy/nginx.conf new file mode 100644 index 0000000000..cfc152eab4 --- /dev/null +++ b/PolyLingua/deploy/nginx.conf @@ -0,0 +1,70 @@ +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + error_log /var/log/nginx/error.log warn; + + sendfile on; + keepalive_timeout 65; + + # Frontend server + upstream frontend { + server polylingua-ui-server:5173; + } + + # Backend server + upstream backend { + server polylingua-backend-server:8888; + } + + server { + listen 80; + server_name localhost; + + # Frontend routes + location / { + proxy_pass http://frontend; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; 
+ } + + # Backend API routes + location /v1/ { + proxy_pass http://backend; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_connect_timeout 600s; + proxy_send_timeout 600s; + proxy_read_timeout 600s; + send_timeout 600s; + } + + # Health check endpoint + location /health { + access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain; + } + } +} diff --git a/PolyLingua/deploy/start.sh b/PolyLingua/deploy/start.sh new file mode 100755 index 0000000000..545b86d3a1 --- /dev/null +++ b/PolyLingua/deploy/start.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +set -e + +echo "======================================" +echo "Starting OPEA PolyLingua Service" +echo "======================================" + +# Source environment variables +if [ -f .env ]; then + echo "Loading environment from .env file..." + export $(cat .env | grep -v '^#' | xargs) +else + echo "ERROR: .env file not found!" + echo "Please run './set_env.sh' first to configure environment variables." + exit 1 +fi + +# Check for HuggingFace token +if [ -z "$HF_TOKEN" ]; then + echo "WARNING: HF_TOKEN is not set!" + echo "You may need a HuggingFace token to download models." + read -p "Continue anyway? (y/N): " confirm + if [[ ! $confirm =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +# Create model cache directory if it doesn't exist +mkdir -p ${MODEL_CACHE:-./data} + +echo "" +echo "Starting services with docker compose..." +docker compose up -d + +echo "" +echo "Waiting for services to start..." +sleep 5 + +echo "" +echo "======================================" +echo "Service Status" +echo "======================================" +docker compose ps + +echo "" +echo "======================================" +echo "Services started successfully!" 
+echo "======================================" +echo "" +echo "Access points:" +echo " - Frontend UI: http://${host_ip:-localhost}:${NGINX_PORT:-80}" +echo " - Backend API: http://${host_ip:-localhost}:8888" +# echo " - LLM Service: http://${host_ip:-localhost}:9000" +echo "" +# echo "To view logs:" +# echo " docker compose logs -f" +# echo "" +echo "To stop services:" +echo " ./deploy/stop.sh" +echo "" diff --git a/PolyLingua/deploy/stop.sh b/PolyLingua/deploy/stop.sh new file mode 100755 index 0000000000..d661a75d57 --- /dev/null +++ b/PolyLingua/deploy/stop.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +set -e + +echo "======================================" +echo "Stopping OPEA PolyLingua Service" +echo "======================================" + +echo "" +echo "Stopping services..." +docker compose down + +echo "" +echo "======================================" +echo "Services stopped successfully!" +echo "======================================" +echo "" +echo "To start services again:" +echo " ./deploy/start.sh" +echo "" +echo "To remove all data (including model cache):" +echo " docker compose down -v" +echo "" diff --git a/PolyLingua/deploy/test.sh b/PolyLingua/deploy/test.sh new file mode 100755 index 0000000000..7cb36aeeb0 --- /dev/null +++ b/PolyLingua/deploy/test.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +set -e + +echo "======================================" +echo "Testing OPEA PolyLingua Service" +echo "======================================" + +# Source environment variables +if [ -f .env ]; then + export $(cat .env | grep -v '^#' | xargs) +fi + +HOST=${host_ip:-localhost} +PORT=${BACKEND_SERVICE_PORT:-8888} + +echo "" +echo "Testing translation endpoint..." 
+echo "Target: http://${HOST}:${PORT}/v1/translation" +echo "" + +response=$(curl -s -w "\n%{http_code}" -X POST "http://${HOST}:${PORT}/v1/translation" \ + -H "Content-Type: application/json" \ + -d '{ + "language_from": "English", + "language_to": "Spanish", + "source_language": "Hello, how are you today?" + }') + +http_code=$(echo "$response" | tail -n1) +body=$(echo "$response" | head -n-1) + +echo "HTTP Status: $http_code" +echo "" + +if [ "$http_code" -eq 200 ]; then + echo "✓ PolyLingua service is working!" + echo "" + echo "Response:" + echo "$body" | jq '.' 2>/dev/null || echo "$body" +else + echo "✗ PolyLingua service returned an error!" + echo "" + echo "Response:" + echo "$body" + exit 1 +fi + +echo "" +echo "======================================" +echo "Test completed successfully!" +echo "======================================" diff --git a/PolyLingua/docker-compose.yaml b/PolyLingua/docker-compose.yaml new file mode 100644 index 0000000000..e1d41f3d51 --- /dev/null +++ b/PolyLingua/docker-compose.yaml @@ -0,0 +1,101 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-service: + image: budstudio/vllm-cpu:0.11.0 + container_name: vllm-service + ports: + - "8028:8000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 8000 --max-model-len 8192 + + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: llm-textgen-server + depends_on: + vllm-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + 
http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${VLLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + restart: unless-stopped + + polylingua-backend-server: + image: ${REGISTRY:-opea}/polylingua:${TAG:-latest} + container_name: polylingua-backend-server + depends_on: + - vllm-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - MEGA_SERVICE_PORT=8888 + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_SERVICE_PORT} + ipc: host + restart: always + + polylingua-ui-server: + image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest} + container_name: polylingua-ui-server + depends_on: + - polylingua-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + + polylingua-nginx-server: + image: nginx:alpine + container_name: polylingua-nginx-server + depends_on: + - polylingua-backend-server + - polylingua-ui-server + ports: + - "${NGINX_PORT:-80}:80" + volumes: + - ./deploy/nginx.conf:/etc/nginx/nginx.conf:ro + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/PolyLingua/docker_image_build/build.yaml b/PolyLingua/docker_image_build/build.yaml new file mode 100644 index 0000000000..7ae835699f --- /dev/null +++ b/PolyLingua/docker_image_build/build.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + polylingua: + build: + args: + IMAGE_REPO: ${REGISTRY} + BASE_TAG: ${TAG} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: 
${no_proxy} + context: ../ + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/polylingua:${TAG:-latest} + + polylingua-ui: + build: + context: ../ui + dockerfile: ./Dockerfile + extends: polylingua + image: ${REGISTRY:-opea}/polylingua-ui:${TAG:-latest} + + llm-textgen: + build: + context: GenAIComps + dockerfile: comps/llms/src/text-generation/Dockerfile + extends: polylingua + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} diff --git a/PolyLingua/polylingua.py b/PolyLingua/polylingua.py new file mode 100644 index 0000000000..e57a326f98 --- /dev/null +++ b/PolyLingua/polylingua.py @@ -0,0 +1,409 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio +import json +import os +import tempfile +from pathlib import Path + +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ChatCompletionResponseChoice, + ChatMessage, + UsageInfo, +) +from docling.datamodel.base_models import InputFormat +from docling.document_converter import DocumentConverter +from fastapi import File, Form, HTTPException, Request, UploadFile +from fastapi.responses import StreamingResponse +from langdetect import LangDetectException, detect + +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) +LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") +LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) +LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "swiss-ai/Apertus-8B-Instruct-2509") + +# Language code to name mapping +LANGUAGE_MAP = { + "en": "English", + "es": "Spanish", + "fr": "French", + "de": "German", + "it": "Italian", + "pt": "Portuguese", + "ru": "Russian", + "ja": "Japanese", + "ko": "Korean", + "zh-cn": "Chinese (Simplified)", + "zh-tw": "Chinese (Traditional)", + "ar": "Arabic", + "hi": "Hindi", + "nl": "Dutch", + "pl": "Polish", + "tr": "Turkish", + "sv": "Swedish", +} + +# Text formats that can be read directly (no conversion needed) +TEXT_FORMATS = {".txt", ".md", ".markdown", ".rst", ".log", ".csv"} + +# Document formats that require docling conversion +DOCUMENT_FORMATS = {".docx", ".html"} + +# All supported extensions +SUPPORTED_EXTENSIONS = TEXT_FORMATS | DOCUMENT_FORMATS + +# Maximum file size (20MB) +MAX_FILE_SIZE = 20 * 1024 * 1024 + + +class DocumentProcessor: + """Handles document processing using docling for various file formats.""" + + def __init__(self): + # Initialize document converter for office documents + self.converter = DocumentConverter() + + async def process_file(self, file: UploadFile) -> list[str]: + """Process an 
uploaded file and extract text content in chunks. + + Args: + file: The uploaded file + + Returns: + List of text chunks (each chunk as markdown string) + + Raises: + ValueError: If file type is not supported or file is too large + """ + # Check file size + contents = await file.read() + if len(contents) > MAX_FILE_SIZE: + raise ValueError(f"File size exceeds maximum limit of {MAX_FILE_SIZE / 1024 / 1024}MB") + + # Check file extension + file_ext = Path(file.filename).suffix.lower() + if file_ext not in SUPPORTED_EXTENSIONS: + raise ValueError( + f"Unsupported file type: {file_ext}. " f"Supported types: {', '.join(sorted(SUPPORTED_EXTENSIONS))}" + ) + + page_texts = [] + CHUNK_SIZE = 8000 # ~2000 words per chunk + + # Handle plain text files (fast path - no conversion needed) + if file_ext in TEXT_FORMATS: + print(f"Reading text file {file.filename}...") + try: + # Try UTF-8 first + text_content = contents.decode("utf-8") + except UnicodeDecodeError: + # Fallback to latin-1 for other encodings + print("UTF-8 decode failed, trying latin-1...") + text_content = contents.decode("latin-1") + + print(f"Read {len(text_content)} characters from text file") + + # Split into chunks if needed + if len(text_content) > CHUNK_SIZE: + print(f"Splitting into chunks of {CHUNK_SIZE} chars") + for i in range(0, len(text_content), CHUNK_SIZE): + chunk = text_content[i : i + CHUNK_SIZE] + page_texts.append(chunk) + print(f"Chunk {len(page_texts)}: {len(chunk)} chars") + else: + page_texts.append(text_content) + print(f"Single chunk: {len(text_content)} chars") + + print(f"Total chunks: {len(page_texts)}") + return page_texts + + # Handle document files (DOCX, HTML - requires docling conversion) + if file_ext in DOCUMENT_FORMATS: + # Save file temporarily for docling + with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file: + tmp_file.write(contents) + tmp_path = tmp_file.name + + try: + # Convert document using docling + print(f"Converting document 
{file.filename}...") + result = self.converter.convert(tmp_path) + print("Conversion completed") + + # Export entire document to markdown + full_markdown = result.document.export_to_markdown() + print(f"Extracted {len(full_markdown)} characters from document") + + # Split into chunks for translation + if len(full_markdown) > CHUNK_SIZE: + print(f"Splitting into chunks of {CHUNK_SIZE} chars") + # Split into manageable chunks + for i in range(0, len(full_markdown), CHUNK_SIZE): + chunk = full_markdown[i : i + CHUNK_SIZE] + page_texts.append(chunk) + print(f"Chunk {len(page_texts)}: {len(chunk)} chars") + else: + # Small enough to translate as single chunk + page_texts.append(full_markdown) + print(f"Single chunk: {len(full_markdown)} chars") + + print(f"Total chunks: {len(page_texts)}") + return page_texts + + finally: + # Clean up temporary file + Path(tmp_path).unlink(missing_ok=True) + + # Should never reach here due to extension check above + raise ValueError(f"Unsupported file type: {file_ext}") + + +class PolyLinguaService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + self.endpoint = str(MegaServiceEndpoint.TRANSLATION) + self.doc_processor = DocumentProcessor() + + def add_remote_service(self): + llm = MicroService( + name="llm", + host=LLM_SERVICE_HOST_IP, + port=LLM_SERVICE_PORT, + endpoint="/v1/chat/completions", + use_remote_service=True, + service_type=ServiceType.LLM, + ) + self.megaservice.add(llm) + + async def translate_page(self, page_text: str, language_from: str, language_to: str) -> str: + """Translate a single page of text by consuming streaming response.""" + prompt_template = """ + You are a translation assistant who is specialized in translating {language_from} to {language_to}. + + 1. Answer should only contain the translation of the source language to the target language. + 2. Do not include any other text or information. + 3. 
Do not include any other language than the target language. + 4. Do not include any other information than the translation. + + Translate this from {language_from} to {language_to}: + + {source_language} + + """ + prompt = prompt_template.format(language_from=language_from, language_to=language_to, source_language=page_text) + + # Create chat completion request with streaming + chat_request_dict = { + "model": LLM_MODEL_ID, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 4096, + "stream": True, + } + + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=chat_request_dict) + + # Find the LLM service response + for node, response in result_dict.items(): + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LLM + ): + # Consume the streaming response + accumulated_text = "" + + # Get the response body iterator + async for chunk in response.body_iterator: + chunk_str = chunk.decode("utf-8") if isinstance(chunk, bytes) else chunk + + # Parse SSE format + lines = chunk_str.split("\n") + for line in lines: + if line.startswith("data: "): + data = line[6:] # Remove "data: " prefix + + if data == "[DONE]": + continue + + try: + parsed = json.loads(data) + # Extract content from chat completion format + text = parsed.get("choices", [{}])[0].get("delta", {}).get("content", "") + if text: + accumulated_text += text + except: + continue + + return accumulated_text + + # Fallback if no streaming response found + raise Exception("No LLM streaming response found") + + async def handle_request(self, request: Request): + """Handle both JSON text input and multipart file uploads.""" + content_type = request.headers.get("content-type", "") + is_file_upload = False + + # Check if this is a file upload request + if "multipart/form-data" in content_type: + # Handle file upload + is_file_upload = True + form_data = await 
request.form() + language_from = form_data.get("language_from", "auto") + language_to = form_data.get("language_to") + file = form_data.get("file") + + if not file or not hasattr(file, "filename"): + raise HTTPException(status_code=400, detail="No file uploaded") + + if not language_to: + raise HTTPException(status_code=400, detail="Target language (language_to) is required") + + try: + # Process the uploaded file to extract text page by page + page_texts = await self.doc_processor.process_file(file) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") + + else: + # Handle JSON text input (existing behavior) + data = await request.json() + language_from = data.get("language_from", "auto") + language_to = data.get("language_to") + source_language = data.get("source_language") + + if not language_to: + raise HTTPException(status_code=400, detail="Target language (language_to) is required") + + if not source_language: + raise HTTPException(status_code=400, detail="Source text (source_language) is required") + + # Handle file upload (page-by-page translation) + if is_file_upload: + # Auto-detect source language from first page + if language_from.lower() == "auto" and page_texts: + try: + detected_code = detect(page_texts[0]) + language_from = LANGUAGE_MAP.get(detected_code, "English") + except LangDetectException: + language_from = "English" + + # Translate each page separately + translated_pages = [] + for page_num, page_text in enumerate(page_texts, start=1): + print(f"Translating page {page_num}/{len(page_texts)}...") + try: + translated_page = await self.translate_page(page_text, language_from, language_to) + translated_pages.append(translated_page) + except Exception as e: + print(f"Error translating page {page_num}: {str(e)}") + translated_pages.append(f"[Error translating page {page_num}]") + + # Combine all translated pages + 
combined_translation = "\n\n--- Page Break ---\n\n".join(translated_pages) + + # Return combined result + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=combined_translation), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="polylingua", choices=choices, usage=usage) + + # Handle text input (existing streaming behavior) + else: + # Auto-detect source language if set to "auto" + if language_from.lower() == "auto": + try: + detected_code = detect(source_language) + language_from = LANGUAGE_MAP.get(detected_code, "English") + except LangDetectException: + language_from = "English" + + prompt_template = """ + You are a translation assistant who is specialized in translating {language_from} to {language_to}. + + 1. Answer should only contain the translation of the source language to the target language. + 2. Do not include any other text or information. + 3. Do not include any other language than the target language. + 4. Do not include any other information than the translation. + + Translate this from {language_from} to {language_to}: + + {source_language} + + """ + prompt = prompt_template.format( + language_from=language_from, language_to=language_to, source_language=source_language + ) + + # Create chat completion request as dict for the LLM service + chat_request_dict = { + "model": LLM_MODEL_ID, + "messages": [{"role": "user", "content": prompt}], + "stream": True, + } + + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=chat_request_dict) + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LLM. 
+ if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LLM + ): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="polylingua", choices=choices, usage=usage) + + def start(self): + self.service = MicroService( + self.__class__.__name__, + service_role=ServiceRoleType.MEGASERVICE, + host=self.host, + port=self.port, + endpoint=self.endpoint, + input_datatype=ChatCompletionRequest, + output_datatype=ChatCompletionResponse, + ) + self.service.add_route(self.endpoint, self.handle_request, methods=["POST"]) + self.service.start() + + +if __name__ == "__main__": + polylingua = PolyLinguaService(port=MEGA_SERVICE_PORT) + polylingua.add_remote_service() + polylingua.start() diff --git a/PolyLingua/requirements.txt b/PolyLingua/requirements.txt new file mode 100644 index 0000000000..aa1c7d7479 --- /dev/null +++ b/PolyLingua/requirements.txt @@ -0,0 +1,16 @@ + +# Async Support (asyncio is part of the Python standard library; do not install the stale PyPI backport) +aiohttp>=3.9.0 + +# Document Processing +docling>=2.0.0 + +# Core Dependencies +fastapi>=0.109.0 + +# Language Detection +langdetect>=1.0.9 +# OPEA GenAIComps Framework +opea-comps>=1.3.0 +python-multipart>=0.0.9 +uvicorn[standard]>=0.27.0 diff --git a/PolyLingua/set_env.sh b/PolyLingua/set_env.sh new file mode 100755 index 0000000000..3a56740ed5 --- /dev/null +++ b/PolyLingua/set_env.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Environment Setup Script for OPEA PolyLingua Service + +echo "======================================" +echo "OPEA PolyLingua Service Setup" +echo "======================================" +echo "" 
+ +# Function to prompt for input with default value +prompt_with_default() { + local prompt="$1" + local default="$2" + local var_name="$3" + + read -p "$prompt [$default]: " input + input="${input:-$default}" + export $var_name="$input" + echo "export $var_name=\"$input\"" >> .env +} + +# Remove existing .env file +rm -f .env + +# Get host IP +host_ip=$(hostname -I | awk '{print $1}') +if [ -z "$host_ip" ]; then + host_ip="localhost" +fi + +echo "Detected host IP: $host_ip" +echo "" + +# HuggingFace Configuration +echo "--- HuggingFace Configuration ---" +prompt_with_default "Enter your HuggingFace API Token (get from https://huggingface.co/settings/tokens)" "" "HF_TOKEN" + +# Model Configuration +echo "" +echo "--- Model Configuration ---" +prompt_with_default "Enter LLM Model ID" "haoranxu/ALMA-13B" "LLM_MODEL_ID" +prompt_with_default "Enter Model Cache Directory" "./data" "MODEL_CACHE" + +# Host Configuration +echo "" +echo "--- Host Configuration ---" +prompt_with_default "Enter Host IP" "$host_ip" "host_ip" + +# Service Endpoints +echo "" +echo "--- Service Endpoints ---" +export VLLM_ENDPOINT="http://${host_ip}:8028" +echo "export VLLM_ENDPOINT=\"http://${host_ip}:8028\"" >> .env + +export LLM_SERVICE_HOST_IP="${host_ip}" +echo "export LLM_SERVICE_HOST_IP=\"${host_ip}\"" >> .env + +export LLM_SERVICE_PORT="9000" +echo "export LLM_SERVICE_PORT=\"9000\"" >> .env + +export MEGA_SERVICE_HOST_IP="${host_ip}" +echo "export MEGA_SERVICE_HOST_IP=\"${host_ip}\"" >> .env + +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888" +echo "export BACKEND_SERVICE_ENDPOINT=\"http://${host_ip}:8888\"" >> .env + +export FRONTEND_SERVICE_IP="${host_ip}" +echo "export FRONTEND_SERVICE_IP=\"${host_ip}\"" >> .env + +export FRONTEND_SERVICE_PORT="5173" +echo "export FRONTEND_SERVICE_PORT=\"5173\"" >> .env + +export BACKEND_SERVICE_NAME="polylingua" +echo "export BACKEND_SERVICE_NAME=\"polylingua\"" >> .env + +export BACKEND_SERVICE_IP="${host_ip}" +echo "export 
BACKEND_SERVICE_IP=\"${host_ip}\"" >> .env + +export BACKEND_SERVICE_PORT="8888" +echo "export BACKEND_SERVICE_PORT=\"8888\"" >> .env + +# Docker Configuration +echo "" +echo "--- Docker Configuration ---" +prompt_with_default "Enter Docker Registry" "opea" "REGISTRY" +prompt_with_default "Enter Docker Tag" "latest" "TAG" + +# Nginx Configuration +prompt_with_default "Enter Nginx Port" "80" "NGINX_PORT" + +# Proxy Settings (optional) +echo "" +echo "--- Proxy Settings (optional, press Enter to skip) ---" +prompt_with_default "Enter HTTP Proxy" "" "http_proxy" +prompt_with_default "Enter HTTPS Proxy" "" "https_proxy" +prompt_with_default "Enter No Proxy" "" "no_proxy" + +echo "" +echo "======================================" +echo "Configuration saved to .env" +echo "======================================" +echo "" +echo "To load these environment variables, run:" +echo " source .env" +echo "" +echo "To start the services, run:" +echo " docker compose up -d" +echo "" diff --git a/PolyLingua/tests/test_compose_on_xeon.sh b/PolyLingua/tests/test_compose_on_xeon.sh new file mode 100755 index 0000000000..c26bbc7ad4 --- /dev/null +++ b/PolyLingua/tests/test_compose_on_xeon.sh @@ -0,0 +1,360 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +# Get the directory where this script is located +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# PolyLingua root is one level up from tests directory +WORKPATH=$(dirname "$SCRIPT_DIR") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +echo "Script directory: $SCRIPT_DIR" +echo "Working directory: $WORKPATH" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd 
$WORKPATH/docker_image_build + + # Clone GenAIComps + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + # Build all images using build.yaml + echo "Building PolyLingua images with --no-cache, check docker_image_build.log for details..." + service_list="polylingua polylingua-ui llm-textgen" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log 2>&1 + + echo "Image build completed" + docker images | grep -E "polylingua|llm-textgen" + sleep 1s +} + +function start_services() { + cd $WORKPATH + export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" + + # Load environment variables + if [ ! -f .env ]; then + echo "Creating .env file..." + export HF_TOKEN=${HF_TOKEN} + export LLM_MODEL_ID="swiss-ai/Apertus-8B-Instruct-2509" + export VLLM_ENDPOINT="http://${host_ip}:8028" + export LLM_SERVICE_HOST_IP=${host_ip} + export LLM_SERVICE_PORT=9000 + export MEGA_SERVICE_HOST_IP=${host_ip} + export MEGA_SERVICE_PORT=8888 + export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888" + export BACKEND_SERVICE_NAME="polylingua" + export BACKEND_SERVICE_IP=${host_ip} + export BACKEND_SERVICE_PORT=8888 + export FRONTEND_SERVICE_IP=${host_ip} + export FRONTEND_SERVICE_PORT=5173 + export NGINX_PORT=80 + + cat > .env < ${LOG_PATH}/start_services_with_compose.log 2>&1 + + # Wait for vLLM service to be ready + echo "Waiting for vLLM service to initialize (this may take several minutes)..." + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1 + if grep -E "Uvicorn running|Application startup complete" ${LOG_PATH}/vllm_service_start.log; then + echo "vLLM service is ready!" 
+ break + fi + if grep -q "error" ${LOG_PATH}/vllm_service_start.log; then + echo "Error detected in vLLM service startup" + cat ${LOG_PATH}/vllm_service_start.log + exit 1 + fi + sleep 10s + n=$((n+1)) + done + + if [[ "$n" -ge 100 ]]; then + echo "Timeout waiting for vLLM service" + docker logs vllm-service + exit 1 + fi + + echo "Waiting additional 10s for all services to stabilize..." + sleep 10s +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + local CONTENT_TYPE="${6:-application/json}" + + echo "Testing $SERVICE_NAME at $URL" + + if [[ "$CONTENT_TYPE" == "multipart/form-data" ]]; then + # Handle file upload + local HTTP_STATUS=$(eval curl -s -o /dev/null -w "%{http_code}" -X POST $INPUT_DATA "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + local CONTENT=$(eval curl -s -X POST $INPUT_DATA "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] ✓ Content is as expected." + else + echo "[ $SERVICE_NAME ] ✗ Content does not match expected result" + echo "Expected: $EXPECTED_RESULT" + echo "Got: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] ✗ HTTP status is $HTTP_STATUS (expected 200)" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + # Handle JSON request + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H "Content-Type: $CONTENT_TYPE" "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H "Content-Type: $CONTENT_TYPE" "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] ✓ Content is as expected." + else + echo "[ $SERVICE_NAME ] ✗ Content does not match expected result" + echo "Expected: $EXPECTED_RESULT" + echo "Got: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] ✗ HTTP status is $HTTP_STATUS (expected 200)" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + fi + + sleep 2s +} + +function validate_microservices() { + echo "======================================" + echo "Validating Microservices" + echo "======================================" + + # Test vLLM service health + echo "Testing vLLM service health..." + curl -s http://${ip_address}:8028/health || { + echo "vLLM health check failed" + exit 1 + } + echo "✓ vLLM service health check passed" + + # Test vLLM service chat completions + validate_services \ + "http://${ip_address}:8028/v1/chat/completions" \ + "content" \ + "vllm" \ + "vllm-service" \ + '{"model": "swiss-ai/Apertus-8B-Instruct-2509", "messages": [{"role": "user", "content": "Translate Hello to Spanish"}], "max_tokens": 32}' + + # Test LLM microservice + validate_services \ + "http://${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-textgen-server" \ + '{"query":"Translate Hello to Spanish", "max_tokens": 32}' +} + +function validate_megaservice() { + echo "======================================" + echo "Validating Megaservice" + echo "======================================" + + # Test 1: Basic text translation (English to Spanish) + echo "Test 1: Basic English to Spanish translation..." 
+ validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-basic" \ + "polylingua-backend-server" \ + '{"language_from": "English", "language_to": "Spanish", "source_language": "Hello, how are you today?"}' + + # Test 2: Language auto-detection + echo "Test 2: Auto-detection test..." + validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-auto" \ + "polylingua-backend-server" \ + '{"language_from": "auto", "language_to": "French", "source_language": "Hello world"}' + + # Test 3: Different language pair (English to German) + echo "Test 3: English to German translation..." + validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "mega-polylingua-german" \ + "polylingua-backend-server" \ + '{"language_from": "English", "language_to": "German", "source_language": "Good morning"}' +} + +function validate_file_translation() { + echo "======================================" + echo "Validating File Upload Translation" + echo "======================================" + + # Create test file + cd $WORKPATH/tests + mkdir -p test_data + echo "Hello, this is a test document for translation. It contains multiple sentences. We want to test if file upload works correctly." > test_data/sample.txt + + # Test file upload translation + echo "Testing file upload translation..." 
+ validate_services \ + "http://${ip_address}:8888/v1/translation" \ + "choices" \ + "file-translation" \ + "polylingua-backend-server" \ + '-F "file=@test_data/sample.txt" -F "language_from=English" -F "language_to=Spanish"' \ + "multipart/form-data" +} + +function validate_nginx() { + echo "======================================" + echo "Validating Nginx Proxy" + echo "======================================" + + # Test translation via nginx + validate_services \ + "http://${ip_address}:80/v1/translation" \ + "choices" \ + "nginx-proxy" \ + "polylingua-nginx-server" \ + '{"language_from": "English", "language_to": "Italian", "source_language": "Thank you very much"}' +} + +function validate_ui() { + echo "======================================" + echo "Validating UI Service" + echo "======================================" + + # Check if UI is accessible + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://${ip_address}:5173) + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ UI ] ✓ UI service is accessible" + else + echo "[ UI ] ✗ UI service returned HTTP status $HTTP_STATUS" + docker logs polylingua-ui-server + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH + echo "Stopping services..." 
+ docker compose down + echo "Services stopped" +} + +function main() { + echo "======================================" + echo "PolyLingua E2E Test Suite" + echo "======================================" + echo "Platform: Intel Xeon (CPU)" + echo "LLM Backend: vLLM" + echo "IP Address: ${ip_address}" + echo "======================================" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then + build_docker_images + else + echo "Skipping image build (using IMAGE_REPO=${IMAGE_REPO})" + fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::validate_file_translation" + validate_file_translation + echo "::endgroup::" + + echo "::group::validate_nginx" + validate_nginx + echo "::endgroup::" + + echo "::group::validate_ui" + validate_ui + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + + echo "======================================" + echo "✓ All tests passed successfully!" + echo "======================================" +} + +main diff --git a/PolyLingua/ui/.gitignore b/PolyLingua/ui/.gitignore new file mode 100644 index 0000000000..45c1abce86 --- /dev/null +++ b/PolyLingua/ui/.gitignore @@ -0,0 +1,36 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
+ +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# local env files +.env*.local +.env + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/PolyLingua/ui/Dockerfile b/PolyLingua/ui/Dockerfile new file mode 100644 index 0000000000..af9a98c0be --- /dev/null +++ b/PolyLingua/ui/Dockerfile @@ -0,0 +1,59 @@ +# Copyright (C) 2024 +# SPDX-License-Identifier: Apache-2.0 + +FROM node:18-alpine AS base + +# Install dependencies +FROM base AS deps +RUN apk add --no-cache libc6-compat +WORKDIR /app + +COPY package.json package-lock.json* ./ +RUN npm install + +# Build the application +FROM base AS builder +WORKDIR /app + +# Accept build argument +ARG BACKEND_SERVICE_ENDPOINT +ENV BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT + +COPY --from=deps /app/node_modules ./node_modules +COPY . . + +# Ensure public directory exists +RUN mkdir -p public + +# Set environment for build +ENV NEXT_TELEMETRY_DISABLED 1 + +RUN npm run build + +# Production image +FROM base AS runner +WORKDIR /app + +ENV NODE_ENV production +ENV NEXT_TELEMETRY_DISABLED 1 + +RUN addgroup --system --gid 1001 nodejs +RUN adduser --system --uid 1001 nextjs + +# Copy build output +COPY --from=builder /app/next.config.js ./ +COPY --from=builder /app/package.json ./ +COPY --from=builder /app/.next ./.next +COPY --from=builder /app/node_modules ./node_modules +COPY --from=builder /app/public ./public + +RUN chown -R nextjs:nodejs /app + +USER nextjs + +EXPOSE 5173 + +ENV PORT 5173 +ENV HOSTNAME "0.0.0.0" + +CMD ["npm", "start"] diff --git a/PolyLingua/ui/README.md b/PolyLingua/ui/README.md new file mode 100644 index 0000000000..4d19874dd1 --- /dev/null +++ b/PolyLingua/ui/README.md @@ -0,0 +1,155 @@ +# PolyLingua UI + +A modern, single-page translation interface built with Next.js 14, React, and shadcn/ui components. 
+ +## Features + +- 🌐 Clean and intuitive translation interface +- 🎨 Beautiful UI using shadcn/ui components and Tailwind CSS +- 📱 Fully responsive design +- 🌍 Support for 15 languages (Spanish, French, German, Italian, Portuguese, Russian, Japanese, Korean, Chinese, Arabic, Hindi, Dutch, Polish, Turkish, Swedish) +- ⚡ Real-time character count +- 🔄 Loading states and smooth animations + +## Tech Stack + +- **Framework**: Next.js 14 (App Router) +- **UI Components**: shadcn/ui (Radix UI + Tailwind CSS) +- **Styling**: Tailwind CSS +- **Icons**: Lucide React +- **Language**: TypeScript + +## Getting Started + +### Prerequisites + +- Node.js 18.x or higher +- npm, yarn, or pnpm + +### Installation + +1. Navigate to the ui directory: + +```bash +cd ui +``` + +2. Install dependencies: + +```bash +npm install +# or +yarn install +# or +pnpm install +``` + +3. Run the development server: + +```bash +npm run dev +# or +yarn dev +# or +pnpm dev +``` + +4. Open [http://localhost:3000](http://localhost:3000) in your browser to see the application. + +## Project Structure + +``` +ui/ +├── app/ +│ ├── globals.css # Global styles and Tailwind configuration +│ ├── layout.tsx # Root layout component +│ └── page.tsx # Main page (home) +├── components/ +│ ├── ui/ # shadcn/ui components +│ │ ├── button.tsx +│ │ ├── card.tsx +│ │ ├── label.tsx +│ │ ├── select.tsx +│ │ └── textarea.tsx +│ └── polylingua-form.tsx # Main translation form component +├── lib/ +│ └── utils.ts # Utility functions +├── package.json +├── tailwind.config.ts +├── tsconfig.json +└── next.config.js +``` + +## Usage + +1. **Enter Text**: Type or paste the text you want to translate in the source text area +2. **Select Language**: Choose your target language from the dropdown menu +3. **Translate**: Click the "Translate" button to see the translation + +## Backend Integration + +Currently, the app uses a mock translation function. To connect to a real translation backend: + +1. 
Update the `handleTranslate` function in `components/polylingua-form.tsx`: + +```typescript +const handleTranslate = async () => { + if (!sourceText.trim()) return; + + setIsLoading(true); + + try { + const response = await fetch("/api/translate", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + text: sourceText, + targetLanguage: targetLanguage, + }), + }); + + const data = await response.json(); + setTranslatedText(data.translatedText); + } catch (error) { + console.error("Translation error:", error); + setTranslatedText("Error: Translation failed. Please try again."); + } finally { + setIsLoading(false); + } +}; +``` + +2. Create an API route at `app/api/translate/route.ts` to handle the backend connection. + +## Build for Production + +```bash +npm run build +npm start +``` + +## Customization + +### Adding More Languages + +Edit the `languages` array in `components/polylingua-form.tsx`: + +```typescript +const languages = [ + { code: "es", name: "Spanish" }, + { code: "fr", name: "French" }, + // Add more languages here +]; +``` + +### Styling + +- Global styles: `app/globals.css` +- Tailwind configuration: `tailwind.config.ts` +- Component-specific styles: Use Tailwind utility classes + +## License + +MIT diff --git a/PolyLingua/ui/app/globals.css b/PolyLingua/ui/app/globals.css new file mode 100644 index 0000000000..01b77aafd6 --- /dev/null +++ b/PolyLingua/ui/app/globals.css @@ -0,0 +1,59 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + :root { + --background: 0 0% 100%; + --foreground: 222.2 84% 4.9%; + --card: 0 0% 100%; + --card-foreground: 222.2 84% 4.9%; + --popover: 0 0% 100%; + --popover-foreground: 222.2 84% 4.9%; + --primary: 270 70% 55%; + --primary-foreground: 210 40% 98%; + --secondary: 210 40% 96.1%; + --secondary-foreground: 222.2 47.4% 11.2%; + --muted: 210 40% 96.1%; + --muted-foreground: 215.4 16.3% 46.9%; + --accent: 210 40% 96.1%; + 
--accent-foreground: 222.2 47.4% 11.2%; + --destructive: 0 84.2% 60.2%; + --destructive-foreground: 210 40% 98%; + --border: 214.3 31.8% 91.4%; + --input: 214.3 31.8% 91.4%; + --ring: 270 70% 55%; + --radius: 0.5rem; + } + + .dark { + --background: 222.2 84% 4.9%; + --foreground: 210 40% 98%; + --card: 222.2 84% 4.9%; + --card-foreground: 210 40% 98%; + --popover: 222.2 84% 4.9%; + --popover-foreground: 210 40% 98%; + --primary: 270 75% 60%; + --primary-foreground: 222.2 47.4% 11.2%; + --secondary: 217.2 32.6% 17.5%; + --secondary-foreground: 210 40% 98%; + --muted: 217.2 32.6% 17.5%; + --muted-foreground: 215 20.2% 65.1%; + --accent: 217.2 32.6% 17.5%; + --accent-foreground: 210 40% 98%; + --destructive: 0 62.8% 30.6%; + --destructive-foreground: 210 40% 98%; + --border: 217.2 32.6% 17.5%; + --input: 217.2 32.6% 17.5%; + --ring: 270 75% 60%; + } +} + +@layer base { + * { + @apply border-border; + } + body { + @apply bg-background text-foreground; + } +} diff --git a/PolyLingua/ui/app/layout.tsx b/PolyLingua/ui/app/layout.tsx new file mode 100644 index 0000000000..df014cacf3 --- /dev/null +++ b/PolyLingua/ui/app/layout.tsx @@ -0,0 +1,22 @@ +import type { Metadata } from "next" +import { Inter } from "next/font/google" +import "./globals.css" + +const inter = Inter({ subsets: ["latin"] }) + +export const metadata: Metadata = { + title: "PolyLingua", + description: "Translate text to multiple languages", +} + +export default function RootLayout({ + children, +}: Readonly<{ + children: React.ReactNode +}>) { + return ( + + {children} + + ) +} diff --git a/PolyLingua/ui/app/page.tsx b/PolyLingua/ui/app/page.tsx new file mode 100644 index 0000000000..96881bfa63 --- /dev/null +++ b/PolyLingua/ui/app/page.tsx @@ -0,0 +1,9 @@ +import { PolyLinguaForm } from "@/components/polylingua-form" + +export default function Home() { + return ( +
+ +
+ ) +} diff --git a/PolyLingua/ui/components.json b/PolyLingua/ui/components.json new file mode 100644 index 0000000000..fa674c93d1 --- /dev/null +++ b/PolyLingua/ui/components.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "default", + "rsc": true, + "tsx": true, + "tailwind": { + "config": "tailwind.config.ts", + "css": "app/globals.css", + "baseColor": "slate", + "cssVariables": true, + "prefix": "" + }, + "aliases": { + "components": "@/components", + "utils": "@/lib/utils" + } +} diff --git a/PolyLingua/ui/components/polylingua-form.tsx b/PolyLingua/ui/components/polylingua-form.tsx new file mode 100644 index 0000000000..78aba636ca --- /dev/null +++ b/PolyLingua/ui/components/polylingua-form.tsx @@ -0,0 +1,438 @@ +"use client" + +import * as React from "react" +import { Button } from "@/components/ui/button" +import { Textarea } from "@/components/ui/textarea" +import { Label } from "@/components/ui/label" +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Languages, Loader2, Upload, X, FileText } from "lucide-react" +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs" + +const languages = [ + { code: "en", name: "English" }, + { code: "es", name: "Spanish" }, + { code: "fr", name: "French" }, + { code: "de", name: "German" }, + { code: "it", name: "Italian" }, + { code: "pt", name: "Portuguese" }, + { code: "ru", name: "Russian" }, + { code: "ja", name: "Japanese" }, + { code: "ko", name: "Korean" }, + { code: "zh", name: "Chinese (Simplified)" }, + { code: "ar", name: "Arabic" }, + { code: "hi", name: "Hindi" }, + { code: "nl", name: "Dutch" }, + { code: "pl", name: "Polish" }, + { code: "tr", name: "Turkish" }, + { code: "sv", name: "Swedish" }, +] + +// Supported file types +const SUPPORTED_FILE_TYPES = [ + ".docx", + 
".txt", + ".md", + ".markdown", + ".rst", + ".log", + ".csv", +] + +const MAX_FILE_SIZE = 20 * 1024 * 1024 // 20MB + +export function PolyLinguaForm() { + const [sourceText, setSourceText] = React.useState("") + const [translatedText, setTranslatedText] = React.useState("") + const [targetLanguage, setTargetLanguage] = React.useState("es") + const [isLoading, setIsLoading] = React.useState(false) + const [inputMode, setInputMode] = React.useState<"text" | "file">("text") + const [selectedFile, setSelectedFile] = React.useState(null) + const [fileError, setFileError] = React.useState("") + const [dragActive, setDragActive] = React.useState(false) + const [extractedText, setExtractedText] = React.useState("") + const fileInputRef = React.useRef(null) + + // Validate file + const validateFile = (file: File): string | null => { + const fileExt = `.${file.name.split(".").pop()?.toLowerCase()}` + if (!SUPPORTED_FILE_TYPES.includes(fileExt)) { + return `Unsupported file type. Supported: ${SUPPORTED_FILE_TYPES.join(", ")}` + } + if (file.size > MAX_FILE_SIZE) { + return `File size exceeds 20MB limit` + } + return null + } + + // Handle file selection + const handleFileSelect = (file: File) => { + const error = validateFile(file) + if (error) { + setFileError(error) + setSelectedFile(null) + } else { + setFileError("") + setSelectedFile(file) + setTranslatedText("") + setExtractedText("") + } + } + + // Handle drag events + const handleDrag = (e: React.DragEvent) => { + e.preventDefault() + e.stopPropagation() + if (e.type === "dragenter" || e.type === "dragover") { + setDragActive(true) + } else if (e.type === "dragleave") { + setDragActive(false) + } + } + + // Handle drop + const handleDrop = (e: React.DragEvent) => { + e.preventDefault() + e.stopPropagation() + setDragActive(false) + + if (e.dataTransfer.files && e.dataTransfer.files[0]) { + handleFileSelect(e.dataTransfer.files[0]) + } + } + + // Handle file input change + const handleFileInputChange = (e: 
React.ChangeEvent) => { + if (e.target.files && e.target.files[0]) { + handleFileSelect(e.target.files[0]) + } + } + + // Clear file selection + const clearFile = () => { + setSelectedFile(null) + setFileError("") + setExtractedText("") + if (fileInputRef.current) { + fileInputRef.current.value = "" + } + } + + const handleTranslate = async () => { + // Validate input based on mode + if (inputMode === "text" && !sourceText.trim()) { + return + } + if (inputMode === "file" && !selectedFile) { + return + } + + setIsLoading(true) + setTranslatedText("") // Clear previous translation + + try { + const selectedLang = languages.find(lang => lang.code === targetLanguage) + const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || "http://localhost:8888" + + let response: Response + + if (inputMode === "file" && selectedFile) { + // Handle file upload + const formData = new FormData() + formData.append("file", selectedFile) + formData.append("language_from", "auto") + formData.append("language_to", selectedLang?.name || "Spanish") + + response = await fetch(`${backendUrl}/v1/translation`, { + method: "POST", + body: formData, + }) + } else { + // Handle text input + response = await fetch(`${backendUrl}/v1/translation`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + language_from: "auto", + language_to: selectedLang?.name || "Spanish", + source_language: sourceText, + }), + }) + } + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) + throw new Error(errorData.detail || `Translation failed: ${response.statusText}`) + } + + // Check if response is streaming (SSE) + const contentType = response.headers.get("content-type") + if (contentType?.includes("text/event-stream")) { + // Handle Server-Sent Events streaming + const reader = response.body?.getReader() + const decoder = new TextDecoder() + let accumulatedText = "" + + if (!reader) { + throw new Error("Response body is not readable") + 
} + + while (true) { + const { done, value } = await reader.read() + if (done) break + + const chunk = decoder.decode(value, { stream: true }) + const lines = chunk.split("\n") + + for (const line of lines) { + if (line.startsWith("data: ")) { + const data = line.slice(6) // Remove "data: " prefix + + if (data === "[DONE]") { + continue + } + + try { + const parsed = JSON.parse(data) + // Extract content from chat completion streaming format + const text = parsed.choices?.[0]?.delta?.content || "" + if (text) { + accumulatedText += text + setTranslatedText(accumulatedText) + } + } catch (e) { + // Skip malformed JSON chunks + console.warn("Failed to parse chunk:", e) + } + } + } + } + } else { + // Handle regular JSON response (fallback) + const data = await response.json() + const translatedContent = data.choices?.[0]?.message?.content || data.text || "Translation not available" + setTranslatedText(translatedContent) + } + } catch (error) { + console.error("Translation error:", error) + setTranslatedText(`Error: Translation failed. Please try again.\n\nDetails: ${error instanceof Error ? error.message : "Unknown error"}`) + } finally { + setIsLoading(false) + } + } + + const characterCount = sourceText.length + + return ( +
+
+
+
+ +

PolyLingua

+
+

+ Translate your text to multiple languages +

+
+ + NetApp +
+ +
+ + +
+
+ Input + + Enter text or upload a document + +
+
+
+ +
+ +
+
+
+ + setInputMode(value as "text" | "file")}> + + Text Input + File Upload + + + +