-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
350 lines (300 loc) · 13.2 KB
/
server.py
File metadata and controls
350 lines (300 loc) · 13.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#!/usr/bin/env python3
"""
BRT Italia - Tracking API Server (production-ready)
=====================================================

Start:
    python3 server.py

    # With multiple workers (real production):
    gunicorn server:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000

Endpoints:
    GET /track/{parcel_number}  → shipment status (JSON)
    GET /health                 → server status
    GET /docs                   → Swagger UI
"""
import logging
import os
import re
import time
from functools import lru_cache
from typing import List, Optional
import requests
from bs4 import BeautifulSoup
from cachetools import TTLCache
from fastapi import Depends, FastAPI, Header, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
# ──────────────────────────────────────────────────────────────────────────────
# CONFIG (from environment variables, falling back to defaults)
# ──────────────────────────────────────────────────────────────────────────────
# Shared secret for API-key authentication; "" means authentication is disabled.
API_KEY = os.environ.get("API_KEY", "")
# Lifetime of a cached tracking result in seconds (default: 5 minutes).
CACHE_TTL_SEC = int(os.environ.get("CACHE_TTL", "300"))
# Rate limit in slowapi notation (default: at most 30 requests per minute per IP).
RATE_LIMIT = os.environ.get("RATE_LIMIT", "30/minute")
# Timeout, in seconds, applied to each HTTP request sent to mybrt.it.
REQUEST_TIMEOUT = int(os.environ.get("BRT_TIMEOUT", "25"))
# Root logging configuration: INFO and above, timestamp shows time of day only.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Named logger used for every message emitted by this module.
logger = logging.getLogger("brt-api")
# Browser-like User-Agent header sent with every request to mybrt.it.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
)
# Status labels (Italian, as shown by mybrt.it) that the HTML parser searches for
# in the page text. The parser tests them in this order for every line, so the
# order also determines the order of events found on the same line.
BRT_STATES = [
    "Spedizione consegnata a BRT",
    "In viaggio",
    "In filiale",
    "Spedizione in consegna",
    "Consegnato",
    "In attesa di ritiro",
    "Tentativo di consegna",
    "Spedizione affidata",
    "Spedizione in giacenza",
]
# ──────────────────────────────────────────────────────────────────────────────
# IN-MEMORY CACHE (TTLCache: entries expire automatically)
# ──────────────────────────────────────────────────────────────────────────────
# maxsize=500 → at most 500 parcels are held in the cache at the same time
_cache: TTLCache = TTLCache(maxsize=500, ttl=CACHE_TTL_SEC)
# ──────────────────────────────────────────────────────────────────────────────
# MODELS
# ──────────────────────────────────────────────────────────────────────────────
# One dated status entry in a shipment's tracking history.
# (Documented with comments rather than a docstring so the generated schema
# description is left unchanged.)
class TrackingEvent(BaseModel):
    # Event date as scraped from the page; the parser's pattern is NN-NN-NNNN
    # and it is sorted as day-month-year.
    date: str
    # Status label of the event (the parser emits entries of BRT_STATES).
    status: str
    # Event location; the parser in this file always emits an empty string.
    location: str = ""
# Response body of GET /track/{parcel_number}.
# (Documented with comments rather than a docstring so the generated schema
# description is left unchanged.)
class TrackingResponse(BaseModel):
    # Success flag; defaults to True.
    success: bool = True
    # Parcel number reported by the parser; may differ from the requested one
    # when mybrt.it shows the shipment under another number.
    parcel_number: str
    # Status of the most recent event ("Sconosciuto" when no event was found).
    current_status: str
    # Date of that most recent event ("" when no event was found).
    current_date: str
    # All events extracted from the page.
    events: List[TrackingEvent]
    # Duration of the upstream lookup in milliseconds (0 for cache hits).
    elapsed_ms: int
    # True when the response was served from the in-memory cache.
    cached: bool = False
    # Name of the upstream data source.
    source: str = "mybrt.it"
# ──────────────────────────────────────────────────────────────────────────────
# SCRAPING
# ──────────────────────────────────────────────────────────────────────────────
def _fetch_brt(parcel_number: str, *, max_attempts: int = 3) -> dict:
    """Query mybrt.it for a parcel, retrying automatically on failure.

    Args:
        parcel_number: Parcel number to look up.
        max_attempts: Maximum number of HTTP attempts before giving up.

    Returns:
        The dict built by ``_parse_html`` from the first usable response
        (HTTP 200 with a body longer than 5000 characters).

    Raises:
        RuntimeError: If no attempt produced a usable response. The message
            describes the last attempt's failure.
        Exception: Anything raised by ``_parse_html`` propagates unchanged
            (it is not retried).
    """
    search_url = "https://www.mybrt.it/it/mybrt/my-parcels/search"
    # Let requests build and encode the query string instead of formatting it by hand.
    query = {"lang": "it", "parcelNumber": parcel_number}
    last_error = None

    # The context manager closes the session (and its pooled connections) on every
    # exit path, including exceptions raised while parsing.
    with requests.Session() as session:
        session.headers.update({
            "User-Agent": USER_AGENT,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
            "DNT": "1",
        })

        for attempt in range(1, max_attempts + 1):
            try:
                resp = session.get(
                    search_url,
                    params=query,
                    timeout=REQUEST_TIMEOUT,
                    allow_redirects=True,
                )
            except requests.Timeout:
                last_error = "Timeout conectando a mybrt.it"
                logger.warning(f"Intento {attempt}: Timeout")
            except requests.RequestException as exc:
                last_error = str(exc)
                logger.warning(f"Intento {attempt}: {exc}")
            else:
                # Short or non-200 bodies are treated as failed attempts.
                if resp.status_code == 200 and len(resp.text) > 5000:
                    return _parse_html(resp.text, parcel_number)
                # Forget any earlier network error so the final message reflects
                # this (most recent) attempt rather than a stale one.
                last_error = None
                logger.warning(
                    f"Intento {attempt}: HTTP {resp.status_code}, {len(resp.text)} chars"
                )

            if attempt < max_attempts:
                time.sleep(attempt * 1.5)  # linear backoff: 1.5s, 3s, ...

    raise RuntimeError(last_error or "mybrt.it no respondió correctamente")
def _date_sort_key(date_str: str) -> tuple:
    """Turn a day-month-year string ("DD-MM-YYYY") into a sortable (year, month, day) tuple.

    Returns (0, 0, 0) when the string does not have that shape, so malformed
    dates sort before every real one.
    """
    try:
        parts = date_str.split("-")
        return (int(parts[2]), int(parts[1]), int(parts[0]))
    except (ValueError, IndexError):
        return (0, 0, 0)


def _parse_html(html: str, parcel_number: str) -> dict:
    """Extract tracking data from the HTML returned by mybrt.it.

    The page is reduced to its non-empty text lines. After the first line that
    contains the parcel number, every line mentioning one of ``BRT_STATES``
    becomes an event, provided a date appears on one of the next two lines.

    Args:
        html: Full HTML document of the search result page.
        parcel_number: Parcel number that was requested.

    Returns:
        A dict with keys ``parcel_number`` (the number actually found on the
        page), ``current_status``, ``current_date``, ``events`` (list of dicts
        with ``date``, ``status``, ``location``) and ``csrf_token`` (content of
        the ``_csrf`` meta tag, or None).

    Raises:
        ValueError: If no event was found and the page contains one of the
            known "not found" phrases.
    """
    soup = BeautifulSoup(html, "html.parser")

    # CSRF token (returned to the caller; not used by the parser itself)
    csrf_meta = soup.find("meta", attrs={"name": "_csrf"})
    csrf = csrf_meta.get("content") if csrf_meta else None

    # Work on the plain text, one stripped non-empty line at a time.
    plain_text = soup.get_text(separator="\n")
    lines = [text for text in (raw.strip() for raw in plain_text.split("\n")) if text]

    date_re = re.compile(r"\d{2}-\d{2}-\d{4}")
    brt_num_re = re.compile(r"\b(\d{12,15})\b")

    # BRT may show the shipment under a different number than the one searched
    # for (e.g. sender code → BRT code): fall back to the first 12-15 digit
    # number on the page when the requested one does not appear at all.
    actual_parcel = parcel_number
    if not any(parcel_number in line for line in lines):
        for line in lines:
            found = brt_num_re.search(line)
            if found:
                actual_parcel = found.group(1)
                logger.info(f"BRT remapeó {parcel_number} → {actual_parcel}")
                break

    # Lower-case each state once instead of once per (line, state) pair.
    states = [(state, state.lower()) for state in BRT_STATES]
    events = []
    seen = set()  # (status, date) pairs already emitted, for O(1) de-duplication
    in_section = False
    for i, line in enumerate(lines):
        if actual_parcel in line:
            in_section = True
            continue
        if not in_section:
            continue

        lowered = line.lower()
        matched = [state for state, needle in states if needle in lowered]
        if not matched:
            continue

        # The date must appear AFTER the status, within the next two lines.
        date = ""
        for following in lines[i + 1:i + 3]:
            date_match = date_re.search(following)
            if date_match:
                date = date_match.group(0)
                break
        # Only keep dated entries — this drops decorative steps without a date.
        if not date:
            continue

        for state in matched:
            if (state, date) not in seen:
                seen.add((state, date))
                events.append({"date": date, "status": state, "location": ""})

    # Current status = event with the most recent date; when several events
    # share that date, the last one in page order wins.
    current_status = "Sconosciuto"
    current_date = ""
    if events:
        newest = max(_date_sort_key(event["date"]) for event in events)
        latest = [event for event in events if _date_sort_key(event["date"]) == newest][-1]
        current_status = latest["status"]
        current_date = latest["date"]

    # Distinguish "parcel unknown to BRT" from "page parsed but no events".
    if not events:
        not_found_signals = [
            "non è ancora associata",
            "spedizione non trovata",
            "nessun risultato",
        ]
        lowered_text = plain_text.lower()
        if any(signal in lowered_text for signal in not_found_signals):
            raise ValueError(f"Paquete {parcel_number} no encontrado en mybrt.it")

    return {
        "parcel_number": actual_parcel,
        "current_status": current_status,
        "current_date": current_date,
        "events": events,
        "csrf_token": csrf,
    }
# ──────────────────────────────────────────────────────────────────────────────
# FASTAPI APP
# ──────────────────────────────────────────────────────────────────────────────
# Rate limiter keyed by the client's remote address, with RATE_LIMIT as default.
limiter = Limiter(key_func=get_remote_address, default_limits=[RATE_LIMIT])
app = FastAPI(
    title="BRT Italia Tracking API",
    description="Tracking de envíos BRT Italia via mybrt.it",
    version="2.0.0",
)
# Attach the limiter to the app state and register slowapi's handler for
# RateLimitExceeded errors.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# CORS: GET requests are allowed from any origin, with any request header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)
# ── Dependency: verify the API key (optional) ────────────────────────────────
def verify_api_key(x_api_key: Optional[str] = Header(default=None)):
    """Reject the request unless it carries the configured API key.

    Args:
        x_api_key: Value of the ``X-API-Key`` request header, or None when the
            header is absent.

    Raises:
        HTTPException: 401 when an API key is configured and the header is
            missing or does not match it.
    """
    import hmac  # local import so this block does not depend on file-level edits

    if not API_KEY:
        return  # No API_KEY configured → open access

    # Compare in constant time so response timing does not reveal how many
    # leading characters of a guess were correct. Bytes are used because
    # compare_digest() rejects non-ASCII str arguments.
    supplied = (x_api_key or "").encode("utf-8")
    if not hmac.compare_digest(supplied, API_KEY.encode("utf-8")):
        raise HTTPException(status_code=401, detail="API key inválida o faltante")
# ── Endpoints ─────────────────────────────────────────────────────────────────
@app.get("/health")
def health():
    # Liveness probe that also reports the active cache, rate-limit and auth
    # settings. (Plain comments are used instead of a docstring so the
    # published endpoint description stays as it was.)
    report = {"status": "ok"}
    report["cache_size"] = len(_cache)
    report["cache_ttl_seconds"] = CACHE_TTL_SEC
    report["rate_limit"] = RATE_LIMIT
    # Authentication is enforced only when a non-empty API_KEY is configured.
    report["auth_required"] = bool(API_KEY)
    return report
@app.get(
    "/track/{parcel_number}",
    response_model=TrackingResponse,
    summary="Consultar estado de un envío BRT",
)
@limiter.limit(RATE_LIMIT)
def track(
    request: Request,
    parcel_number: str,
    _: None = Depends(verify_api_key),
):
    """
    Devuelve el estado actual e historial de un envío BRT Italia.
    - Caché automática de **{CACHE_TTL} minutos** por número de paquete.
    - Máximo **{RATE_LIMIT}** por IP.
    - Incluir header `X-API-Key` si el servidor tiene autenticación activada.
    """
    # Return the current status and event history of a BRT Italia shipment.
    #
    # Flow: validate the number → serve from cache if present → otherwise fetch
    # from mybrt.it, cache the result (only when it has events) and return it.
    # Errors: 400 for a malformed number, 404 when the parser reports the parcel
    # as not found (ValueError), 503 for any other upstream failure.
    #
    # NOTE(review): the docstring above is the user-facing endpoint description
    # and is kept verbatim; its {CACHE_TTL}/{RATE_LIMIT} placeholders are never
    # interpolated, and the TTL is configured in seconds, not minutes.

    # Validate the format. fullmatch() is used because "$" also matches just
    # before a trailing newline, and [0-9] because \d accepts non-ASCII digits.
    if not re.fullmatch(r"[0-9]{12}|[0-9]{14}|[0-9]{15}|[0-9]{19}", parcel_number):
        raise HTTPException(
            status_code=400,
            detail=(
                f"Número inválido: '{parcel_number}'. "
                "Debe tener 12, 14, 15 o 19 dígitos."
            ),
        )

    # Check the cache first. A single get() avoids the check-then-read race in
    # which the entry expires between the membership test and the lookup.
    # NOTE(review): the cache object itself is shared across requests without a
    # lock — confirm whether concurrent access needs synchronizing.
    cached_entry = _cache.get(parcel_number)
    if cached_entry is not None:
        cached = dict(cached_entry)  # copy so the stored entry is not mutated
        cached["cached"] = True
        cached["elapsed_ms"] = 0
        logger.info(f"CACHE HIT: {parcel_number}")
        return TrackingResponse(**cached)

    # Query BRT. perf_counter() is monotonic, so the measured duration cannot
    # go negative or jump when the system clock is adjusted.
    t0 = time.perf_counter()
    logger.info(f"FETCH: {parcel_number}")
    try:
        data = _fetch_brt(parcel_number)
    except ValueError as exc:
        # Parcel not found
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        # Endpoint boundary: any other failure is logged and reported as 503.
        logger.error(f"ERROR {parcel_number}: {exc}")
        raise HTTPException(
            status_code=503,
            detail=f"Error consultando mybrt.it: {str(exc)}",
        ) from exc
    elapsed = int((time.perf_counter() - t0) * 1000)

    result = {
        "success": True,
        "parcel_number": data["parcel_number"],
        "current_status": data["current_status"],
        "current_date": data["current_date"],
        "events": data["events"],
        "elapsed_ms": elapsed,
        "cached": False,
        "source": "mybrt.it",
    }

    # Store in the cache only when there is data, keyed by the requested number.
    if data["events"]:
        _cache[parcel_number] = result
        logger.info(
            f"OK {parcel_number} → {data['current_status']} "
            f"({elapsed}ms, guardado en caché {CACHE_TTL_SEC}s)"
        )
    return TrackingResponse(**result)
@app.get("/", include_in_schema=False)
def root():
    # Landing page: a small index pointing at the docs, the health check and an
    # example tracking URL. Excluded from the OpenAPI schema.
    service_index = {
        "service": "BRT Italia Tracking API v2",
        "docs": "/docs",
        "health": "/health",
        "example": "/track/08454094584657",
    }
    return service_index
# ──────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Development entry point: run a single uvicorn worker on all interfaces.
    import uvicorn

    # Listening port comes from the PORT environment variable (default 8000).
    listen_port = int(os.getenv("PORT", "8000"))
    uvicorn.run(
        "server:app",
        host="0.0.0.0",
        port=listen_port,
        workers=1,  # for real production use gunicorn instead
        access_log=True,
    )