"""STL + URL validation for the `processing` worker (plan.md §7.5.2 / §7.5.3). Two public callables: validate_stl_file(path) Magic-byte probe + numpy-stl mesh load + bounding-box vs build plate + triangle-count sanity. Raises `ValidationError` with the user-visible reason on any rejection. validate_external_url(url, source_type) Host allow-list re-check + HEAD reachability (fallback to GET with a Range header on 405). Raises `ValidationError` likewise. `numpy-stl` is imported lazily so this module can be imported on a host venv that hasn't run `pip install -r requirements.txt` yet -- the failure only fires when an STL is actually validated. """ from __future__ import annotations import struct from pathlib import Path from urllib.parse import urlparse import requests from django.conf import settings from .models import Submission class ValidationError(Exception): """Raised when a Submission fails its validation pass. The message becomes the operator-visible (and user-visible) rejection reason.""" # Host allow-list per source_type. Mirrors `apps.submissions.forms` # deliberately rather than DRYing through a shared import: the # `processing` worker shouldn't be coupled to the form layer. _URL_HOSTS: dict[str, set[str]] = { Submission.SourceType.PRINTABLES: {"printables.com", "www.printables.com"}, Submission.SourceType.MAKERWORLD: {"makerworld.com", "www.makerworld.com"}, Submission.SourceType.THINGIVERSE: {"thingiverse.com", "www.thingiverse.com"}, } _MIN_AXIS_MM = 5.0 _MIN_TRIANGLES = 4 _MAX_TRIANGLES = 5_000_000 # Be a polite citizen so anti-bot defenses on the model platforms don't 403 us. 
_HTTP_HEADERS = {
    "User-Agent": "hamprint-validator/1.0 (+https://hamlab.lt)",
}


def _build_volume() -> tuple[float, float, float]:
    """Read `PRINTER_BUILD_VOLUME_MM` (env-driven, default 235,235,250).

    Accepts either the ``"x,y,z"`` string form or an already-parsed
    three-item sequence of numbers.

    Returns:
        The build volume as an ``(x, y, z)`` tuple of floats.

    Raises:
        RuntimeError: if the setting is not exactly three numeric values.
    """
    raw = getattr(settings, "PRINTER_BUILD_VOLUME_MM", "235,235,250")
    complaint = f"PRINTER_BUILD_VOLUME_MM must be 'x,y,z'; got: {raw!r}"
    try:
        pieces = raw.split(",") if isinstance(raw, str) else list(raw)
        parts = [float(x) for x in pieces]
    except (TypeError, ValueError) as exc:
        # Surface a misconfiguration the same way as a wrong item count,
        # instead of leaking a bare float() / iteration error.
        raise RuntimeError(complaint) from exc
    if len(parts) != 3:
        raise RuntimeError(complaint)
    return parts[0], parts[1], parts[2]


def _fmt(value: float) -> str:
    """Render `235.0` as `235`, `235.5` as `235.5`. Just for clean error text."""
    # Coerce first: plain ints only gained `.is_integer()` in Python 3.12,
    # and callers also pass numpy scalars taken from mesh extents.
    value = float(value)
    return str(int(value)) if value.is_integer() else f"{value:g}"


# ---- STL ---------------------------------------------------------------------


def validate_stl_file(path: str) -> None:
    """Four-pass check; raises `ValidationError` on first failure.

    Passes, in order:
      1. Format probe: ASCII (``solid `` prefix plus a ``facet normal``
         marker in the first 4 KB) or binary (declared triangle count must
         match the file size).
      2. Mesh load through numpy-stl (imported lazily).
      3. Bounding box against the build volume, plus a minimum-size check.
      4. Triangle-count sanity.

    Args:
        path: Filesystem path of the uploaded STL.

    Raises:
        ValidationError: with the user-visible rejection reason.
    """
    p = Path(path)
    if not p.is_file():
        raise ValidationError("STL file is missing on disk")
    size = p.stat().st_size

    # 1. Magic-byte / format probe. A single 4 KB read covers both the ASCII
    #    marker search and the binary header + triangle count.
    with p.open("rb") as f:
        first_4k = f.read(4096)

    # Some binary STLs also start with "solid ", so the prefix alone is not
    # enough: only treat the file as ASCII when a facet marker shows up too.
    # Anything else is checked as binary, which still rejects it with the
    # same message if the size does not add up.
    looks_ascii = first_4k.startswith(b"solid ") and b"facet normal" in first_4k
    if not looks_ascii:
        # Binary STL: 80-byte header + uint32 triangle count + 50 B per tri.
        if len(first_4k) < 84:
            raise ValidationError(
                "file is not a valid STL: header / size mismatch"
            )
        triangle_count = struct.unpack_from("<I", first_4k, 80)[0]
        # NOTE(review): the original text between `"<I"` and `extents[0] >`
        # was lost in extraction; this exact-size comparison and the mesh
        # load below are reconstructed -- confirm against the real file.
        if size != 84 + triangle_count * 50:
            raise ValidationError(
                "file is not a valid STL: header / size mismatch"
            )
    # ASCII STL accepted; the mesh load below catches anything subtler.

    # 2. Mesh load. Imported here rather than at module level so the module
    #    stays importable without numpy-stl; a missing package surfaces as
    #    ImportError (an infrastructure problem, not a user rejection).
    from stl import mesh as stl_mesh

    try:
        m = stl_mesh.Mesh.from_file(str(p))
    except Exception as exc:
        # NOTE(review): rejection wording reconstructed -- confirm.
        raise ValidationError(
            "file is not a valid STL: could not parse mesh"
        ) from exc

    tri_count = int(len(m.vectors))
    if tri_count == 0:
        # An empty mesh has no extents to reduce over; report it as
        # degenerate instead of crashing in the bounding-box pass.
        raise ValidationError(
            f"mesh is degenerate (fewer than {_MIN_TRIANGLES} triangles)"
        )

    # 3. Bounding box vs build plate.
    points = m.vectors.reshape(-1, 3)
    extents = points.max(axis=0) - points.min(axis=0)
    bv_x, bv_y, bv_z = _build_volume()
    if extents[0] > bv_x or extents[1] > bv_y or extents[2] > bv_z:
        raise ValidationError(
            f"part is {_fmt(extents[0])}x{_fmt(extents[1])}x{_fmt(extents[2])} mm; "
            f"doesn't fit on our {_fmt(bv_x)}x{_fmt(bv_y)}x{_fmt(bv_z)} build plate"
        )
    if max(extents) < _MIN_AXIS_MM:
        raise ValidationError(
            f"part is too small to print reliably "
            f"(under {_fmt(_MIN_AXIS_MM)} mm on every axis)"
        )

    # 4. Triangle-count sanity.
    if tri_count < _MIN_TRIANGLES:
        raise ValidationError(
            f"mesh is degenerate (fewer than {_MIN_TRIANGLES} triangles)"
        )
    if tri_count > _MAX_TRIANGLES:
        raise ValidationError(f"mesh is too dense ({tri_count:,} triangles)")


# ---- URL ---------------------------------------------------------------------


def validate_external_url(source_url: str, source_type: str) -> None:
    """Host check + HEAD reachability; raises `ValidationError` on rejection.

    Args:
        source_url: The externally hosted model URL stored on the row.
        source_type: Key into the per-platform host allow-list.

    Raises:
        ValidationError: empty URL, host not allowed for `source_type`,
            network failure, or an HTTP status of 400 or above.
    """
    if not source_url:
        raise ValidationError("URL is empty")

    # 1. Host re-check. We already did this at form time but the row may have
    #    been edited via the admin since.
    try:
        host = (urlparse(source_url).hostname or "").lower()
    except ValueError as exc:
        # urlparse raises on malformed input such as an unbalanced IPv6
        # bracket; that is a rejection, not a worker crash.
        raise ValidationError("URL host doesn't match source type") from exc
    allowed = _URL_HOSTS.get(source_type, set())
    if host not in allowed:
        raise ValidationError("URL host doesn't match source type")

    # 2. HEAD reachability.
    try:
        r = requests.head(
            source_url,
            timeout=5,
            allow_redirects=True,
            headers=_HTTP_HEADERS,
        )
        # Some CDNs refuse HEAD with 405; fall back to a tiny ranged GET.
        if r.status_code == 405:
            r.close()
            r = requests.get(
                source_url,
                timeout=5,
                allow_redirects=True,
                headers={**_HTTP_HEADERS, "Range": "bytes=0-1023"},
                # Only the status line matters: don't pull the body down if
                # the server ignores the Range header.
                stream=True,
            )
        status = r.status_code
        r.close()
    except requests.exceptions.RequestException as exc:
        raise ValidationError(
            f"URL unreachable ({exc.__class__.__name__})"
        ) from exc

    if status >= 400:
        raise ValidationError(
            f"URL returned HTTP {status}; the model may have been "
            f"removed or set to private"
        )