Files
hamprint/apps/submissions/validation.py

191 lines
6.7 KiB
Python

"""STL + URL validation for the `processing` worker (plan.md §7.5.2 / §7.5.3).
Two public callables:
validate_stl_file(path)
Magic-byte probe + numpy-stl mesh load + bounding-box vs build plate +
triangle-count sanity. Raises `ValidationError` with the user-visible
reason on any rejection.
validate_external_url(source_url, source_type)
Host allow-list re-check + HEAD reachability (fallback to GET with a
Range header on 405). Raises `ValidationError` likewise.
`numpy-stl` is imported lazily so this module can be imported on a host venv
that hasn't run `pip install -r requirements.txt` yet -- the failure only
fires when an STL is actually validated.
"""
from __future__ import annotations

import math
import struct
from pathlib import Path
from urllib.parse import urlparse

import requests
from django.conf import settings

from .models import Submission
class ValidationError(Exception):
    """Signal that a Submission did not pass its validation pass.

    The exception message is surfaced verbatim as the rejection reason, to
    the operator and to the submitting user alike.
    """
# Host allow-list per source_type. Mirrors `apps.submissions.forms`
# deliberately rather than DRYing through a shared import: the
# `processing` worker shouldn't be coupled to the form layer.
# `validate_external_url` compares the lowercased hostname for exact
# membership, so any host not spelled out here is rejected.
_URL_HOSTS: dict[str, set[str]] = {
    Submission.SourceType.PRINTABLES: {"printables.com", "www.printables.com"},
    Submission.SourceType.MAKERWORLD: {"makerworld.com", "www.makerworld.com"},
    Submission.SourceType.THINGIVERSE: {"thingiverse.com", "www.thingiverse.com"},
}

# A part whose largest bounding-box extent is below this many mm is rejected
# as too small to print.
_MIN_AXIS_MM = 5.0
# Inclusive triangle-count bounds for an accepted mesh: fewer than the
# minimum is reported as degenerate, more than the maximum as too dense.
_MIN_TRIANGLES = 4
_MAX_TRIANGLES = 5_000_000

# Be a polite citizen so anti-bot defenses on the model platforms don't 403 us.
# Sent with every reachability request made by `validate_external_url`.
_HTTP_HEADERS = {
    "User-Agent": "hamprint-validator/1.0 (+https://hamlab.lt)",
}
def _build_volume() -> tuple[float, float, float]:
    """Return the printer build volume as an ``(x, y, z)`` tuple in mm.

    Reads the ``PRINTER_BUILD_VOLUME_MM`` Django setting, defaulting to
    ``"235,235,250"``. The setting is normally a comma-separated string; a
    3-item sequence of numbers is accepted as well.

    Raises:
        RuntimeError: if the setting does not describe exactly three
            positive, finite numbers. Malformed components are reported
            the same way as a wrong component count, so callers see one
            error type for every misconfiguration.
    """
    raw = getattr(settings, "PRINTER_BUILD_VOLUME_MM", "235,235,250")
    try:
        pieces = raw.split(",") if isinstance(raw, str) else list(raw)
        dims = tuple(float(piece) for piece in pieces)
    except (TypeError, ValueError) as exc:
        raise RuntimeError(
            f"PRINTER_BUILD_VOLUME_MM must be 'x,y,z'; got: {raw!r}"
        ) from exc
    # `0 < d < inf` is False for NaN too. A NaN dimension would otherwise
    # make every "extent > build volume" comparison False and silently
    # disable the size check.
    if len(dims) != 3 or not all(0 < d < float("inf") for d in dims):
        raise RuntimeError(
            f"PRINTER_BUILD_VOLUME_MM must be 'x,y,z'; got: {raw!r}"
        )
    return dims[0], dims[1], dims[2]
def _fmt(value: float) -> str:
    """Format a float for error text: whole numbers lose the trailing `.0`
    (`235.0` -> `235`), everything else uses the general `g` format
    (`235.5` -> `235.5`)."""
    if value.is_integer():
        return str(int(value))
    return format(value, "g")
# ---- STL ---------------------------------------------------------------------
def _probe_stl_container(p: Path) -> None:
    """Cheap pre-parse check that `p` looks like an ASCII or binary STL.

    Raises `ValidationError` when the file is neither recognisably ASCII
    nor a binary STL whose size matches its declared triangle count.
    """
    size = p.stat().st_size
    with p.open("rb") as f:
        head = f.read(4096)

    # ASCII STL: "solid " prefix plus a facet marker within the first 4 KB.
    # The mesh load in the caller catches anything subtler.
    if head.startswith(b"solid ") and b"facet normal" in head:
        return

    # Everything else must be binary, including binary files whose 80-byte
    # header happens to start with "solid ": 80-byte header + uint32
    # triangle count + 50 B per triangle.
    if len(head) < 84:
        raise ValidationError(
            "file is not a valid STL: header / size mismatch"
        )
    (triangle_count,) = struct.unpack_from("<I", head, 80)
    if size != 84 + 50 * triangle_count:
        raise ValidationError(
            "file is not a valid STL: header / size mismatch"
        )


def validate_stl_file(path: str) -> None:
    """Validate an uploaded STL; raise `ValidationError` on first failure.

    Passes, in order:
      1. Container probe (ASCII marker, or binary header / size agreement).
      2. Mesh load through numpy-stl.
      3. Degenerate-mesh guard (fewer than `_MIN_TRIANGLES` triangles).
      4. Bounding box: finite, fits the build volume, and not smaller than
         `_MIN_AXIS_MM` on every axis.
      5. Upper triangle-count limit (`_MAX_TRIANGLES`).

    Args:
        path: filesystem path of the STL file.

    Raises:
        ValidationError: with the user-visible rejection reason.
    """
    p = Path(path)
    if not p.is_file():
        raise ValidationError("STL file is missing on disk")

    # 1. Magic-byte / format probe.
    _probe_stl_container(p)

    # 2. Mesh load. Imported lazily so the module imports without numpy-stl.
    try:
        from stl import mesh  # type: ignore[import-not-found]
    except ImportError as exc:  # pragma: no cover -- only hit on host venv
        raise ValidationError(
            "numpy-stl is not installed in this environment"
        ) from exc
    try:
        m = mesh.Mesh.from_file(str(p))
    except Exception as exc:  # numpy-stl raises a variety of types; broad on purpose
        raise ValidationError("STL could not be parsed") from exc

    # 3. Degenerate guard. Must run before the bounding box: a binary file
    #    declaring 0 triangles passes the size probe, and min()/max() over a
    #    zero-row array raise ValueError rather than a ValidationError.
    tri_count = int(len(m.vectors))
    if tri_count < _MIN_TRIANGLES:
        raise ValidationError(
            f"mesh is degenerate (fewer than {_MIN_TRIANGLES} triangles)"
        )

    # 4. Bounding-box check. mesh.vectors has shape (N, 3, 3).
    vertices = m.vectors.reshape(-1, 3)
    mins = vertices.min(axis=0)
    maxs = vertices.max(axis=0)
    extents = [float(hi - lo) for lo, hi in zip(mins, maxs)]
    # NaN extents compare False against every limit below and would
    # otherwise slip through as "fits".
    if not all(math.isfinite(extent) for extent in extents):
        raise ValidationError("STL contains non-finite coordinates")
    bv_x, bv_y, bv_z = _build_volume()
    if extents[0] > bv_x or extents[1] > bv_y or extents[2] > bv_z:
        raise ValidationError(
            f"part is {_fmt(extents[0])}x{_fmt(extents[1])}x{_fmt(extents[2])} mm; "
            f"doesn't fit on our {_fmt(bv_x)}x{_fmt(bv_y)}x{_fmt(bv_z)} build plate"
        )
    if max(extents) < _MIN_AXIS_MM:
        raise ValidationError(
            f"part is too small to print reliably "
            f"(under {_fmt(_MIN_AXIS_MM)} mm on every axis)"
        )

    # 5. Upper triangle-count limit.
    if tri_count > _MAX_TRIANGLES:
        raise ValidationError(f"mesh is too dense ({tri_count:,} triangles)")
# ---- URL ---------------------------------------------------------------------
def _probe_url_status(url: str) -> int:
    """Return the HTTP status of `url` after redirects.

    Tries HEAD first. If the server refuses the method (405 or 501), falls
    back to a ranged GET opened with `stream=True`, so the body is never
    downloaded even when the server ignores the Range header. Lets
    `requests.exceptions.RequestException` propagate to the caller.
    """
    resp = requests.head(
        url,
        timeout=5,
        allow_redirects=True,
        headers=_HTTP_HEADERS,
    )
    status = resp.status_code
    resp.close()

    # Some CDNs refuse HEAD; fall back to a tiny ranged GET.
    if status in (405, 501):
        resp = requests.get(
            url,
            timeout=5,
            allow_redirects=True,
            headers={**_HTTP_HEADERS, "Range": "bytes=0-1023"},
            stream=True,
        )
        status = resp.status_code
        # Release the connection without reading the body.
        resp.close()
    return status


def validate_external_url(source_url: str, source_type: str) -> None:
    """Check that `source_url` belongs to `source_type` and is reachable.

    Args:
        source_url: the externally hosted model URL stored on the row.
        source_type: key into `_URL_HOSTS` selecting the allowed hosts.

    Raises:
        ValidationError: if the URL is empty or malformed, its host is not
            allow-listed for `source_type`, the request fails, or the
            server answers with a status of 400 or above.
    """
    if not source_url:
        raise ValidationError("URL is empty")

    # 1. Host re-check. We already did this at form time but the row may have
    #    been edited via the admin since. urlparse raises ValueError on
    #    malformed netlocs (e.g. an unterminated IPv6 bracket).
    try:
        host = (urlparse(source_url).hostname or "").lower()
    except ValueError as exc:
        raise ValidationError("URL is malformed") from exc
    allowed = _URL_HOSTS.get(source_type, set())
    if host not in allowed:
        raise ValidationError("URL host doesn't match source type")

    # 2. Reachability.
    try:
        status = _probe_url_status(source_url)
    except requests.exceptions.RequestException as exc:
        raise ValidationError(
            f"URL unreachable ({exc.__class__.__name__})"
        ) from exc
    if status >= 400:
        raise ValidationError(
            f"URL returned HTTP {status}; the model may have been "
            f"removed or set to private"
        )