191 lines
6.7 KiB
Python
191 lines
6.7 KiB
Python
"""STL + URL validation for the `processing` worker (plan.md §7.5.2 / §7.5.3).
|
|
|
|
Two public callables:
|
|
|
|
validate_stl_file(path)
|
|
Magic-byte probe + numpy-stl mesh load + bounding-box vs build plate +
|
|
triangle-count sanity. Raises `ValidationError` with the user-visible
|
|
reason on any rejection.
|
|
|
|
validate_external_url(source_url, source_type)
|
|
Host allow-list re-check + HEAD reachability (fallback to GET with a
|
|
Range header on 405). Raises `ValidationError` likewise.
|
|
|
|
`numpy-stl` is imported lazily so this module can be imported on a host venv
|
|
that hasn't run `pip install -r requirements.txt` yet -- the failure only
|
|
fires when an STL is actually validated.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
import struct
from pathlib import Path
from urllib.parse import urlparse

import requests
from django.conf import settings

from .models import Submission
|
|
|
|
|
|
class ValidationError(Exception):
    """Signal that a Submission did not pass its validation pass.

    The exception message is used verbatim as the rejection reason, and is
    visible to both the operator and the user.
    """
|
|
|
|
|
|
# Allowed hosts for each source_type. This intentionally duplicates the list
# in `apps.submissions.forms` instead of importing it: the `processing`
# worker shouldn't be coupled to the form layer.
# Every platform is reachable under its bare domain and its `www.` alias.
_URL_HOSTS: dict[str, set[str]] = {
    source: {domain, f"www.{domain}"}
    for source, domain in (
        (Submission.SourceType.PRINTABLES, "printables.com"),
        (Submission.SourceType.MAKERWORLD, "makerworld.com"),
        (Submission.SourceType.THINGIVERSE, "thingiverse.com"),
    )
}

# A part whose largest bounding-box extent is below this (in mm) is rejected
# as too small to print reliably.
_MIN_AXIS_MM = 5.0
# Triangle-count bounds: fewer is reported as a degenerate mesh, more as a
# mesh that is too dense.
_MIN_TRIANGLES = 4
_MAX_TRIANGLES = 5_000_000

# Identify ourselves politely so anti-bot defenses on the model platforms
# don't answer with a 403.
_HTTP_HEADERS = {"User-Agent": "hamprint-validator/1.0 (+https://hamlab.lt)"}
|
|
|
|
|
|
def _build_volume() -> tuple[float, float, float]:
    """Return the printer build volume as ``(x, y, z)`` in millimetres.

    Reads `PRINTER_BUILD_VOLUME_MM` from Django settings (env-driven,
    default ``"235,235,250"``). The value is normally an ``"x,y,z"`` string;
    an already-split sequence of three numbers is accepted as well.

    Raises:
        RuntimeError: the setting does not contain exactly three numeric
            components.
    """
    raw = getattr(settings, "PRINTER_BUILD_VOLUME_MM", "235,235,250")
    components = raw.split(",") if isinstance(raw, str) else raw
    try:
        # Tuple unpacking enforces "exactly three"; float() enforces
        # "numeric". Both failure modes surface as the same config error
        # instead of leaking a bare ValueError / TypeError.
        x, y, z = (float(component) for component in components)
    except (TypeError, ValueError) as exc:
        raise RuntimeError(
            f"PRINTER_BUILD_VOLUME_MM must be 'x,y,z'; got: {raw!r}"
        ) from exc
    return x, y, z
|
|
|
|
|
|
def _fmt(value: float) -> str:
    """Render a millimetre value compactly for error text.

    `235.0` becomes `235`, `235.5` stays `235.5`, and float noise is rounded
    away (six significant digits). Large non-integer values are written in
    plain decimal form rather than exponent notation, so an oversized part
    reads `1234567.5` and not `1.23457e+06`.
    """
    number = float(value)  # also lets plain ints through on Python < 3.12
    if number.is_integer():
        return str(int(number))
    text = f"{number:g}"
    if "e" in text and abs(number) >= 1:
        # `:g` falls back to exponent notation from 1e6 upwards; use fixed
        # notation and trim the padding zeros instead.
        text = f"{number:.3f}".rstrip("0").rstrip(".")
    return text
|
|
|
|
|
|
# ---- STL ---------------------------------------------------------------------
|
|
|
|
|
|
def _probe_stl_format(p: Path) -> None:
    """Cheaply reject files that cannot be an STL before the full mesh load.

    A file is taken as ASCII STL when it starts with the `solid` keyword and
    a `facet normal` record shows up within the first 4 KB. Anything else is
    treated as binary and must match the fixed layout: 80-byte header,
    little-endian uint32 triangle count, then 50 bytes per triangle.

    Raises:
        ValidationError: the file matches neither layout.
    """
    mismatch = "file is not a valid STL: header / size mismatch"
    size = p.stat().st_size
    with p.open("rb") as f:
        head = f.read(4096)

    lowered = head.lower()
    if lowered.startswith(b"solid") and b"facet normal" in lowered:
        # ASCII STL accepted; the mesh load catches anything subtler.
        return

    # Binary STL. Some binary files carry a header that itself begins with
    # "solid", so a "solid" prefix without facet records falls through to the
    # size check here rather than being rejected outright.
    if len(head) < 84:
        raise ValidationError(mismatch)
    (triangle_count,) = struct.unpack_from("<I", head, 80)
    if size != 84 + 50 * triangle_count:
        raise ValidationError(mismatch)


def validate_stl_file(path: str) -> None:
    """Validate an STL on disk; raise `ValidationError` on the first failure.

    Passes, in order:
      1. magic-byte / size probe of the raw file,
      2. mesh load through numpy-stl,
      3. bounding box against the build volume and the minimum part size,
      4. triangle-count sanity.

    Args:
        path: filesystem path of the STL to check.

    Raises:
        ValidationError: with the user-visible rejection reason.
        RuntimeError: propagated from `_build_volume` when the
            `PRINTER_BUILD_VOLUME_MM` setting is malformed.
    """
    p = Path(path)
    if not p.is_file():
        raise ValidationError("STL file is missing on disk")

    # 1. Magic-byte / format probe.
    _probe_stl_format(p)

    # 2. Mesh load. Imported lazily so the module imports without numpy-stl.
    try:
        from stl import mesh  # type: ignore[import-not-found]
    except ImportError as exc:  # pragma: no cover -- only hit on host venv
        raise ValidationError(
            "numpy-stl is not installed in this environment"
        ) from exc
    try:
        m = mesh.Mesh.from_file(str(p))
    except Exception as exc:  # numpy-stl raises a variety of types; broad on purpose
        raise ValidationError("STL could not be parsed") from exc

    tri_count = len(m.vectors)
    too_few = f"mesh is degenerate (fewer than {_MIN_TRIANGLES} triangles)"
    if tri_count == 0:
        # An empty mesh has no bounding box: min()/max() on a zero-size
        # array raise, so report it before touching the vertices.
        raise ValidationError(too_few)

    # 3. Bounding-box check. mesh.vectors has shape (N, 3, 3).
    vertices = m.vectors.reshape(-1, 3)
    spans = vertices.max(axis=0) - vertices.min(axis=0)
    extents = [float(span) for span in spans]
    if not all(math.isfinite(extent) for extent in extents):
        # Every comparison against NaN is False, so without this guard a
        # mesh with NaN coordinates would slip through both size checks.
        raise ValidationError("mesh has invalid coordinates (NaN or out of range)")

    volume = _build_volume()
    if any(extent > limit for extent, limit in zip(extents, volume)):
        part = "x".join(_fmt(extent) for extent in extents)
        plate = "x".join(_fmt(limit) for limit in volume)
        raise ValidationError(
            f"part is {part} mm; doesn't fit on our {plate} build plate"
        )
    if max(extents) < _MIN_AXIS_MM:
        raise ValidationError(
            f"part is too small to print reliably "
            f"(under {_fmt(_MIN_AXIS_MM)} mm on every axis)"
        )

    # 4. Triangle-count sanity.
    if tri_count < _MIN_TRIANGLES:
        raise ValidationError(too_few)
    if tri_count > _MAX_TRIANGLES:
        raise ValidationError(f"mesh is too dense ({tri_count:,} triangles)")
|
|
|
|
|
|
# ---- URL ---------------------------------------------------------------------
|
|
|
|
|
|
def validate_external_url(source_url: str, source_type: str) -> None:
    """Check that an external model URL is allowed and currently reachable.

    The host must be on the allow-list for `source_type`, the scheme must be
    http(s), and the URL must answer a HEAD request (or, when the server
    replies 405 to HEAD, a small ranged GET) with a status below 400.

    Args:
        source_url: the URL stored on the submission.
        source_type: key into the per-platform host allow-list.

    Raises:
        ValidationError: with the user-visible rejection reason.
    """
    if not source_url:
        raise ValidationError("URL is empty")

    # 1. Host re-check. We already did this at form time but the row may have
    #    been edited via the admin since.
    try:
        parsed = urlparse(source_url)
        host = (parsed.hostname or "").lower()
    except ValueError as exc:
        # urlparse rejects some malformed netlocs (e.g. an unclosed IPv6
        # bracket); that is a bad URL, not a worker crash.
        raise ValidationError("URL host doesn't match source type") from exc
    if host not in _URL_HOSTS.get(source_type, set()):
        raise ValidationError("URL host doesn't match source type")
    if parsed.scheme not in ("http", "https"):
        raise ValidationError("URL must use http or https")

    # 2. HEAD reachability.
    timeout = 5
    try:
        r = requests.head(
            source_url,
            timeout=timeout,
            allow_redirects=True,
            headers=_HTTP_HEADERS,
        )
        status = r.status_code
        # Some CDNs refuse HEAD with 405; fall back to a tiny ranged GET.
        if status == 405:
            r = requests.get(
                source_url,
                timeout=timeout,
                allow_redirects=True,
                headers={**_HTTP_HEADERS, "Range": "bytes=0-1023"},
                # Only the status line matters. Streaming keeps a server that
                # ignores the Range header from making us download the body.
                stream=True,
            )
            status = r.status_code
            r.close()
    except requests.exceptions.RequestException as exc:
        raise ValidationError(
            f"URL unreachable ({exc.__class__.__name__})"
        ) from exc

    if status >= 400:
        raise ValidationError(
            f"URL returned HTTP {status}; the model may have been "
            f"removed or set to private"
        )
|