Add better email verification

This commit is contained in:
2026-05-14 23:49:54 +03:00
parent 569d57e144
commit 46fc07a1ae
9 changed files with 394 additions and 6 deletions

View File

@@ -0,0 +1,36 @@
# Generated by Django 6.0.5 on 2026-05-14 20:32
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the `VerifiedEmail` table.

    Each row holds one unique, indexed email address plus two timestamps:
    `verified_at` (set on insert) and `updated_at` (refreshed on every save).
    Rows are ordered newest-`updated_at` first by default.
    """

    dependencies = [("submissions", "0001_initial")]

    operations = [
        migrations.CreateModel(
            name="VerifiedEmail",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        serialize=False,
                        auto_created=True,
                        primary_key=True,
                    ),
                ),
                (
                    "email",
                    models.EmailField(max_length=254, unique=True, db_index=True),
                ),
                # auto_now_add: stamped once, when the row is first inserted.
                ("verified_at", models.DateTimeField(auto_now_add=True)),
                # auto_now: re-stamped on every save of the row.
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
            options={"ordering": ("-updated_at",)},
        ),
    ]

View File

@@ -0,0 +1,39 @@
"""Backfill `VerifiedEmail` from any historical submission that already had
`email_confirmed=True` (i.e. the user clicked the confirmation link before
the trust list existed). One-shot; safe to re-run thanks to
`update_or_create`."""
from django.db import migrations
def backfill_verified_emails(apps, schema_editor):
    """Seed `VerifiedEmail` from the guest email of every confirmed submission.

    Each address is stripped and lower-cased first; blank results are skipped
    and every distinct address is written exactly once, in the order the
    queryset yields it.

    Args:
        apps: historical app registry supplied by `RunPython`.
        schema_editor: supplied by `RunPython`; not used here.
    """
    submission_model = apps.get_model("submissions", "Submission")
    verified_model = apps.get_model("submissions", "VerifiedEmail")

    confirmed = submission_model.objects.filter(email_confirmed=True)
    with_address = confirmed.exclude(guest_email__isnull=True).exclude(
        guest_email=""
    )
    raw_addresses = with_address.values_list("guest_email", flat=True)

    # dict.fromkeys drops repeats while keeping first-seen order, so rows are
    # created in the same sequence as a manual "seen" set would produce.
    cleaned = dict.fromkeys(raw.strip().lower() for raw in raw_addresses)
    cleaned.pop("", None)  # whitespace-only addresses collapse to "" -- skip

    for address in cleaned:
        # `update_or_create` keeps the migration idempotent.
        verified_model.objects.update_or_create(email=address)
def noop_reverse(apps, schema_editor):
    """Reverse step for the backfill: intentionally does nothing.

    The trust list is a forward-only derived artefact, so rolling this
    migration back leaves the backfilled rows exactly where they are.
    """
    return None
class Migration(migrations.Migration):
    """Data migration: run the `VerifiedEmail` backfill once the table exists."""

    dependencies = [("submissions", "0002_verifiedemail")]

    operations = [
        migrations.RunPython(
            code=backfill_verified_emails,
            reverse_code=noop_reverse,
        ),
    ]

View File

@@ -0,0 +1,51 @@
"""Schema changes for the email normalisation / TTL / cap work.
- `Submission.canonical_email`: new indexed column populated by
`Submission.save()`. Used to count active submissions per email for the
10-cap, and to look up the `VerifiedEmail` trust list.
- `VerifiedEmail.verified_at` -> `validated_at`: keeps the data, drops the
`auto_now_add` so `update_or_create` can roll the timestamp forward on
every re-confirmation (rolling 30-day TTL).
- `VerifiedEmail.updated_at`: removed -- `validated_at` IS the most recent
confirmation timestamp now, no need for a second column.
The data backfill (populate canonical_email, re-normalise existing
VerifiedEmail rows) lives in 0005_normalize_existing_data so this
migration stays a clean schema-only change.
"""
import django.utils.timezone
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema-only step for email normalisation.

    Adds `Submission.canonical_email` and collapses the two `VerifiedEmail`
    timestamps into a single `validated_at` column. The order of operations
    matters: the rename happens before the field is altered.
    """

    dependencies = [("submissions", "0003_backfill_verified_emails")]

    operations = [
        # 1. New indexed, blank-able column on Submission.
        migrations.AddField(
            model_name="submission",
            name="canonical_email",
            field=models.EmailField(max_length=254, db_index=True, blank=True),
        ),
        # 2. Keep the existing verified_at values under the new name.
        migrations.RenameField(
            model_name="verifiedemail",
            old_name="verified_at",
            new_name="validated_at",
        ),
        # 3. Drop the second timestamp column.
        migrations.RemoveField(model_name="verifiedemail", name="updated_at"),
        # 4. Replace auto_now_add with a plain default so the value can be
        #    overwritten later.
        migrations.AlterField(
            model_name="verifiedemail",
            name="validated_at",
            field=models.DateTimeField(default=django.utils.timezone.now),
        ),
        # 5. Default ordering follows the renamed column.
        migrations.AlterModelOptions(
            name="verifiedemail",
            options={"ordering": ("-validated_at",)},
        ),
    ]

View File

@@ -0,0 +1,92 @@
"""One-shot data backfill for the schema change in 0004:
1. Populate `Submission.canonical_email` for every existing row by deriving
it from `submitted_by.email` (OAuth) or `guest_email` (guest) and
running through the same normaliser the live `save()` uses.
2. Re-normalise every `VerifiedEmail.email` already in the table. Rows that
collapse to the same canonical form are deduped: we keep the row with
the most recent `validated_at` and delete the others.
Defensive: the first pass only writes submissions whose canonical form
actually changes, and the second pass dedupes before rewriting, so
re-running the migration leaves already-normalised data unchanged.
"""
from django.db import migrations
def _normalize_email(email):
if not email or "@" not in email:
return (email or "").lower()
local, _, domain = email.lower().rpartition("@")
if "+" in local:
local = local.split("+", 1)[0]
return f"{local}@{domain}"
def _backfill_canonical_emails(Submission, User):
    """Set `canonical_email` on every submission whose stored value is stale."""
    # Pull all related users up front so we don't do a round-trip per
    # submission.
    user_emails = dict(User.objects.exclude(email="").values_list("pk", "email"))

    stale = []
    for sub in Submission.objects.all().only(
        "pk", "submitted_by_id", "guest_email", "canonical_email"
    ):
        # The owning user's email wins; guest_email is the fallback.
        owner_email = ""
        if sub.submitted_by_id and user_emails.get(sub.submitted_by_id):
            owner_email = user_emails[sub.submitted_by_id]
        elif sub.guest_email:
            owner_email = sub.guest_email

        canonical = _normalize_email(owner_email)
        if canonical != sub.canonical_email:
            sub.canonical_email = canonical
            stale.append(sub)

    if stale:
        Submission.objects.bulk_update(stale, ["canonical_email"], batch_size=500)


def _dedupe_verified_emails(VerifiedEmail):
    """Collapse `VerifiedEmail` rows to one per canonical address."""
    # canonical form -> (pk, validated_at, email as currently stored)
    keepers = {}
    for row in VerifiedEmail.objects.all().only("pk", "email", "validated_at"):
        norm = _normalize_email(row.email)
        if not norm:
            row.delete()
            continue

        kept = keepers.get(norm)
        if kept is not None:
            if row.validated_at <= kept[1]:
                # Existing keeper is at least as recent: drop this row.
                row.delete()
                continue
            # This row is strictly newer: it replaces the previous keeper.
            VerifiedEmail.objects.filter(pk=kept[0]).delete()
        keepers[norm] = (row.pk, row.validated_at, row.email)

    # Rewrite survivors to their canonical form. All rows sharing a canonical
    # form were deduped above, so this cannot hit the unique constraint.
    # Rows already stored canonically are skipped, so a re-run issues no
    # UPDATEs at all.
    for norm, (pk, _at, stored) in keepers.items():
        if stored != norm:
            VerifiedEmail.objects.filter(pk=pk).update(email=norm)


def forward(apps, schema_editor):
    """Backfill canonical emails and re-normalise the `VerifiedEmail` list.

    Pass 1 derives `Submission.canonical_email` from the owning user's email
    (or `guest_email` when there is none) and bulk-updates only rows whose
    value changes. Pass 2 normalises every `VerifiedEmail.email`, keeping the
    row with the most recent `validated_at` per canonical form and deleting
    the rest, including rows that normalise to an empty string.

    Args:
        apps: historical app registry supplied by `RunPython`.
        schema_editor: supplied by `RunPython`; not used here.
    """
    # Resolve every model before writing anything, so a lookup failure aborts
    # the migration before any row is touched.
    Submission = apps.get_model("submissions", "Submission")
    VerifiedEmail = apps.get_model("submissions", "VerifiedEmail")
    User = apps.get_model("auth", "User")

    _backfill_canonical_emails(Submission, User)
    _dedupe_verified_emails(VerifiedEmail)
def reverse(apps, schema_editor):
    """Rollback hook: intentionally does nothing.

    The forward pass is a derived backfill with nothing meaningful to undo,
    so rows are left untouched when the migration is reversed.
    """
    return None
class Migration(migrations.Migration):
    """Data migration: normalise existing emails after the 0004 schema change."""

    dependencies = [("submissions", "0004_email_normalization")]

    operations = [
        migrations.RunPython(code=forward, reverse_code=reverse),
    ]