Add better email verification
This commit is contained in:
92
apps/submissions/migrations/0005_normalize_existing_data.py
Normal file
92
apps/submissions/migrations/0005_normalize_existing_data.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""One-shot data backfill for the schema change in 0004:
|
||||
|
||||
1. Populate `Submission.canonical_email` for every existing row by deriving
|
||||
it from `submitted_by.email` (OAuth) or `guest_email` (guest) and
|
||||
running through the same normaliser the live `save()` uses.
|
||||
2. Re-normalise every `VerifiedEmail.email` already in the table. Rows that
|
||||
collapse to the same canonical form are deduped: we keep the row with
|
||||
the most recent `validated_at` and delete the others.
|
||||
|
||||
Defensive: the Submission pass only writes rows whose `canonical_email` is
|
||||
stale, and the VerifiedEmail pass leaves already-normalised, deduplicated rows unchanged, so re-running the migration is a no-op once it's been applied.
|
||||
"""
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def _normalize_email(email):
|
||||
if not email or "@" not in email:
|
||||
return (email or "").lower()
|
||||
local, _, domain = email.lower().rpartition("@")
|
||||
if "+" in local:
|
||||
local = local.split("+", 1)[0]
|
||||
return f"{local}@{domain}"
|
||||
|
||||
|
||||
def _backfill_canonical_emails(Submission, User):
    """Set `canonical_email` on every Submission whose stored value is stale."""
    # Pull all related users up front so we don't do an O(N) round-trip
    # per submission. Users with an empty email are left out of the map.
    user_emails = dict(
        User.objects.exclude(email="").values_list("pk", "email")
    )
    to_update = []
    for sub in Submission.objects.all().only(
        "pk", "submitted_by_id", "guest_email", "canonical_email"
    ):
        # A linked user with a non-empty email wins; otherwise fall back to
        # the guest address; otherwise there is nothing to derive from.
        owner_email = ""
        if sub.submitted_by_id and user_emails.get(sub.submitted_by_id):
            owner_email = user_emails[sub.submitted_by_id]
        elif sub.guest_email:
            owner_email = sub.guest_email
        new = _normalize_email(owner_email)
        # Only queue rows that actually change, so a re-run writes nothing.
        if new != sub.canonical_email:
            sub.canonical_email = new
            to_update.append(sub)
    if to_update:
        Submission.objects.bulk_update(to_update, ["canonical_email"], batch_size=500)


def _dedupe_verified_emails(VerifiedEmail, batch_size=500):
    """Collapse VerifiedEmail rows onto one row per normalised address.

    For each normalised form the row with the most recent `validated_at`
    survives; on a tie the row with the lowest pk survives. Rows whose
    email normalises to an empty string are deleted. Surviving rows are
    rewritten to the normalised form only when their stored value differs.
    """
    # norm -> (pk, validated_at, email as currently stored)
    survivors = {}
    doomed = []  # pks to delete once the scan is complete

    # Explicit ordering makes the tie-break deterministic across databases.
    rows = VerifiedEmail.objects.order_by("pk").only("pk", "email", "validated_at")
    for row in rows:
        norm = _normalize_email(row.email)
        if not norm:
            doomed.append(row.pk)
            continue
        prev = survivors.get(norm)
        if prev is None:
            survivors[norm] = (row.pk, row.validated_at, row.email)
            continue
        prev_pk, prev_at, _prev_email = prev
        # Defensive None handling: the schema for `validated_at` is not
        # visible here, so treat a missing timestamp as older than any
        # real one instead of letting `None > datetime` raise TypeError.
        is_newer = row.validated_at is not None and (
            prev_at is None or row.validated_at > prev_at
        )
        if is_newer:
            doomed.append(prev_pk)
            survivors[norm] = (row.pk, row.validated_at, row.email)
        else:
            doomed.append(row.pk)

    # Delete the losers in chunks rather than one query per row; chunking
    # also keeps the IN (...) parameter list bounded.
    for start in range(0, len(doomed), batch_size):
        VerifiedEmail.objects.filter(
            pk__in=doomed[start:start + batch_size]
        ).delete()

    # Rewrite survivors to their normalised form. All duplicates are gone
    # by now, so no two rows can end up with the same email. Rows that are
    # already normalised are skipped, which makes a re-run write-free.
    for norm, (pk, _at, stored_email) in survivors.items():
        if stored_email != norm:
            VerifiedEmail.objects.filter(pk=pk).update(email=norm)


def forward(apps, schema_editor):
    """Backfill `Submission.canonical_email` and dedupe `VerifiedEmail` rows.

    Args:
        apps: historical app registry supplied by ``RunPython``; used to
            look up the model classes.
        schema_editor: supplied by ``RunPython``; not used.
    """
    Submission = apps.get_model("submissions", "Submission")
    VerifiedEmail = apps.get_model("submissions", "VerifiedEmail")
    User = apps.get_model("auth", "User")

    # ---- Submission.canonical_email -----------------------------------------
    _backfill_canonical_emails(Submission, User)

    # ---- VerifiedEmail re-normalisation + dedup ----------------------------
    _dedupe_verified_emails(VerifiedEmail)
|
||||
|
||||
|
||||
def reverse(apps, schema_editor):
    """Intentionally do nothing when the migration is rolled back.

    The forward pass only derives values from data that is already
    stored, so there is nothing meaningful to undo; rows are left as-is.
    """
    return None
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Data-only migration: runs `forward` on apply and `reverse` on rollback."""

    # Runs after the migration named 0004_email_normalization in this app.
    dependencies = [("submissions", "0004_email_normalization")]

    # `reverse` is a deliberate no-op, which keeps the migration reversible.
    operations = [migrations.RunPython(forward, reverse_code=reverse)]
|
||||
Reference in New Issue
Block a user