Commit 73107e0a authored by Jelle van der Waa's avatar Jelle van der Waa 🚧 Committed by Angel Velásquez
Browse files

Add donation import management script (#81)

This script imports messages from a Maildir folder and creates new
unique Donors based on the name in the subject. The subject also
contains the amount and the email address, which are not stored by
Archweb's Donor model.
parent 148692cd
# -*- coding: utf-8 -*-
Imports donors from the emails which are sent to the donation mailbox.
The subject of each email contains the name of the donor, the amount and
the email address. Archweb's Donor model only contains the name, which is
unique.
An example subject:
Subject: Receipt [$25.00] By: John Doe []
Usage: ./manage.py donor_import path/to/maildir/
import logging
import mailbox
import sys
from email.header import decode_header

from django.core.management.base import BaseCommand
from django.db.utils import Error as DBError
from parse import parse

from main.models import Donor
# Configure the root logger's message and timestamp layout. The keyword
# arguments below were orphaned without their enclosing call; basicConfig
# writes to stderr unless told otherwise.
logging.basicConfig(
    format=u'%(asctime)s -> %(levelname)s: %(message)s',
    datefmt=u'%Y-%m-%d %H:%M:%S',
)
# Root logger; Command.handle() adjusts its level from the verbosity option.
logger = logging.getLogger()
class Command(BaseCommand):
    """Management command that imports donor names from a Maildir folder.

    Every message subject is expected to look like
    ``Receipt [$amount] By: John Doe [mail]``. Only the sanitized name is
    kept; it is stored through ``Donor.objects.get_or_create`` so an already
    known name is not added twice.
    """

    def add_arguments(self, parser):
        """Register the positional ``maildir`` argument (path of the Maildir)."""
        parser.add_argument('maildir', type=str)

    def decode_subject(self, subject):
        """Decode an encoded subject header into one text string.

        ``decode_header`` yields ``(value, charset)`` pairs. Byte values are
        decoded with their charset (ASCII when none is given) and text values
        are used unchanged, so the method does not depend on the Python 2-only
        ``unicode`` builtin.
        """
        default_charset = 'ASCII'
        decoded = []
        for value, charset in decode_header(subject):
            # Python 3 returns plain text when the header has no encoded
            # words at all; only byte chunks need decoding.
            if isinstance(value, bytes):
                value = value.decode(charset or default_charset)
            decoded.append(value)
        return u''.join(decoded)

    def parse_subject(self, subject):
        """Return the donor name found in ``subject``, or None.

        Format of the subject is as following:
        Receipt [$amount] By: John Doe [mail]
        """
        parsed = parse("Receipt [{amount}] By: {name} [{email}]", subject)
        if parsed:
            return parsed['name']
        return None

    def sanitize_name(self, name):
        """Return a cleaned-up donor name, or an empty string if unusable.

        Names without any alphabetic character are rejected. Digits are
        removed, every word is capitalized and surrounding or repeated
        whitespace is collapsed to single spaces.
        """
        # Some submissions contain no alphabetic characters, skip them
        if not any(char.isalpha() for char in name):
            return u''

        # Strip any numbers, they could be a bank account number. A join over
        # a generator keeps the result a string on Python 3, where filter()
        # returns an iterator without a split() method.
        name = u''.join(char for char in name if not char.isdigit())

        # Normalize all capitalized names (JOHN DOE). split() without an
        # argument also drops leading/trailing whitespace and collapses the
        # double spaces left behind by removed digits.
        return u' '.join(word.capitalize() for word in name.split())

    def handle(self, *args, **options):
        """Import a donor for every parseable message in the given Maildir.

        The ``verbosity`` option selects the log level (0: errors only,
        1: info, 2 or more: debug). Messages whose subject cannot be parsed,
        or whose name is empty after sanitizing, are logged and skipped.
        Returns 0 without importing anything when the Maildir cannot be
        opened.
        """
        verbosity = int(options.get('verbosity', 0))
        if verbosity == 0:
            logger.setLevel(logging.ERROR)
        elif verbosity == 1:
            logger.setLevel(logging.INFO)
        elif verbosity >= 2:
            logger.setLevel(logging.DEBUG)

        # Looked up outside the try block so the error message below can
        # always refer to it.
        directory = options['maildir']
        try:
            maildir = mailbox.Maildir(directory, create=False)
        except mailbox.Error:
            logger.error(u"Failed to open maildir: '%s'", directory)
            return 0

        for msg in maildir:
            subject = msg.get('subject', '')
            # Charset names in encoded headers are case-insensitive, so
            # '=?UTF-8?...' has to be recognized as well.
            if 'utf-8' in subject.lower():
                # Decode UTF-8 encoded subjects
                subject = self.decode_subject(subject)

            # Subject header can contain enters, replace them with a space
            subject = subject.replace(u'\n', u' ')

            name = self.parse_subject(subject)
            if not name:
                logger.error(u'Unable to parse: %s', subject)
                # Nothing to sanitize or store for this message.
                continue

            name = self.sanitize_name(name)
            if not name:
                logger.error(u'Invalid name in subject: %s', subject)
                # Never create a donor with an empty name.
                continue

            try:
                _, created = Donor.objects.get_or_create(name=name)
            except DBError as e:
                logger.error(u'Error while adding donor: %s, %s', name, e)
                continue

            if created:
                logger.info(u'Adding donor: {}'.format(name))
from django.test import SimpleTestCase
from import Command
class DonorImportTest(SimpleTestCase):
    """Unit tests for the subject parsing and name sanitizing of Command."""

    # Receipt subject with a placeholder for the donor name. The address is
    # a dummy value: the parser needs a non-empty email field to match.
    subject_template = u'Receipt [$25.00] By: {} [john.doe@example.com]'

    def setUp(self):
        self.command = Command()

    def gen_parse_subject(self, data):
        """Parse a receipt subject generated for the donor name ``data``."""
        return self.command.parse_subject(self.subject_template.format(data))

    def test_parse_subject(self):
        # Valid
        valid = self.subject_template.format(u'John Doe')
        output = self.command.parse_subject(valid)
        self.assertEqual(output, u'John Doe')
        self.assertEqual(self.gen_parse_subject(u'John Doe'), u'John Doe')

        # Invalid: does not follow the receipt format at all
        self.assertIsNone(self.command.parse_subject(u'Hello world'))

    def test_parse_name(self):
        self.assertEqual(self.command.sanitize_name(u'1244'), u'')
        self.assertEqual(self.command.sanitize_name(u'John Doe'), u'John Doe')
        self.assertEqual(self.command.sanitize_name(u' John Doe '), u'John Doe')
        self.assertEqual(self.command.sanitize_name(u'John Doe 23'), u'John Doe')
        self.assertEqual(self.command.sanitize_name(u'JOHN DOE'), u'John Doe')
......@@ -9,3 +9,4 @@ jsmin==2.2.2
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment