1 #
# This tool prints to stdout a list of message IDs that occur
# more than once on your IMAP server. It looks through all the
# folders you have, so it is a real resource hog!
   5 #
   6 # You do need to edit the line to connect to your imap server.
   7 # The example below works against your local dovecot server if
   8 # you store your mail in /home/johannes/.mail (maildir!)
   9 #
  10 # To download from this site, append ?action=raw to the URL.
  11 # If you want to use wget, do
  12 #  $ wget -U wiki-download -O imap-find-duplicates http://johannes.sipsolutions.net/Projects/imap-find-duplicates?action=raw
  13 #
  14 # Copyright 2005 Johannes Berg <johannes@sipsolutions.net>
  15 # Released under GPLv2
  16 #
  17 
import imaplib
import re
  19 
# Edit this line. IMAP4_stream runs the given command and speaks IMAP
# over that process's standard input/output rather than a network socket;
# the example starts a local dovecot imap binary on a maildir.
conn = imaplib.IMAP4_stream('MAIL=/home/johannes/.mail /usr/lib/dovecot/imap')
# Possibly add things here to log in to your server (nothing below does so);
# see https://docs.python.org/2/library/imaplib.html
  24 
  25 # no need to edit anything below here
  26 
  27 def get_msg_id_list(connection, mailbox):
  28   status,dummy = connection.select(mailbox, True)
  29   if status != 'OK':
  30     return []
  31   status,list = conn.fetch('1:*', '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
  32   if status != 'OK':
  33     return []
  34   if list == [None]:
  35     return []
  36   result = []
  37   for item in list:
  38     if type(item) == type(''):
  39       continue
  40     idhdr = item[1].strip()
  41     if idhdr == '':
  42       # if message has no msgid, we can't really use it
  43       continue
  44     result += [(idhdr, mailbox, int(item[0].split()[0]))]
  45   return result
  46 
  47 status,list = conn.list()
  48 boxes = []
  49 for l in list:
  50   # FIXME: doesn't handle quotes in folder names!
  51   boxes += [l.split('"')[-2]]
  52 
  53 mails = []
  54 for box in boxes:
  55   mails += get_msg_id_list(conn, box)
  56 
  57 conn.close()
  58 mails.sort(lambda x,y: cmp(x[0],y[0]))
  59 
  60 old = ('',)
  61 printed=False
  62 for m in mails:
  63   if old[0] == m[0]:
  64     if not printed:
  65       print old[0],': ',old[1],old[2],
  66       printed = True
  67     print ',',m[1],m[2],
  68   else:
  69     if printed:
  70       print ''
  71     printed=False
  72   old = m