1 #
# This tool prints to stdout every Message-ID header that occurs more
# than once on your IMAP server, together with the mailbox and message
# number of each copy. It looks through all the folders you have, so it
# is really a resource hog!!
5 #
6 # You do need to edit the line to connect to your imap server.
7 # The example below works against your local dovecot server if
8 # you store your mail in /home/johannes/.mail (maildir!)
9 #
10 # To download from this site, append ?action=raw to the URL.
11 # If you want to use wget, do
12 # $ wget -U wiki-download -O imap-find-duplicates http://johannes.sipsolutions.net/Projects/imap-find-duplicates?action=raw
13 #
14 # Copyright 2005 Johannes Berg <johannes@sipsolutions.net>
15 # Released under GPLv2
16 #
17
import imaplib
import re
19
# Edit this command so that it starts your own IMAP server process.
IMAP_COMMAND = 'MAIL=/home/johannes/.mail /usr/lib/dovecot/imap'
conn = imaplib.IMAP4_stream(IMAP_COMMAND)
# If your server requires it, add the calls that log you in right here;
# see http://docs.python.org/lib/module-imaplib.html
24
25 # no need to edit anything below here
26
def get_msg_id_list(connection, mailbox):
    """Collect the Message-ID header of every message in *mailbox*.

    The mailbox is selected read-only on *connection* and the MESSAGE-ID
    header field of all messages ('1:*') is fetched in one request.

    Returns a list of ``(header, mailbox, number)`` tuples, where *header*
    is the stripped header text as sent by the server, *mailbox* is the
    name that was passed in and *number* is the leading integer of the
    fetch response for that message. Messages without a Message-ID are
    left out. An empty list is returned when the mailbox cannot be
    selected, the fetch fails, or the mailbox holds no messages.
    """
    status, _ = connection.select(mailbox, True)
    if status != 'OK':
        return []
    # Use the connection we were handed rather than a module-level global,
    # so the function works with any IMAP4 object.
    status, responses = connection.fetch(
        '1:*', '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
    if status != 'OK':
        return []
    result = []
    for item in responses:
        # Only (envelope, literal) tuples carry a header. The plain strings
        # between them (closing parentheses) and the lone None returned
        # for an empty mailbox are skipped.
        if not isinstance(item, tuple):
            continue
        header = item[1]
        if not isinstance(header, str):
            # Python 3's imaplib returns bytes; normalise to text so the
            # result can be compared and printed uniformly.
            header = header.decode('utf-8', 'replace')
        header = header.strip()
        if not header:
            # if message has no msgid, we can't really use it
            continue
        result.append((header, mailbox, int(item[0].split()[0])))
    return result
46
# One LIST response line looks like:  (flags) "delimiter" mailbox-name
# where the delimiter may also be NIL and the name may be quoted or bare.
_LIST_LINE = re.compile(
    r'\([^)]*\)\s+(?:NIL|"(?:\\.|[^"\\])*")\s+(?P<name>.*)$')


def _mailbox_name(list_line):
    """Return the mailbox name found in one LIST response line.

    Quoted names are unquoted and their backslash escapes resolved, so
    names containing quotes are handled; bare (unquoted) names are
    returned as they are. Returns None for entries that cannot be parsed.
    """
    if not isinstance(list_line, str):
        try:
            # Python 3's imaplib returns bytes.
            list_line = list_line.decode('utf-8', 'replace')
        except AttributeError:
            # None (no mailboxes) or another non-string entry.
            return None
    match = _LIST_LINE.match(list_line.strip())
    if match is None:
        return None
    name = match.group('name').strip()
    if len(name) >= 2 and name[0] == '"' and name[-1] == '"':
        name = re.sub(r'\\(.)', r'\1', name[1:-1])
    return name


status, listing = conn.list()
boxes = []
if status == 'OK':
    for entry in listing:
        box_name = _mailbox_name(entry)
        if box_name:
            boxes.append(box_name)

mails = []
for box in boxes:
    mails.extend(get_msg_id_list(conn, box))

# CLOSE is only legal while a mailbox is selected. If the last select
# failed (or there were no mailboxes) imaplib raises an error here, which
# must not stop us from reporting what was collected.
try:
    conn.close()
except imaplib.IMAP4.error:
    pass
conn.logout()

# Group the locations of each Message-ID, keeping discovery order inside
# a group, then report every ID that was seen more than once.
locations = {}
for msgid, box, number in mails:
    locations.setdefault(msgid, []).append('%s %s' % (box, number))

for msgid in sorted(locations):
    places = locations[msgid]
    if len(places) > 1:
        # One line per duplicated ID:  <id> :  <box> <n> , <box> <n> ...
        print('%s :  %s' % (msgid, ' , '.join(places)))


