'''Remove duplicate Outlook contacts'''

# Usage: removedups.py -- to list all duplicate contacts
#        removedups.py remove -- to permanently delete all the duplicate contacts

# NOTE: try running under Python 2.3 if you get an "Error: unable to access Outlook"
# (caused by win32com.client.gencache.EnsureDispatch "Invalid Syntax" error)

# Derived from Kevin Altis' Python Cookbook recipe
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/173216

def display_name(record):
    '''Return the label used to identify a contact record in the report.

    record -- dict mapping Outlook contact field names to their values.

    FullName is preferred; CompanyName is the fallback for contacts that have
    no name.  An empty string is returned when neither field holds a value.
    '''
    return record.get('FullName') or record.get('CompanyName') or ''


def find_duplicates(records):
    '''Return (duplicate_index, original_index) pairs for identical records.

    records -- list of dicts, one per contact; records are compared with ==.

    The earliest record of each group of identical records is treated as the
    original and every later copy is paired with it.  Each duplicate appears
    in exactly one pair, so a contact is never counted (or deleted) twice
    when three or more copies exist.
    '''
    is_dup = [False] * len(records)
    pairs = []
    for i, original in enumerate(records):
        if is_dup[i]:
            # Already paired with an earlier original; any further copies
            # were paired with that same original too.
            continue
        for j in range(i + 1, len(records)):
            if not is_dup[j] and records[j] == original:
                is_dup[j] = True
                pairs.append((j, i))
    return pairs


if __name__ == '__main__':
    import sys
    import win32com.client

    # Fields which are commonly different even when the contacts are, for all
    # intents and purposes, identical; they are left out of the comparison.
    exclude_fields = [ 'ConversationIndex', 'EntryID', 'OutlookInternalVersion', 'Size',
                     'SelectedMailingAddress', 'Email1EntryID', 'Email2EntryID', 'Email3EntryID' ]

    # Without the 'remove' argument the script only lists the duplicates.
    remove_dups = len(sys.argv) > 1 and sys.argv[1] == 'remove'

    # NOTE: print is written with a single parenthesised argument, which
    # behaves exactly like the plain print statement.
    print('Accessing Outlook...')
    try:
        outlook = win32com.client.gencache.EnsureDispatch('Outlook.Application')
    except Exception:
        print('Error: unable to access Outlook')
        sys.exit(1)

    # Load all the contacts

    records = []
    contacts = []
    ofContacts = outlook.GetNamespace('MAPI').GetDefaultFolder(win32com.client.constants.olFolderContacts)

    # Fetch the Items collection once instead of on every loop iteration.
    items = ofContacts.Items
    item_count = len(items)

    print('Loading %d contacts...' % item_count)

    keys = None
    # Outlook collections are 1-based.
    for index in range(1, item_count + 1):
        contact = items.Item(index)
        if contact.Class != win32com.client.constants.olContact:
            # Skip anything in the folder that is not a contact item.
            continue

        # The first time we encounter a contact, determine which fields to compare.
        # Ideally we would simply compare them all but I have determined empirically
        # some fields which are commonly different even when the contacts for all
        # intents and purposes are identical
        if keys is None:
            keys = []
            for key in contact._prop_map_get_:
                if isinstance(getattr(contact, key), (int, str, unicode)):
                    if key not in exclude_fields:
                        keys.append(key)

        record = {}
        for key in keys:
            record[key] = getattr(contact, key)
        contacts.append(contact)
        records.append(record)

    # Figure out which ones are duplicates

    duplicates = find_duplicates(records)
    for dup_index, orig_index in duplicates:
        dup_name = display_name(records[dup_index])
        orig_name = display_name(records[orig_index])

        print('%s (%d) looks like a dup of %s (%d)' % (dup_name, dup_index, orig_name, orig_index))
        if remove_dups:
            print('   deleting %s (%d)' % (dup_name, dup_index))
            contacts[dup_index].Delete()

    print('%d duplicates' % len(duplicates))
