
Remove Duplicates from List

Zero-dependency Python snippets for removing duplicates from lists using the standard library.

10 snippets available in this sub-category.


Simple

Remove duplicates preserving order

list duplicates remove order data-structures

Remove duplicates while preserving original order

def remove_duplicates_ordered(lst):
    """Remove duplicates from list while preserving order."""
    seen = set()
    result = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


numbers = [1, 2, 2, 3, 4, 4, 5, 1]
result = remove_duplicates_ordered(numbers)
print(result)  # [1, 2, 3, 4, 5]

Notes

  • Preserves original order
  • Uses a set for O(1) membership checks
  • Extra memory proportional to the number of unique items
  • Works with any hashable items

Remove duplicates using set

list duplicates set conversion data-structures

Remove duplicates using set conversion

def remove_duplicates_set(lst):
    """Remove duplicates using set conversion."""
    return list(set(lst))


numbers = [1, 2, 2, 3, 4, 4, 5, 1]
result = remove_duplicates_set(numbers)
print(result)  # [1, 2, 3, 4, 5] (order may vary)

Notes

  • Simple one-liner
  • Order not guaranteed (set iteration order is arbitrary)
  • O(n) average time
  • Requires hashable items; an order-preserving one-liner is sketched below
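
A middle ground between the two snippets above: since Python 3.7, dicts preserve insertion order, so dict.fromkeys gives an order-preserving one-liner from the standard library (not one of the snippets in this sub-category, just a common idiom):

numbers = [1, 2, 2, 3, 4, 4, 5, 1]

# dict.fromkeys keeps the first occurrence of each key, in insertion order (Python 3.7+)
result = list(dict.fromkeys(numbers))
print(result)  # [1, 2, 3, 4, 5]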

Complex

Remove duplicates with custom key function

list duplicates key function custom data-structures

Remove duplicates using custom key function

def remove_duplicates_by_key(lst, key_func=None):
    """Remove duplicates based on custom key function."""
    if key_func is None:
        def key_func(x):
            return x

    seen = set()
    result = []
    for item in lst:
        key = key_func(item)
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result


# Remove duplicates by first letter
words = ["apple", "banana", "apricot", "cherry", "blueberry"]
result = remove_duplicates_by_key(words, lambda x: x[0])
print(result)  # ['apple', 'banana', 'cherry']

# Remove duplicates by length
result2 = remove_duplicates_by_key(words, len)
print(result2)  # ['apple', 'banana', 'apricot', 'blueberry']

Notes

  • Flexible key function
  • Preserves order; keeps the first item seen for each key
  • Useful for complex objects
  • Key values must be hashable

Remove duplicates with case-insensitive comparison

list duplicates case-insensitive string data-structures

Remove duplicates with case-insensitive comparison

def remove_duplicates_case_insensitive(lst):
    """Remove duplicates ignoring case for strings."""
    seen = set()
    result = []
    for item in lst:
        if isinstance(item, str):
            key = item.lower()
        else:
            key = item

        if key not in seen:
            seen.add(key)
            result.append(item)
    return result


words = ["Apple", "apple", "BANANA", "banana", "Cherry"]
result = remove_duplicates_case_insensitive(words)
print(result)  # ['Apple', 'BANANA', 'Cherry']

Notes

  • Handles string case variations
  • Preserves the original casing of the first occurrence
  • Non-string items pass through unchanged
  • Common text-processing need; see the casefold variant below
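
For caseless matching beyond ASCII, str.casefold() is the standard library's recommended normalization; it catches cases that lower() misses, such as the German ß. A minimal variant of the function above:

def remove_duplicates_casefold(lst):
    """Remove duplicates using casefold() for aggressive caseless matching."""
    seen = set()
    result = []
    for item in lst:
        key = item.casefold() if isinstance(item, str) else item
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result


words = ["Straße", "STRASSE", "strasse"]
print(remove_duplicates_casefold(words))  # ['Straße']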

Remove duplicates from list of dictionaries

list duplicates dictionary fields data-structures

Remove duplicate dictionaries by specified fields

def remove_duplicates_dicts(lst, key_fields=None):
    """Remove duplicate dictionaries based on specified fields."""
    if key_fields is None:
        key_fields = list(lst[0].keys()) if lst else []

    seen = set()
    result = []
    for item in lst:
        # Create tuple of key field values
        key = tuple(item.get(field) for field in key_fields)
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result


people = [
    {"name": "Alice", "age": 25, "city": "NYC"},
    {"name": "Bob", "age": 30, "city": "LA"},
    {"name": "Alice", "age": 25, "city": "Boston"},
    {"name": "Charlie", "age": 35, "city": "NYC"},
]

# Remove duplicates by name and age
result = remove_duplicates_dicts(people, ["name", "age"])
print(result)
# [{'name': 'Alice', 'age': 25, 'city': 'NYC'},
#  {'name': 'Bob', 'age': 30, 'city': 'LA'},
#  {'name': 'Charlie', 'age': 35, 'city': 'NYC'}]

Notes

  • Works with complex objects
  • Configurable field selection
  • Preserves the original dictionaries unchanged
  • Useful for data cleaning; note the hashability caveat below
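
One caveat: the tuple-of-values key raises TypeError when a selected field holds an unhashable value such as a list. A hedged workaround is to serialize the key values, for example with repr() (the helper below is hypothetical and assumes repr() distinguishes your values reliably):

def _dict_key(item, key_fields):
    # Hypothetical helper: repr() turns unhashable values (lists, dicts)
    # into hashable strings; assumes repr() is a faithful identity for your data.
    return tuple(repr(item.get(field)) for field in key_fields)


rows = [
    {"name": "Alice", "tags": ["admin", "dev"]},
    {"name": "Alice", "tags": ["admin", "dev"]},
]
seen = set()
unique = []
for row in rows:
    key = _dict_key(row, ["name", "tags"])
    if key not in seen:
        seen.add(key)
        unique.append(row)
print(unique)  # [{'name': 'Alice', 'tags': ['admin', 'dev']}]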

Remove duplicates with frequency tracking

list duplicates frequency counter analysis data-structures

Remove duplicates with frequency analysis

from collections import Counter


def remove_duplicates_with_frequency(lst):
    """Remove duplicates and return frequency information."""
    counter = Counter(lst)
    unique_items = list(counter.keys())
    frequencies = list(counter.values())

    return {
        "unique_items": unique_items,
        "frequencies": frequencies,
        "total_duplicates": len(lst) - len(unique_items),
        "most_common": counter.most_common(1)[0] if counter else None,
    }


numbers = [1, 2, 2, 3, 4, 4, 4, 5, 1]
result = remove_duplicates_with_frequency(numbers)
print(f"Unique items: {result['unique_items']}")
print(f"Frequencies: {result['frequencies']}")
print(f"Total duplicates: {result['total_duplicates']}")
print(f"Most common: {result['most_common']}")

Notes

  • Provides frequency information alongside the unique items
  • Uses Counter for O(n) counting
  • unique_items follows first-occurrence order (dicts preserve insertion order in 3.7+)
  • Handy for quick data profiling

Remove duplicates with custom comparison function

list duplicates custom comparison function data-structures

Remove duplicates with custom comparison function

def remove_duplicates_custom_compare(lst, compare_func):
    """Remove duplicates using custom comparison function."""
    result = []
    for item in lst:
        # Check if item is already in result using custom comparison
        is_duplicate = any(compare_func(item, existing) for existing in result)
        if not is_duplicate:
            result.append(item)
    return result


# Custom comparison: items are equal if their absolute difference is <= 1
def within_one(a, b):
    return abs(a - b) <= 1


numbers = [1, 2, 4, 5, 7, 8, 10]
result = remove_duplicates_custom_compare(numbers, within_one)
print(result)  # [1, 4, 7, 10]

Notes

  • Flexible comparison logic for rules a key function cannot express
  • O(n²): each item is compared against every kept item, so suitable only for small lists
  • Preserves order, and the survivors depend on input order ([10, 8, 7, 5, 4, 2, 1] would keep [10, 8, 5, 2])
  • Advanced use cases such as fuzzy or tolerance-based matching

Remove duplicates with memory optimization

list duplicates generator memory optimization data-structures

Remove duplicates using generator

def remove_duplicates_generator(lst):
    """Remove duplicates using generator for memory efficiency."""
    seen = set()
    for item in lst:
        if item not in seen:
            seen.add(item)
            yield item


numbers = [1, 2, 2, 3, 4, 4, 5, 1]
result = list(remove_duplicates_generator(numbers))
print(result)  # [1, 2, 3, 4, 5]

# Memory efficient iteration
for item in remove_duplicates_generator(numbers):
    print(item, end=" ")  # 1 2 3 4 5

Notes

  • Memory efficient: stores only the set of seen items
  • Lazy evaluation
  • Suitable for large lists and streams
  • Essentially the unique_everseen recipe from the itertools docs (see below)
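
The itertools documentation ships essentially this generator as its unique_everseen recipe, extended with an optional key function; a sketch adapted from that recipe:

def unique_everseen(iterable, key=None):
    """Yield unique elements in order; adapted from the itertools docs recipes."""
    seen = set()
    for element in iterable:
        k = element if key is None else key(element)
        if k not in seen:
            seen.add(k)
            yield element


print(list(unique_everseen("AAAABBBCCDAABBB")))  # ['A', 'B', 'C', 'D']
print(list(unique_everseen("ABBcCAD", key=str.casefold)))  # ['A', 'B', 'c', 'D']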

Remove duplicates with performance monitoring

list duplicates performance timing monitoring data-structures

Remove duplicates with performance monitoring

import time


def remove_duplicates_ordered(lst):
    """Remove duplicates preserving order (defined in an earlier snippet)."""
    seen = set()
    result = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def remove_duplicates_set(lst):
    """Remove duplicates via set conversion (defined in an earlier snippet)."""
    return list(set(lst))


def remove_duplicates_with_timing(lst, method="ordered"):
    """Remove duplicates with performance monitoring."""
    start_time = time.perf_counter()  # perf_counter is the idiomatic clock for timing

    if method == "ordered":
        result = remove_duplicates_ordered(lst)
    elif method == "set":
        result = remove_duplicates_set(lst)
    else:
        raise ValueError("Method must be 'ordered' or 'set'")

    end_time = time.perf_counter()

    return {
        "result": result,
        "execution_time": end_time - start_time,
        "original_length": len(lst),
        "final_length": len(result),
        "duplicates_removed": len(lst) - len(result),
    }


# Performance comparison
large_list = list(range(10000)) + list(range(5000))  # 15000 items with 5000 duplicates

ordered_stats = remove_duplicates_with_timing(large_list, "ordered")
set_stats = remove_duplicates_with_timing(large_list, "set")

print(f"Ordered method: {ordered_stats['execution_time']:.6f}s")
print(f"Set method: {set_stats['execution_time']:.6f}s")

Notes

  • Measures wall-clock time with time.perf_counter
  • Compares the ordered and set-based methods side by side
  • Single runs are noisy; see the timeit sketch below for steadier numbers
  • Reports how many duplicates were removed
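
For steadier numbers, the standard timeit module runs a callable many times and amortizes overhead. A minimal sketch reusing the functions above (the repeat count of 100 is arbitrary):

import timeit

large_list = list(range(10000)) + list(range(5000))

ordered_time = timeit.timeit(lambda: remove_duplicates_ordered(large_list), number=100)
set_time = timeit.timeit(lambda: remove_duplicates_set(large_list), number=100)

print(f"Ordered method: {ordered_time / 100:.6f}s per call")
print(f"Set method: {set_time / 100:.6f}s per call")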

Remove duplicates with error handling

list duplicates safe error handling data-structures

Safely remove duplicates with error handling

def remove_duplicates_ordered(lst):
    """Remove duplicates preserving order (defined in an earlier snippet)."""
    seen = set()
    result = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def remove_duplicates_by_key(lst, key_func=None):
    """Remove duplicates by key function (defined in an earlier snippet)."""
    if key_func is None:
        def key_func(x):
            return x
    seen = set()
    result = []
    for item in lst:
        key = key_func(item)
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result


def remove_duplicates_safe(lst, key_func=None):
    """Safely remove duplicates with error handling."""
    try:
        if not isinstance(lst, list):
            raise TypeError("Input must be a list")

        if not lst:
            return []

        if key_func is not None:
            return remove_duplicates_by_key(lst, key_func)
        else:
            return remove_duplicates_ordered(lst)

    except Exception as e:
        print(f"Error removing duplicates: {e}")
        return lst  # Return original list on error


# Safe duplicate removal; the function itself never raises
result = remove_duplicates_safe([1, 2, 2, 3, 4, 4, 5, 1])
print(result)  # [1, 2, 3, 4, 5]

# Invalid input is reported and returned unchanged
result = remove_duplicates_safe("not a list")  # Error removing duplicates: Input must be a list

Notes

  • Validates the input type with a clear error message
  • Graceful degradation: returns the original input unchanged on error
  • The broad except is convenient but can mask real bugs; narrow it for production use
  • Useful as a defensive wrapper around the earlier snippets

🏷️ Tags

list, duplicates, remove, set, order, key, performance, data-structures

📝 Notes

  • Set-based removal is fastest but doesn't preserve order
  • Ordered removal is slower but maintains original sequence
  • Custom key functions enable complex deduplication logic
  • Consider memory usage for very large lists
  • Always handle edge cases such as empty lists and non-hashable items (see the fallback sketch below)
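
When items themselves are unhashable (lists, dicts), the set-based snippets above raise TypeError. A slow but general fallback is a linear scan over the kept items; O(n²), so only for small inputs:

def remove_duplicates_unhashable(lst):
    """Order-preserving dedup for unhashable items; O(n^2) membership checks."""
    result = []
    for item in lst:
        if item not in result:  # uses ==, so no hashing is required
            result.append(item)
    return result


data = [[1, 2], [3], [1, 2], {"a": 1}, {"a": 1}]
print(remove_duplicates_unhashable(data))  # [[1, 2], [3], {'a': 1}]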