Skip to content

Dictionary Subset Operations

Zero-dependency Python snippets for creating subsets of dictionaries using the standard library.

13 snippets available in this sub-category.


Simple

Create subset by keys

dictionary subset keys filter data-structures

Create subset with specified keys

def subset_by_keys(d, keys):
    """Create subset with specified keys."""
    return {k: d[k] for k in keys if k in d}


person = {"name": "Alice", "age": 30, "city": "New York", "occupation": "Engineer", "salary": 75000}

result = subset_by_keys(person, ["name", "age", "city"])
print(result)  # {'name': 'Alice', 'age': 30, 'city': 'New York'}

Notes

  • Uses dict comprehension
  • Filters existing keys only
  • Preserves order
  • Simple and efficient

Create safe subset with defaults

dictionary subset safe default data-structures

Create subset with safe key access

def safe_subset(d, keys, default=None):
    """Create subset with safe key access and defaults."""
    return {k: d.get(k, default) for k in keys}


person = {"name": "Alice", "age": 30, "city": "New York"}

result = safe_subset(person, ["name", "age", "nonexistent"], "N/A")
print(result)  # {'name': 'Alice', 'age': 30, 'nonexistent': 'N/A'}

Notes

  • Uses get() method
  • Handles missing keys
  • Configurable default value
  • No KeyError exceptions

Filter by value type

dictionary subset type filter data-structures

Create subset with values of specific type

def subset_by_type(d, value_type):
    """Create subset with values of specific type."""
    return {k: v for k, v in d.items() if isinstance(v, value_type)}


mixed_data = {
    "name": "John",
    "age": 25,
    "height": 175.5,
    "is_student": True,
    "grades": [85, 90, 88],
}

string_data = subset_by_type(mixed_data, str)
print(string_data)  # {'name': 'John'}

numeric_data = subset_by_type(mixed_data, (int, float))
print(numeric_data)  # {'age': 25, 'height': 175.5}

Notes

  • Type-based filtering
  • Supports multiple types
  • Uses isinstance()
  • Flexible criteria

Complex

Filter by value conditions

dictionary subset condition filter lambda data-structures

Create subset based on value conditions

def subset_by_condition(d, condition_func):
    """Create subset based on value conditions."""
    return {k: v for k, v in d.items() if condition_func(v)}


person = {"name": "Alice", "age": 30, "salary": 75000, "city": "New York", "experience": 5}

# High salary filter
high_salary = subset_by_condition(person, lambda v: isinstance(v, int) and v > 50000)
print(high_salary)  # {'age': 30, 'salary': 75000}

# String values only
strings_only = subset_by_condition(person, lambda v: isinstance(v, str))
print(strings_only)  # {'name': 'Alice', 'city': 'New York'}

Notes

  • Function-based filtering
  • Flexible conditions
  • Lambda expressions
  • Complex criteria support

Filter by key patterns

dictionary subset pattern key-filter data-structures

Create subset based on key patterns

def subset_by_key_pattern(d, pattern_func):
    """Create subset based on key patterns."""
    return {k: v for k, v in d.items() if pattern_func(k)}


config = {
    "app_name": "MyApp",
    "app_version": "1.0.0",
    "db_host": "localhost",
    "db_port": 5432,
    "api_key": "secret123",
    "api_url": "https://api.example.com",
}

# Keys starting with 'app_'
app_config = subset_by_key_pattern(config, lambda k: k.startswith("app_"))
print(app_config)  # {'app_name': 'MyApp', 'app_version': '1.0.0'}

# Keys containing 'api'
api_config = subset_by_key_pattern(config, lambda k: "api" in k)
print(api_config)  # {'api_key': 'secret123', 'api_url': 'https://api.example.com'}

Notes

  • Pattern-based filtering
  • String operations
  • Regular expressions possible
  • Flexible key matching

Nested dictionary subset

dictionary subset nested deep-access data-structures

Create subset using nested key paths

def subset_nested(d, key_paths):
    """Create subset using nested key paths."""
    result = {}
    for path in key_paths:
        keys = path.split(".")
        value = d
        try:
            for key in keys:
                value = value[key]
            result[path] = value
        except (KeyError, TypeError):
            continue
    return result


nested_data = {
    "user": {
        "profile": {"name": "Alice", "age": 30},
        "settings": {"theme": "dark", "notifications": True},
    },
    "app": {"version": "1.0.0"},
}

paths = ["user.profile.name", "user.settings.theme", "app.version"]
result = subset_nested(nested_data, paths)
print(
    result
)  # {'user.profile.name': 'Alice', 'user.settings.theme': 'dark', 'app.version': '1.0.0'}

Notes

  • Dot notation paths
  • Deep key access
  • Error handling
  • Flexible depth

Subset by value range

dictionary subset range numeric data-structures

Create subset with values within range

def subset_by_range(d, min_val=None, max_val=None):
    """Create subset with values within specified range."""
    result = {}
    for k, v in d.items():
        if not isinstance(v, (int, float)):
            continue
        if min_val is not None and v < min_val:
            continue
        if max_val is not None and v > max_val:
            continue
        result[k] = v
    return result


scores = {"alice": 85, "bob": 92, "charlie": 78, "diana": 95, "eve": 88}

high_scores = subset_by_range(scores, min_val=90)
print(high_scores)  # {'bob': 92, 'diana': 95}

medium_scores = subset_by_range(scores, min_val=80, max_val=90)
print(medium_scores)  # {'alice': 85, 'eve': 88}

Notes

  • Numeric value filtering
  • Configurable bounds
  • Inclusive ranges
  • Type checking

Extract public data

dictionary subset security public-data data-structures

Extract only public/safe data fields

def extract_public_data(data, public_keys):
    """Extract only public/safe data fields."""
    return {k: v for k, v in data.items() if k in public_keys}


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "email": "john@example.com",
    "first_name": "John",
    "last_name": "Doe",
    "password": "secret123",
    "api_key": "key123",
}

public_keys = {"username", "first_name", "last_name"}
public_profile = extract_public_data(user_data, public_keys)
print(public_profile)  # {'username': 'john_doe', 'first_name': 'John', 'last_name': 'Doe'}

Notes

  • Security-focused filtering
  • Whitelist approach
  • Data privacy
  • Safe data sharing

Filter sensitive data

dictionary subset security blacklist data-structures

Remove sensitive fields from data

def filter_sensitive_data(data, sensitive_keys):
    """Remove sensitive fields from data."""
    return {k: v for k, v in data.items() if k not in sensitive_keys}


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "email": "john@example.com",
    "password": "secret123",
    "api_key": "key123",
    "token": "token123",
}

sensitive_keys = {"password", "api_key", "token"}
safe_data = filter_sensitive_data(user_data, sensitive_keys)
print(safe_data)  # {'user_id': 12345, 'username': 'john_doe', 'email': 'john@example.com'}

Notes

  • Blacklist approach
  • Security filtering
  • Data sanitization
  • Safe logging

Edge Cases

Handle empty dictionaries

dictionary subset error-handling edge-case data-structures

Create subset with robust error handling

def robust_subset(d, keys, default=None):
    """Create subset with robust error handling."""
    if not isinstance(d, dict):
        return {}
    if not keys:
        return {}
    return {k: d.get(k, default) for k in keys}


# Test with empty dictionary
empty_dict = {}
result = robust_subset(empty_dict, ["a", "b", "c"])
print(result)  # {}

# Test with None input
result2 = robust_subset(None, ["a", "b"])
print(result2)  # {}

Notes

  • Input validation
  • Type checking
  • Empty input handling
  • Safe defaults

Performance optimization

dictionary subset performance optimization data-structures

Efficient subset creation for large dictionaries

def efficient_subset(d, keys):
    """Efficient subset creation for large dictionaries."""
    key_set = set(keys)  # O(1) lookup
    return {k: v for k, v in d.items() if k in key_set}


# Benchmark comparison
import time

large_dict = {f"key_{i}": f"value_{i}" for i in range(100000)}
keys_to_extract = [f"key_{i}" for i in range(0, 100000, 1000)]

# Method 1: List comprehension
start = time.time()
result1 = {k: large_dict[k] for k in keys_to_extract}
time1 = time.time() - start

# Method 2: Set-based lookup
start = time.time()
result2 = efficient_subset(large_dict, keys_to_extract)
time2 = time.time() - start

print(f"List method: {time1:.6f}s")
print(f"Set method: {time2:.6f}s")
print(f"Speedup: {time1 / time2:.2f}x")

Notes

  • Set-based lookup
  • O(1) key checking
  • Large dataset optimization
  • Benchmarking included

Practical Examples

Configuration management

dictionary subset configuration environment data-structures

Extract environment-specific configuration

def extract_env_config(full_config, environment):
    """Extract environment-specific configuration."""
    env_prefix = f"{environment}_"
    return {
        k: v
        for k, v in full_config.items()
        if k.startswith(env_prefix) or k in ["app_name", "debug"]
    }


config = {
    "app_name": "MyApp",
    "debug": True,
    "dev_host": "localhost",
    "dev_port": 8000,
    "prod_host": "api.myapp.com",
    "prod_port": 443,
    "database_url": "postgresql://localhost/mydb",
}

dev_config = extract_env_config(config, "dev")
print(dev_config)  # {'app_name': 'MyApp', 'debug': True, 'dev_host': 'localhost', 'dev_port': 8000}

Notes

  • Environment-based filtering
  • Prefix matching
  • Configuration management
  • Deployment specific

Data analytics filtering

dictionary subset analytics data-processing data-structures

Extract data suitable for analytics

def extract_analytics_data(user_data):
    """Extract data suitable for analytics."""
    return {
        k: v
        for k, v in user_data.items()
        if isinstance(v, (int, float))
        or "date" in k.lower()
        or "created" in k.lower()
        or "count" in k.lower()
    }


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "age": 30,
    "created_at": "2023-01-15",
    "last_login": "2024-01-20",
    "login_count": 150,
    "preferences": {"theme": "dark"},
}

analytics_data = extract_analytics_data(user_data)
print(
    analytics_data
)  # {'user_id': 12345, 'age': 30, 'created_at': '2023-01-15', 'last_login': '2024-01-20', 'login_count': 150}

Notes

  • Analytics-focused filtering
  • Pattern-based selection
  • Metric extraction
  • Data science ready

🔗 Cross-References

🏷️ Tags

dictionary, subset, filtering, data-processing, python, dict-comprehension, key-filtering, value-filtering, nested-dictionaries, performance, best-practices

📝 Notes

  • Subset Functions Simplify Reusable Patterns
  • Filtering by Keys, Values, or Patterns Offers Precision
  • Support for Nested Access Increases Utility