Dictionary Subset Operations¶
Zero-dependency Python snippets for creating subsets of dictionaries using the standard library.
13 snippets available in this sub-category.
Simple¶
Create subset by keys¶
dictionary
subset
keys
filter
data-structures
Create subset with specified keys
def subset_by_keys(d, keys):
    """Return a new dict holding only the requested keys that exist in ``d``.

    Keys that are absent from ``d`` are skipped silently. Entries appear in
    the order in which ``keys`` yields them.
    """
    picked = {}
    for key in keys:
        if key in d:
            picked[key] = d[key]
    return picked


person = {
    "name": "Alice",
    "age": 30,
    "city": "New York",
    "occupation": "Engineer",
    "salary": 75000,
}
result = subset_by_keys(person, ["name", "age", "city"])
print(result)  # {'name': 'Alice', 'age': 30, 'city': 'New York'}
Notes
- Uses dict comprehension
- Filters existing keys only
- Preserves the order of the requested keys
- Simple and efficient
Create safe subset with defaults¶
dictionary
subset
safe
default
data-structures
Create subset with safe key access
def safe_subset(d, keys, default=None):
    """Return a dict with one entry per requested key.

    A key that is missing from ``d`` is still present in the result, mapped
    to ``default``, so the lookup never raises ``KeyError``.
    """
    picked = {}
    for key in keys:
        picked[key] = d.get(key, default)
    return picked


person = {"name": "Alice", "age": 30, "city": "New York"}
result = safe_subset(person, ["name", "age", "nonexistent"], "N/A")
print(result)  # {'name': 'Alice', 'age': 30, 'nonexistent': 'N/A'}
Notes
- Uses get() method
- Handles missing keys
- Configurable default value
- No KeyError exceptions
Filter by value type¶
dictionary
subset
type
filter
data-structures
Create subset with values of specific type
def subset_by_type(d, value_type, exclude=()):
    """Return the entries of ``d`` whose value is an instance of ``value_type``.

    Args:
        d: Dictionary to filter.
        value_type: A type or tuple of types, as accepted by ``isinstance``.
        exclude: Optional type or tuple of types to reject even when the
            value matches ``value_type``. Defaults to an empty tuple, which
            rejects nothing.

    Returns:
        A new dict with the matching entries, in the iteration order of ``d``.

    Note:
        ``bool`` is a subclass of ``int``, so ``True``/``False`` match ``int``
        unless ``exclude=bool`` is passed.
    """
    return {
        key: value
        for key, value in d.items()
        if isinstance(value, value_type) and not isinstance(value, exclude)
    }


mixed_data = {
    "name": "John",
    "age": 25,
    "height": 175.5,
    "is_student": True,
    "grades": [85, 90, 88],
}
string_data = subset_by_type(mixed_data, str)
print(string_data)  # {'name': 'John'}
# Without exclude=bool the result would also contain 'is_student': True.
numeric_data = subset_by_type(mixed_data, (int, float), exclude=bool)
print(numeric_data)  # {'age': 25, 'height': 175.5}
Notes
- Type-based filtering
- Supports multiple types
- Uses isinstance()
- Flexible criteria
Complex¶
Filter by value conditions¶
dictionary
subset
condition
filter
lambda
data-structures
Create subset based on value conditions
def subset_by_condition(d, condition_func):
    """Return the entries of ``d`` whose value satisfies ``condition_func``.

    Args:
        d: Dictionary to filter.
        condition_func: Callable invoked with each value; the entry is kept
            when the call returns a truthy result.

    Returns:
        A new dict with the accepted entries, in the iteration order of ``d``.
    """
    return {key: value for key, value in d.items() if condition_func(value)}


person = {"name": "Alice", "age": 30, "salary": 75000, "city": "New York", "experience": 5}

# High salary filter: only integers above 50000 pass, so 'age' and
# 'experience' are rejected along with the string values.
high_salary = subset_by_condition(person, lambda v: isinstance(v, int) and v > 50000)
print(high_salary)  # {'salary': 75000}

# String values only
strings_only = subset_by_condition(person, lambda v: isinstance(v, str))
print(strings_only)  # {'name': 'Alice', 'city': 'New York'}
Notes
- Function-based filtering
- Flexible conditions
- Lambda expressions
- Complex criteria support
Filter by key patterns¶
dictionary
subset
pattern
key-filter
data-structures
Create subset based on key patterns
def subset_by_key_pattern(d, pattern_func):
    """Return the entries of ``d`` whose key is accepted by ``pattern_func``.

    ``pattern_func`` is called with each key; the entry is kept when the
    call returns a truthy result.
    """
    matched = {}
    for key, value in d.items():
        if pattern_func(key):
            matched[key] = value
    return matched


config = {
    "app_name": "MyApp",
    "app_version": "1.0.0",
    "db_host": "localhost",
    "db_port": 5432,
    "api_key": "secret123",
    "api_url": "https://api.example.com",
}

# Keys starting with 'app_'
app_config = subset_by_key_pattern(config, lambda k: k.startswith("app_"))
print(app_config)  # {'app_name': 'MyApp', 'app_version': '1.0.0'}

# Keys containing 'api'
api_config = subset_by_key_pattern(config, lambda k: "api" in k)
print(api_config)  # {'api_key': 'secret123', 'api_url': 'https://api.example.com'}
Notes
- Pattern-based filtering
- String operations
- Regular expressions possible
- Flexible key matching
Nested dictionary subset¶
dictionary
subset
nested
deep-access
data-structures
Create subset using nested key paths
def subset_nested(d, key_paths):
    """Collect values from nested mappings addressed by dot-separated paths.

    Each path such as ``"user.profile.name"`` is split on ``"."`` and walked
    one key at a time starting from ``d``. The result maps the original path
    string to the value found. A path whose walk raises ``KeyError`` or
    ``TypeError`` is left out of the result.
    """
    found = {}
    for dotted in key_paths:
        parts = dotted.split(".")
        try:
            node = d
            for part in parts:
                node = node[part]
            found[dotted] = node
        except (KeyError, TypeError):
            # Unresolvable path: leave it out and move on to the next one.
            pass
    return found


nested_data = {
    "user": {
        "profile": {"name": "Alice", "age": 30},
        "settings": {"theme": "dark", "notifications": True},
    },
    "app": {"version": "1.0.0"},
}
paths = ["user.profile.name", "user.settings.theme", "app.version"]
result = subset_nested(nested_data, paths)
# {'user.profile.name': 'Alice', 'user.settings.theme': 'dark', 'app.version': '1.0.0'}
print(result)
Notes
- Dot notation paths
- Deep key access
- Error handling
- Flexible depth
Subset by value range¶
dictionary
subset
range
numeric
data-structures
Create subset with values within range
def subset_by_range(d, min_val=None, max_val=None):
    """Return the numeric entries of ``d`` that lie within the given bounds.

    Only ``int`` and ``float`` values are considered. A bound left as
    ``None`` is not applied; both bounds are inclusive.
    """
    return {
        key: number
        for key, number in d.items()
        if isinstance(number, (int, float))
        and (min_val is None or not (number < min_val))
        and (max_val is None or not (number > max_val))
    }


scores = {"alice": 85, "bob": 92, "charlie": 78, "diana": 95, "eve": 88}

high_scores = subset_by_range(scores, min_val=90)
print(high_scores)  # {'bob': 92, 'diana': 95}

medium_scores = subset_by_range(scores, min_val=80, max_val=90)
print(medium_scores)  # {'alice': 85, 'eve': 88}
Notes
- Numeric value filtering
- Configurable bounds
- Inclusive ranges
- Type checking
Extract public data¶
dictionary
subset
security
public-data
data-structures
Extract only public/safe data fields
def extract_public_data(data, public_keys):
    """Return only the entries of ``data`` whose key is in ``public_keys``.

    This is an allow-list filter: anything not named in ``public_keys`` is
    dropped. The result follows the iteration order of ``data``.
    """
    exposed = {}
    for field, value in data.items():
        if field in public_keys:
            exposed[field] = value
    return exposed


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "email": "john@example.com",
    "first_name": "John",
    "last_name": "Doe",
    "password": "secret123",
    "api_key": "key123",
}
public_keys = {"username", "first_name", "last_name"}
public_profile = extract_public_data(user_data, public_keys)
print(public_profile)  # {'username': 'john_doe', 'first_name': 'John', 'last_name': 'Doe'}
Notes
- Security-focused filtering
- Whitelist approach
- Data privacy
- Safe data sharing
Filter sensitive data¶
dictionary
subset
security
blacklist
data-structures
Remove sensitive fields from data
def filter_sensitive_data(data, sensitive_keys):
    """Return a copy of ``data`` without the entries named in ``sensitive_keys``.

    This is a deny-list filter: every key that is not listed is kept, in the
    iteration order of ``data``.
    """
    cleaned = {}
    for field, value in data.items():
        if field in sensitive_keys:
            continue
        cleaned[field] = value
    return cleaned


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "email": "john@example.com",
    "password": "secret123",
    "api_key": "key123",
    "token": "token123",
}
sensitive_keys = {"password", "api_key", "token"}
safe_data = filter_sensitive_data(user_data, sensitive_keys)
print(safe_data)  # {'user_id': 12345, 'username': 'john_doe', 'email': 'john@example.com'}
Notes
- Blacklist approach
- Security filtering
- Data sanitization
- Safe logging
Edge Cases¶
Handle empty dictionaries¶
dictionary
subset
error-handling
edge-case
data-structures
Create subset with robust error handling
def robust_subset(d, keys, default=None):
    """Build a subset of ``d`` while tolerating unusable input.

    Args:
        d: Source mapping. Anything that is not a ``dict`` (for example
            ``None``) produces an empty result instead of an error.
        keys: Keys to extract. A falsy value (``None``, empty list, ...)
            produces an empty result.
        default: Value stored for keys that are missing from ``d``.

    Returns:
        A dict with one entry per requested key. Note that an *empty* ``d``
        with non-empty ``keys`` does not return ``{}``: every requested key
        is present and mapped to ``default``.
    """
    if not isinstance(d, dict) or not keys:
        return {}
    return {k: d.get(k, default) for k in keys}


# Test with empty dictionary: every requested key falls back to the default
empty_dict = {}
result = robust_subset(empty_dict, ["a", "b", "c"])
print(result)  # {'a': None, 'b': None, 'c': None}

# Test with None input
result2 = robust_subset(None, ["a", "b"])
print(result2)  # {}
Notes
- Input validation
- Type checking
- Empty input handling
- Safe defaults
Performance optimization¶
dictionary
subset
performance
optimization
data-structures
Efficient subset creation for large dictionaries
import time


def efficient_subset(d, keys):
    """Return the entries of ``d`` whose key is listed in ``keys``.

    ``keys`` is converted to a set once, so each membership test is O(1) on
    average. The function still visits every item of ``d``, so its running
    time grows with ``len(d)``. Keys that are not present in ``d`` are
    ignored, and the result follows the iteration order of ``d``.
    """
    key_set = set(keys)  # built once, outside the comprehension
    return {k: v for k, v in d.items() if k in key_set}


# Benchmark comparison
large_dict = {f"key_{i}": f"value_{i}" for i in range(100000)}
keys_to_extract = [f"key_{i}" for i in range(0, 100000, 1000)]

# Method 1: direct lookup of each requested key (dict comprehension)
# perf_counter() is used because it is monotonic and high resolution.
start = time.perf_counter()
result1 = {k: large_dict[k] for k in keys_to_extract}
time1 = time.perf_counter() - start

# Method 2: Set-based lookup (scans the whole dictionary)
start = time.perf_counter()
result2 = efficient_subset(large_dict, keys_to_extract)
time2 = time.perf_counter() - start

print(f"List method: {time1:.6f}s")
print(f"Set method: {time2:.6f}s")
# Guard the ratio: a zero elapsed time would raise ZeroDivisionError.
speedup = time1 / time2 if time2 > 0 else float("inf")
print(f"Speedup: {speedup:.2f}x")
Notes
- Set-based lookup
- O(1) membership check per key, but every item of the dictionary is still scanned
- Large dataset optimization
- Benchmarking included
Practical Examples¶
Configuration management¶
dictionary
subset
configuration
environment
data-structures
Extract environment-specific configuration
def extract_env_config(full_config, environment, shared_keys=("app_name", "debug")):
    """Extract the settings that apply to one environment.

    Args:
        full_config: Flat configuration dict whose environment-specific keys
            are prefixed with ``"<environment>_"`` (e.g. ``"dev_host"``).
        environment: Environment name used to build the key prefix.
        shared_keys: Keys that are kept for every environment regardless of
            prefix. Defaults to ``("app_name", "debug")``.

    Returns:
        A new dict with the prefixed keys for ``environment`` plus the shared
        keys, in the iteration order of ``full_config``. Keys keep their
        prefix.
    """
    env_prefix = f"{environment}_"
    return {
        key: value
        for key, value in full_config.items()
        if key.startswith(env_prefix) or key in shared_keys
    }


config = {
    "app_name": "MyApp",
    "debug": True,
    "dev_host": "localhost",
    "dev_port": 8000,
    "prod_host": "api.myapp.com",
    "prod_port": 443,
    "database_url": "postgresql://localhost/mydb",
}
dev_config = extract_env_config(config, "dev")
print(dev_config)  # {'app_name': 'MyApp', 'debug': True, 'dev_host': 'localhost', 'dev_port': 8000}
Notes
- Environment-based filtering
- Prefix matching
- Configuration management
- Deployment specific
Data analytics filtering¶
dictionary
subset
analytics
data-processing
data-structures
Extract data suitable for analytics
def extract_analytics_data(user_data, key_markers=("date", "created", "count")):
    """Extract the fields of ``user_data`` that are useful for analytics.

    An entry is kept when its value is an ``int`` or ``float`` (``bool``
    values qualify too, because ``bool`` subclasses ``int``), or when its
    lower-cased key contains one of ``key_markers``.

    Args:
        user_data: Dictionary to filter. Keys of non-numeric values must be
            strings, because they are lower-cased for matching.
        key_markers: Substrings that mark a key as analytics-relevant.
            Defaults to ``("date", "created", "count")``.

    Returns:
        A new dict with the selected entries, in the iteration order of
        ``user_data``.
    """
    selected = {}
    for key, value in user_data.items():
        if isinstance(value, (int, float)):
            selected[key] = value
            continue
        lowered = key.lower()  # computed once per key, not once per marker
        if any(marker in lowered for marker in key_markers):
            selected[key] = value
    return selected


user_data = {
    "user_id": 12345,
    "username": "john_doe",
    "age": 30,
    "created_at": "2023-01-15",
    "last_login": "2024-01-20",
    "login_count": 150,
    "preferences": {"theme": "dark"},
}
analytics_data = extract_analytics_data(user_data)
# 'last_login' is dropped: its value is a string and its key contains none
# of the markers.
# {'user_id': 12345, 'age': 30, 'created_at': '2023-01-15', 'login_count': 150}
print(analytics_data)
Notes
- Analytics-focused filtering
- Pattern-based selection
- Metric extraction
- Data science ready
🔗 Cross-References¶
- Reference: See 📂 Dict Comprehension
- Reference: See 📂 Dict Merge
- Reference: See 📂 Dict Invert
- Reference: See 📂 Dict Sort
- Reference: See 📂 Dict Nested
🏷️ Tags¶
dictionary
, subset
, filtering
, data-processing
, python
, dict-comprehension
, key-filtering
, value-filtering
, nested-dictionaries
, performance
, best-practices
📝 Notes¶
- Subset Functions Simplify Reusable Patterns
- Filtering by Keys, Values, or Patterns Offers Precision
- Support for Nested Access Increases Utility