Advanced Statistics Operations¶
Zero-dependency Python snippets for advanced statistics using the standard library.
10 snippets available in this sub-category.
Simple¶
Calculate variance and standard deviation¶
Tags: math, statistics, variance, stddev, advanced
import statistics

def variance(values, sample=True):
    """Calculate variance (sample or population)."""
    if sample:
        return statistics.variance(values)
    else:
        return statistics.pvariance(values)

def stddev(values, sample=True):
    """Calculate standard deviation (sample or population)."""
    if sample:
        return statistics.stdev(values)
    else:
        return statistics.pstdev(values)

# Examples
nums = [2, 4, 4, 4, 5, 5, 7, 9]
print(f"Sample variance: {variance(nums):.2f}")  # 4.57
print(f"Population variance: {variance(nums, False):.2f}")  # 4.00
print(f"Sample stddev: {stddev(nums):.2f}")  # 2.14
print(f"Population stddev: {stddev(nums, False):.2f}")  # 2.00
Notes
- Sample vs population
- Uses statistics module
- Returns float
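To make the sample/population split concrete, here is a minimal manual sketch (the manual_variance helper is ours for illustration, not part of the snippet above) that reproduces the same numbers without the statistics module:

import math

def manual_variance(values, sample=True):
    # Sample variance divides by n - 1 (Bessel's correction); population divides by n.
    mean_val = sum(values) / len(values)
    squared_dev = sum((x - mean_val) ** 2 for x in values)
    return squared_dev / (len(values) - 1 if sample else len(values))

nums = [2, 4, 4, 4, 5, 5, 7, 9]
print(round(manual_variance(nums), 2))                 # 4.57, matches statistics.variance
print(math.sqrt(manual_variance(nums, sample=False)))  # 2.0, matches statistics.pstdev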
Calculate quantiles, percentiles, and IQR¶
Tags: math, statistics, quantiles, percentiles, iqr, advanced
import statistics

def quantiles(values, n=4):
    """Divide data into n quantiles (default: quartiles).

    Uses method="inclusive" so the cut points match the linear
    interpolation used by percentile() below.
    """
    return statistics.quantiles(values, n=n, method="inclusive")

def percentile(values, percent):
    """Calculate the value at a given percentile (0-100) via linear interpolation."""
    data = sorted(values)
    k = (len(data) - 1) * percent / 100
    f = int(k)
    c = min(f + 1, len(data) - 1)
    if f == c:
        return data[f]
    d0 = data[f] * (c - k)
    d1 = data[c] * (k - f)
    return d0 + d1

def interquartile_range(values):
    """Calculate interquartile range (IQR = Q3 - Q1)."""
    q1, _, q3 = quantiles(values, 4)
    return q3 - q1

# Examples
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9]
print(f"Quartiles: {quantiles(nums)}")  # [3.0, 5.0, 7.0]
print(f"90th percentile: {percentile(nums, 90)}")  # 8.2
print(f"IQR: {interquartile_range(nums)}")  # 4.0
Notes
- Quartiles, percentiles
- Interquartile range for spread
- Useful for box plots
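The IQR also drives the box-plot whisker rule. A small, self-contained sketch of the conventional 1.5 × IQR (Tukey) fences, using the same inclusive quantile method as above (the iqr_fences helper is illustrative, not part of the snippet):

import statistics

def iqr_fences(values, k=1.5):
    # Tukey's fences: values outside [Q1 - k*IQR, Q3 + k*IQR] are flagged as outliers.
    q1, _, q3 = statistics.quantiles(values, n=4, method="inclusive")
    iqr = q3 - q1
    return q1 - k * iqr, q3 + k * iqr

nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 40]
low, high = iqr_fences(nums)
print([x for x in nums if x < low or x > high])  # [40]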
Calculate skewness and kurtosis (manual)¶
Tags: math, statistics, skewness, kurtosis, advanced
Calculate skewness and kurtosis
import statistics

def skewness(values):
    """Calculate skewness (Fisher-Pearson coefficient, population moments)."""
    n = len(values)
    mean_val = statistics.mean(values)
    std_val = statistics.pstdev(values)
    return (sum((x - mean_val) ** 3 for x in values) / n) / (std_val**3)

def kurtosis(values):
    """Calculate excess kurtosis (population moments; 0 for a normal distribution)."""
    n = len(values)
    mean_val = statistics.mean(values)
    std_val = statistics.pstdev(values)
    return (sum((x - mean_val) ** 4 for x in values) / n) / (std_val**4) - 3

# Examples
nums = [2, 4, 4, 4, 5, 5, 7, 9]
print(f"Skewness: {skewness(nums):.2f}")  # ~0.66
print(f"Kurtosis: {kurtosis(nums):.2f}")  # ~-0.22
Notes
- Manual calculation
- Fisher-Pearson skewness
- Excess kurtosis
- Useful for distribution shape
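Libraries such as pandas usually report a bias-corrected sample skewness instead. A hedged sketch of that adjustment, using the standard textbook correction factor sqrt(n(n-1))/(n-2) (the adjusted_skewness helper is illustrative, not part of the snippet above):

import math
import statistics

def adjusted_skewness(values):
    # Bias-corrected skewness G1 = g1 * sqrt(n*(n-1)) / (n-2),
    # where g1 is the population-moment coefficient from the snippet above.
    n = len(values)
    mean_val = statistics.mean(values)
    m2 = sum((x - mean_val) ** 2 for x in values) / n
    m3 = sum((x - mean_val) ** 3 for x in values) / n
    g1 = m3 / m2**1.5
    return g1 * math.sqrt(n * (n - 1)) / (n - 2)

nums = [2, 4, 4, 4, 5, 5, 7, 9]
print(f"{adjusted_skewness(nums):.2f}")  # ~0.82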
Complex¶
Covariance and correlation¶
Tags: math, statistics, covariance, correlation, advanced
Calculate covariance and correlation
import statistics

def covariance(x, y, sample=True):
    """Calculate covariance between two variables."""
    n = len(x)
    mean_x = statistics.mean(x)
    mean_y = statistics.mean(y)
    cov = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    if sample:
        return cov / (n - 1)
    else:
        return cov / n

def correlation(x, y):
    """Calculate Pearson correlation coefficient."""
    std_x = statistics.stdev(x)
    std_y = statistics.stdev(y)
    return covariance(x, y) / (std_x * std_y)

# Examples
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
print(f"Covariance: {covariance(x, y):.2f}")  # 5.0
print(f"Correlation: {correlation(x, y):.2f}")  # 1.0
Notes
- Covariance for joint variability
- Pearson correlation for linear relationship
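On Python 3.10 and newer, the statistics module provides both calculations directly; a quick sketch, assuming Python 3.10+:

import statistics

x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
# statistics.covariance returns the sample covariance (divides by n - 1),
# matching covariance(x, y, sample=True) above.
print(statistics.covariance(x, y))   # 5.0
print(statistics.correlation(x, y))  # 1.0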
Z-scores and standardization¶
Tags: math, statistics, z-score, standardization, normalization
Calculate z-scores and standardize data
import statistics

def z_scores(values):
    """Calculate z-scores for a list of values."""
    mean_val = statistics.mean(values)
    std_val = statistics.stdev(values)
    return [(x - mean_val) / std_val for x in values]

def standardize(values):
    """Standardize values to mean 0, std 1 (identical to z_scores)."""
    return z_scores(values)

# Examples
nums = [10, 12, 14, 16, 18]
print(f"Z-scores: {z_scores(nums)}")
print(f"Standardized: {standardize(nums)}")
Notes
- Z-scores for outlier detection
- Standardization for normalization
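For scoring a single value against a distribution, Python 3.9+ also offers statistics.NormalDist.zscore; a brief sketch, assuming Python 3.9+:

import statistics

nums = [10, 12, 14, 16, 18]
# NormalDist.zscore gives the z-score of one value relative to a distribution
# built from the data's mean and sample standard deviation.
dist = statistics.NormalDist(statistics.mean(nums), statistics.stdev(nums))
print(round(dist.zscore(18), 3))  # ~1.265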
Moving average and rolling statistics¶
Tags: math, statistics, moving-average, rolling, window
Calculate moving average and rolling statistics
import statistics

def moving_average(values, window):
    """Calculate moving average with given window size."""
    if window <= 0:
        raise ValueError("Window size must be positive")
    return [sum(values[i : i + window]) / window for i in range(len(values) - window + 1)]

def rolling_stddev(values, window):
    """Calculate rolling standard deviation."""
    if window <= 0:
        raise ValueError("Window size must be positive")
    return [statistics.stdev(values[i : i + window]) for i in range(len(values) - window + 1)]

# Examples
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(f"Moving average (3): {moving_average(nums, 3)}")
print(f"Rolling stddev (3): {rolling_stddev(nums, 3)}")
Notes
- Moving average for smoothing
- Rolling statistics for time series
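The slice-and-sum approach above re-sums every window, which is O(n · window). For long series, a running sum keeps it O(n); a sketch (the moving_average_fast helper is ours for illustration, not part of the snippet above):

from collections import deque

def moving_average_fast(values, window):
    # Maintains a running sum instead of re-summing each window: O(n) overall.
    if window <= 0:
        raise ValueError("Window size must be positive")
    out, running, q = [], 0.0, deque()
    for x in values:
        q.append(x)
        running += x
        if len(q) > window:
            running -= q.popleft()
        if len(q) == window:
            out.append(running / window)
    return out

print(moving_average_fast([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 3))
# [2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] -- same result as moving_average above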
Edge Cases¶
Handle edge cases in advanced statistics¶
Tags: math, statistics, error-handling, edge-case, validation
Robust advanced statistics with edge case handling
import statistics

def covariance(x, y, sample=True):
    # Defined in the "Covariance and correlation" code block above.
    pass

def safe_variance(values):
    """Safe variance calculation with error handling."""
    try:
        return statistics.variance(values)
    except statistics.StatisticsError:
        return 0

def safe_stddev(values):
    """Safe stddev calculation with error handling."""
    try:
        return statistics.stdev(values)
    except statistics.StatisticsError:
        return 0

def safe_covariance(x, y):
    """Safe covariance calculation with error handling."""
    try:
        return covariance(x, y)
    except Exception:
        return 0

# Test edge cases
print(safe_variance([1]))  # 0
print(safe_stddev([1]))  # 0
print(safe_covariance([1], [1]))  # 0
Notes
- Handles small/empty lists
- Returns 0 for invalid input
- Avoids exceptions in user-facing code
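Returning 0 conflates "not computable" with a genuine spread of 0. If that distinction matters, one alternative convention is to return NaN instead; a sketch (the safe_stddev_nan helper is illustrative, not part of the snippet above):

import math
import statistics

def safe_stddev_nan(values):
    # Return NaN so "not computable" stays distinguishable from a real stddev of 0.
    try:
        return statistics.stdev(values)
    except statistics.StatisticsError:
        return math.nan

print(safe_stddev_nan([1]))              # nan
print(math.isnan(safe_stddev_nan([])))   # True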
Performance comparison¶
Tags: math, statistics, performance, benchmarking
Benchmark advanced statistics calculations
import time
import statistics

def benchmark_advanced_statistics():
    """Benchmark variance and stddev calculations."""
    nums = list(range(1000)) * 100
    start = time.time()
    statistics.variance(nums)
    var_time = time.time() - start
    start = time.time()
    statistics.stdev(nums)
    std_time = time.time() - start
    print(f"Variance: {var_time:.6f}s, Stddev: {std_time:.6f}s")

# benchmark_advanced_statistics()
Notes
- Performance comparison
- Useful for large datasets
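For steadier numbers than a single time.time() delta, the timeit module with repeated runs is a common choice; a sketch (results will vary by machine):

import statistics
import timeit

nums = list(range(1000)) * 100

# timeit with repeat() gives more stable timings by taking the best of several runs.
for name, stmt in [("variance", "statistics.variance(nums)"),
                   ("stdev", "statistics.stdev(nums)")]:
    best = min(timeit.repeat(stmt, globals=globals(), number=5, repeat=3))
    print(f"{name}: {best / 5:.6f}s per call")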
Practical Examples¶
Outlier detection and normalization¶
Tags: math, statistics, outliers, normalization, minmax
Detect outliers and normalize data
def z_scores(values):
    # Defined in the "Z-scores and standardization" code block above.
    pass

def detect_outliers(values, threshold=2):
    """Detect outliers using the z-score method."""
    zs = z_scores(values)
    return [x for x, z in zip(values, zs) if abs(z) > threshold]

def normalize_minmax(values):
    """Normalize values to the [0, 1] range."""
    min_val = min(values)
    max_val = max(values)
    return [(x - min_val) / (max_val - min_val) if max_val > min_val else 0 for x in values]

# Examples
nums = [10, 12, 14, 16, 100]
# With only 5 points, sample z-scores cannot exceed (n - 1) / sqrt(n) ~= 1.79,
# so a threshold below that is used here for the demo to flag anything.
print(f"Outliers: {detect_outliers(nums, threshold=1.5)}")  # [100]
print(f"Min-max normalized: {normalize_minmax(nums)}")
Notes
- Z-score outlier detection
- Min-max normalization
- Useful for preprocessing
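Because sample z-scores are bounded by (n − 1)/√n, the z-score method is weak on very small samples. A more robust alternative uses the median and MAD with the conventional Iglewicz–Hoaglin constants (0.6745 scale factor, cutoff 3.5); the mad_outliers helper below is illustrative, not part of the snippet above:

import statistics

def mad_outliers(values, threshold=3.5):
    # Modified z-scores based on the median and the median absolute deviation (MAD).
    med = statistics.median(values)
    mad = statistics.median(abs(x - med) for x in values)
    if mad == 0:
        return []
    return [x for x in values if abs(0.6745 * (x - med) / mad) > threshold]

print(mad_outliers([10, 12, 14, 16, 100]))  # [100]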
Linear regression (simple)¶
Tags: math, statistics, regression, linear, prediction
Perform simple linear regression
import statistics

def linear_regression(x, y):
    """Calculate slope and intercept for simple least-squares linear regression."""
    mean_x = statistics.mean(x)
    mean_y = statistics.mean(y)
    num = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    den = sum((xi - mean_x) ** 2 for xi in x)
    slope = num / den
    intercept = mean_y - slope * mean_x
    return slope, intercept

def predict(x, slope, intercept):
    """Predict y value given x, slope, and intercept."""
    return slope * x + intercept

# Examples
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
slope, intercept = linear_regression(x, y)
print(f"Slope: {slope:.2f}, Intercept: {intercept:.2f}")  # 0.60, 2.20
print(f"Prediction for x=6: {predict(6, slope, intercept):.2f}")  # 5.80
Notes
- Simple linear regression
- Slope and intercept
- Prediction function
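Python 3.10+ ships the same simple least-squares fit as statistics.linear_regression; a quick sketch, assuming Python 3.10+:

import statistics

x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
# Returns a named tuple with slope and intercept attributes.
result = statistics.linear_regression(x, y)
print(f"Slope: {result.slope:.2f}, Intercept: {result.intercept:.2f}")  # 0.60, 2.20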
🔗 Cross-References¶
- Reference: See 📂 Statistics Basic
- Reference: See 📂 Round Number
- Reference: See 📂 Format Number
- Reference: See 📂 Percentage
🏷️ Tags¶
math, statistics, variance, stddev, correlation, regression, outliers, normalization, performance, edge-case, best-practices
📝 Notes¶
- Advanced statistics support data science and analytics work
- Use these snippets for variability, correlation, and regression
- Edge case handling keeps the helpers robust
- Performance is suitable for large datasets