Detecting Data Bias with Kullback-Leibler Divergence | by Nikolas Weissmueller

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def generate_sample(measurement, imply=0, std=1):
    """Draw an unweighted sample from a normal distribution.

    Args:
        measurement: Number of values to draw (the sample size).
        imply: Mean of the normal distribution.
        std: Standard deviation of the normal distribution.

    Returns:
        A 1-D NumPy array holding ``measurement`` draws.
    """
    # NumPy's Gaussian sampler is ``normal``; ``np.random.regular`` does not exist.
    return np.random.normal(imply, std, measurement)

def weighted_sample(measurement, weight_func, imply=0, std=1):
    """Draw a biased normal sample by rejection sampling.

    Candidates are drawn from a normal distribution and each one is kept with
    probability ``weight_func(x) / max(weight_func(x))``, so the accepted
    values follow the normal density re-weighted by ``weight_func``.

    Args:
        measurement: Number of values to return (the sample size).
        weight_func: Callable mapping an array of candidates to an array of
            non-negative weights of the same length.
        imply: Mean of the underlying normal distribution.
        std: Standard deviation of the underlying normal distribution.

    Returns:
        A 1-D NumPy array holding exactly ``measurement`` accepted values.

    Raises:
        ValueError: If ``measurement`` is negative, or if the largest weight
            in a batch is not a positive finite number (nothing could be
            accepted in that case).
    """
    if measurement < 0:
        raise ValueError("measurement must be non-negative")

    batch_size = measurement * 2
    accepted_batches = []
    n_accepted = 0
    # A single batch usually accepts fewer than ``measurement`` candidates
    # (the acceptance rate is mean(w) / max(w)), so keep drawing batches until
    # enough values were accepted. The candidate holding the batch maximum is
    # always accepted, which guarantees progress on every pass.
    while n_accepted < measurement:
        x = np.random.normal(imply, std, batch_size)
        w = weight_func(x)
        w_max = np.max(w)
        if not np.isfinite(w_max) or w_max <= 0:
            raise ValueError("weight_func must yield a positive, finite maximum weight")
        kept = x[np.random.random(batch_size) < w / w_max]
        accepted_batches.append(kept)
        n_accepted += kept.size

    if not accepted_batches:
        return np.empty(0)
    return np.concatenate(accepted_batches)[:measurement]

def polynomial_weight(x, a):
    """Return the quadratic weight ``1 + a * x**2``.

    Args:
        x: Scalar or NumPy array of sample values.
        a: Coefficient of the quadratic term.

    Returns:
        The weight(s), with the same shape as ``x``.
    """
    return 1 + a * x**2

def exponential_weight(x, a):
    """Return the exponential weight ``exp(a * x)``.

    Args:
        x: Scalar or NumPy array of sample values.
        a: Rate multiplying ``x`` inside the exponential.

    Returns:
        The weight(s), with the same shape as ``x``.
    """
    return np.exp(a * x)

def calculate_kl_divergence(p, q):
    """Estimate the KL divergence D(P || Q) between two 1-D samples.

    Both densities are estimated with a Gaussian kernel density estimate, and
    the integral of ``p(x) * (log p(x) - log q(x))`` is approximated by a
    Riemann sum over 1000 evenly spaced points spanning the combined range of
    the two samples.

    Args:
        p: 1-D NumPy array of draws from the reference distribution P.
        q: 1-D NumPy array of draws from the test distribution Q.

    Returns:
        The estimated divergence as a float.
    """
    # Use kernel density estimation for smoother pdf estimates
    kde_p = stats.gaussian_kde(p)
    kde_q = stats.gaussian_kde(q)
    x = np.linspace(min(p.min(), q.min()), max(p.max(), q.max()), 1000)

    # Evaluate each density once instead of twice.
    pdf_p = kde_p(x)
    # Floor q at the smallest positive float so an underflowed tail does not
    # produce log(0) = -inf.
    pdf_q = np.maximum(kde_q(x), np.finfo(float).tiny)
    # Grid points where p underflowed to 0 contribute nothing (0 * log 0 := 0);
    # skipping them avoids 0 * -inf = NaN.
    support = pdf_p > 0
    integrand = pdf_p[support] * (np.log(pdf_p[support]) - np.log(pdf_q[support]))
    return np.sum(integrand) * (x[1] - x[0])

def calculate_kld_curve(sample_sizes, reference_dist, test_dist, num_runs=10):
    """Compute the mean KL divergence at each sample size.

    For every size in ``sample_sizes`` a fresh reference sample and a fresh
    test sample are drawn ``num_runs`` times, and the resulting divergences
    are averaged.

    Args:
        sample_sizes: Iterable of sample sizes to evaluate.
        reference_dist: Callable taking a sample size and returning the
            reference sample (P).
        test_dist: Callable taking a sample size and returning the test
            sample (Q).
        num_runs: Number of repetitions averaged per sample size.

    Returns:
        A list with one mean divergence per entry of ``sample_sizes``.
    """
    kld_values = []
    for measurement in sample_sizes:
        # The reference sample is drawn before the test sample on every run.
        klds = [
            calculate_kl_divergence(reference_dist(measurement), test_dist(measurement))
            for _ in range(num_runs)
        ]
        kld_values.append(np.mean(klds))
    return kld_values

def plot_kld_curves(sample_sizes, curves, labels):
    """Plot KL divergence against sample size on log-log axes.

    Args:
        sample_sizes: Sequence of sample sizes (shared x values).
        curves: Sequence of divergence curves, each the same length as
            ``sample_sizes``.
        labels: Legend label for each entry of ``curves``.
    """
    plt.figure(figsize=(12, 7))
    for curve, label in zip(curves, labels):
        plt.plot(sample_sizes, curve, label=label, marker='o', markersize=4)
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('Sample Size')
    plt.ylabel('KL Divergence')
    plt.legend()
    plt.title('KL Divergence vs Sample Size')
    # ``which`` accepts 'major', 'minor' or 'both'; draw both sets of grid lines.
    plt.grid(True, which="both", ls="-", alpha=0.2)
    plt.tight_layout()
    plt.show()

# Main execution
# NOTE(review): n is not referenced by the statements below; kept in case
# other code relies on it.
n = 10000
# 30 logarithmically spaced sample sizes from 10 to 100000.
sample_sizes = np.logspace(1, 5, 30).astype(int)

# Define distribution generators: each maps a sample size to a fresh sample.
unweighted_dist = lambda measurement: generate_sample(measurement)
poly_weighted_dist = lambda measurement: weighted_sample(measurement, lambda x: polynomial_weight(x, 0.5))
exp_weighted_dist = lambda measurement: weighted_sample(measurement, lambda x: exponential_weight(x, 0.2))

# Calculate KLD curves; the unweighted-vs-unweighted curve is the baseline.
unweighted_curve = calculate_kld_curve(sample_sizes, unweighted_dist, unweighted_dist)
poly_weighted_curve = calculate_kld_curve(sample_sizes, unweighted_dist, poly_weighted_dist)
exp_weighted_curve = calculate_kld_curve(sample_sizes, unweighted_dist, exp_weighted_dist)

# Plot results
plot_kld_curves(
    sample_sizes,
    [unweighted_curve, poly_weighted_curve, exp_weighted_curve],
    ['Unweighted', 'Polynomial Weighted', 'Exponential Weighted'],
)

Source link

Explaining dropout technique in deep learning | by Fernando Jean Dijkinga, M.Sc. | Sep, 2024

Teknik Prompt Yang Jelas dan Spesifik — Bagian 2/5 | by trirachmat | Sep, 2024

Building an End-to-End Machine Learning Pipeline with Azure Data Factory | by Kishan Rasikbhai Akbari | Sep, 2024

Leave A Reply Cancel Reply

Netflix teases the next seasons of Avatar, Squid Game and Arcane at Geeked Week

Explaining dropout technique in deep learning | by Fernando Jean Dijkinga, M.Sc. | Sep, 2024

Teknik Prompt Yang Jelas dan Spesifik — Bagian 2/5 | by trirachmat | Sep, 2024

Building an End-to-End Machine Learning Pipeline with Azure Data Factory | by Kishan Rasikbhai Akbari | Sep, 2024

8 Insights from Working with LLM Recently | by Mr.Data | Sep, 2024

Most Popular

The Hamas Threat of Hostage Execution Videos Looms Large Over Social Media

Revolutionizing the Way We Find Love

Federal Investigators Widen Tesla Inquiry, Company Says

Our Picks

Netflix teases the next seasons of Avatar, Squid Game and Arcane at Geeked Week

Explaining dropout technique in deep learning | by Fernando Jean Dijkinga, M.Sc. | Sep, 2024

Teknik Prompt Yang Jelas dan Spesifik — Bagian 2/5 | by trirachmat | Sep, 2024

Detecting Data Bias with Kullback-Leibler Divergence | by Nikolas Weissmueller | Sep, 2024

Related Posts

Leave A Reply Cancel Reply