fork download
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. from sklearn.datasets import make_blobs
  5. from sklearn.cluster import KMeans, AgglomerativeClustering
  6. from sklearn.metrics import silhouette_score
  7. from scipy.cluster.hierarchy import dendrogram, linkage
  8.  
  9. # Step 1: Generate a synthetic dataset
  10. data, labels_true = make_blobs(n_samples=300, centers=4, cluster_std=1.0, random_state=42)
  11.  
  12. # Step 2: Apply K-Means Clustering
  13. kmeans = KMeans(n_clusters=4, random_state=42)
  14. kmeans_labels = kmeans.fit_predict(data)
  15. kmeans_silhouette = silhouette_score(data, kmeans_labels)
  16.  
  17. # Step 3: Apply Hierarchical Clustering
  18. hierarchical = AgglomerativeClustering(n_clusters=4)
  19. hierarchical_labels = hierarchical.fit_predict(data)
  20. hierarchical_silhouette = silhouette_score(data, hierarchical_labels)
  21.  
  22. # Step 4: Visualize Results
  23. fig, axes = plt.subplots(1, 3, figsize=(18, 6))
  24.  
  25. # Original Data
  26. axes[0].scatter(data[:, 0], data[:, 1], c=labels_true, cmap='viridis', s=50, alpha=0.6)
  27. axes[0].set_title("Original Data")
  28.  
  29. # K-Means Clustering
  30. axes[1].scatter(data[:, 0], data[:, 1], c=kmeans_labels, cmap='viridis', s=50, alpha=0.6)
  31. axes[1].set_title(f"K-Means Clustering\nSilhouette Score: {kmeans_silhouette:.2f}")
  32.  
  33. # Hierarchical Clustering
  34. axes[2].scatter(data[:, 0], data[:, 1], c=hierarchical_labels, cmap='viridis', s=50, alpha=0.6)
  35. axes[2].set_title(f"Hierarchical Clustering\nSilhouette Score: {hierarchical_silhouette:.2f}")
  36.  
  37. plt.tight_layout()
  38. plt.show()
  39.  
  40. # Step 5: Create a Dendrogram for Hierarchical Clustering
  41. linked = linkage(data, method='ward')
  42. plt.figure(figsize=(10, 7))
  43. dendrogram(linked, truncate_mode='lastp', p=12, leaf_rotation=45., leaf_font_size=15., show_contracted=True)
  44. plt.title("Hierarchical Clustering Dendrogram")
  45. plt.xlabel("Sample index or Cluster size")
  46. plt.ylabel("Distance")
  47. plt.show()
  48.  
  49. # Summary
  50. print("Summary of Clustering Results:\n")
  51. print(f"K-Means Silhouette Score: {kmeans_silhouette:.2f}")
  52. print(f"Hierarchical Clustering Silhouette Score: {hierarchical_silhouette:.2f}")
  53.  
Success #stdin #stdout 4.25s 142456KB
stdin
Standard input is empty
stdout
Summary of Clustering Results:

K-Means Silhouette Score: 0.79
Hierarchical Clustering Silhouette Score: 0.79