10. Pairwise Metrics
10.1. Data
[1]:
from sklearn.datasets import load_iris
# Load the iris dataset as a (features, targets) pair; X is reused by the
# real-valued metric cells that follow.
X, y = load_iris(return_X_y=True)
10.2. Plotting function
[2]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
def plot_heatmap(X, figsize=(7,7)):
    """Draw the matrix ``X`` as a seaborn heatmap on a freshly created figure.

    Parameters
    ----------
    X : 2-D array-like
        Matrix of pairwise values to visualise.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.
    """
    # Only the axes handle is needed; the figure object is not used further.
    _, axes = plt.subplots(figsize=figsize)
    sns.heatmap(X, ax=axes)
10.3. Real-valued vector space
10.3.1. Cosine similarity
[3]:
from sklearn.metrics.pairwise import cosine_similarity
# Cosine similarity of X against itself (only X is passed), then plot it.
S = cosine_similarity(X)
plot_heatmap(S)
10.3.2. Cosine distance
[4]:
from sklearn.metrics.pairwise import cosine_distances
# Cosine distance of X against itself (only X is passed), then plot it.
D = cosine_distances(X)
plot_heatmap(D)
10.3.3. Euclidean distance
[5]:
from sklearn.metrics.pairwise import euclidean_distances
# Euclidean distance of X against itself (only X is passed), then plot it.
D = euclidean_distances(X)
plot_heatmap(D)
10.3.4. Manhattan distance
[6]:
from sklearn.metrics.pairwise import manhattan_distances
# Manhattan distance of X against itself (only X is passed), then plot it.
D = manhattan_distances(X)
plot_heatmap(D)
10.3.5. Kernel similarity (chi-squared kernel)
[7]:
from sklearn.metrics.pairwise import pairwise_kernels
# Evaluate the 'chi2' kernel of X against itself and plot the resulting matrix.
# NOTE(review): per the scikit-learn docs, pairwise_kernels returns kernel
# (similarity) values rather than distances, so the name D may mislead - confirm.
D = pairwise_kernels(X, metric='chi2')
plot_heatmap(D)
10.3.6. Minkowski
[8]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# No order `p` is passed, so the library's default Minkowski order is used.
D = DistanceMetric.get_metric('minkowski').pairwise(X)
plot_heatmap(D)
10.3.7. Mahalanobis
[9]:
import numpy as np
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# np.cov treats rows as variables, hence the transpose: V is the covariance
# matrix of the columns (features) of X.
D = DistanceMetric.get_metric('mahalanobis', V=np.cov(X.T)).pairwise(X)
plot_heatmap(D)
10.3.8. Haversine distance
[10]:
from sklearn.metrics.pairwise import haversine_distances
import numpy as np
import pandas as pd
# Draw a reproducible sample of 10 rows (fixed random_state) from the TSV file.
df = pd.read_csv('capitals.tsv', sep='\t').sample(n=10, random_state=37)
X = df[['latitude', 'longitude']].values
# haversine_distances expects (latitude, longitude) pairs in radians. The file's
# columns are assumed to be in degrees (TODO confirm), so convert before calling.
# The result is an angular distance on the unit sphere; multiply by the Earth's
# radius (about 6371 km) if kilometres are wanted.
D = haversine_distances(np.radians(X))
plot_heatmap(D, figsize=(5, 5))
10.4. Integer-valued vector space
10.4.1. Data
[11]:
import numpy as np
# Small 5x3 matrix of 0/1 integers used by the integer-valued metric cells;
# rebinding X replaces the data used by the earlier cells.
X = np.array([
[1, 1, 1],
[1, 1, 0],
[1, 0, 1],
[0, 0, 1],
[0, 0, 0]
])
10.4.2. Hamming
[12]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'hamming' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('hamming').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.4.3. Canberra
[13]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'canberra' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('canberra').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.4.4. Bray-Curtis
[14]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'braycurtis' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('braycurtis').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5. Boolean-valued vector space
10.5.1. Data
[15]:
import numpy as np
# Small 5x3 matrix of 0/1 values used by the boolean-valued metric cells
# (same literal values as the integer-valued example).
X = np.array([
[1, 1, 1],
[1, 1, 0],
[1, 0, 1],
[0, 0, 1],
[0, 0, 0]
])
10.5.2. Jaccard
[16]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'jaccard' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('jaccard').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.3. Matching
[17]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'matching' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('matching').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.4. Dice
[18]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'dice' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('dice').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.5. Kulsinski
[19]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'kulsinski' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('kulsinski').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.6. Rogers-Tanimoto
[20]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'rogerstanimoto' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('rogerstanimoto').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.7. Russell-Rao
[21]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'russellrao' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('russellrao').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.8. Sokal-Michener
[22]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'sokalmichener' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('sokalmichener').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
10.5.9. Sokal-Sneath
[23]:
# DistanceMetric moved to sklearn.metrics in scikit-learn 1.0; the old
# sklearn.neighbors alias is deprecated and absent from recent releases,
# so try the new location first and fall back for older installs.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
# Pairwise 'sokalsneath' metric between the rows of X, shown on a smaller figure.
D = DistanceMetric.get_metric('sokalsneath').pairwise(X)
plot_heatmap(D, figsize=(5, 5))