10. Pairwise Metrics

10.1. Data

[1]:
from sklearn.datasets import load_iris

# Load iris as a (features, labels) pair instead of a Bunch object.
# X is the feature matrix that the real-valued metric cells below operate on.
X, y = load_iris(return_X_y=True)

10.2. Plotting function

[2]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

def plot_heatmap(X, figsize=(7,7), title=None):
    """Draw a matrix as a seaborn heatmap on a freshly created figure.

    Parameters
    ----------
    X : 2-D array-like
        Matrix to visualise; in this notebook a pairwise similarity or
        distance matrix.
    figsize : tuple of (width, height), default (7, 7)
        Figure size forwarded to ``plt.subplots``.
    title : str, optional
        Title to put on the axes. When ``None`` (the default) no title is
        set, so existing calls render exactly as before.

    Returns
    -------
    None
        Nothing is returned on purpose: a call on the last line of a cell
        then shows only the figure, without an Axes repr underneath.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(X, ax=ax)
    if title is not None:
        ax.set_title(title)

10.3. Real-valued vector space

10.3.1. Cosine similarity

[3]:
from sklearn.metrics.pairwise import cosine_similarity

# Similarity matrix: one row and one column per sample in X.
S = cosine_similarity(X)
# Draw it at the helper's default size, spelled out explicitly.
plot_heatmap(S, figsize=(7, 7))
_images/pairwise-metrics_7_0.png

10.3.2. Cosine distance

[4]:
from sklearn.metrics.pairwise import cosine_distances

# Pairwise cosine distances between the rows of X.
D = cosine_distances(X)
# Draw it at the helper's default size, spelled out explicitly.
plot_heatmap(D, figsize=(7, 7))
_images/pairwise-metrics_9_0.png

10.3.3. Euclidean distance

[5]:
from sklearn.metrics.pairwise import euclidean_distances

# Pairwise Euclidean distances between the rows of X.
D = euclidean_distances(X)
# Draw it at the helper's default size, spelled out explicitly.
plot_heatmap(D, figsize=(7, 7))
_images/pairwise-metrics_11_0.png

10.3.4. Manhattan distance

[6]:
from sklearn.metrics.pairwise import manhattan_distances

# Pairwise Manhattan (L1) distances between the rows of X.
D = manhattan_distances(X)
# Draw it at the helper's default size, spelled out explicitly.
plot_heatmap(D, figsize=(7, 7))
_images/pairwise-metrics_13_0.png

10.3.5. Kernel similarity (chi-squared)

[7]:
from sklearn.metrics.pairwise import pairwise_kernels

# Chi-squared kernel matrix for the rows of X. Kernel values are
# similarities (larger = more alike), even though the result is kept in D
# like the distance matrices in the neighbouring cells.
D = pairwise_kernels(X, metric='chi2')
# Draw it at the helper's default size, spelled out explicitly.
plot_heatmap(D, figsize=(7, 7))
_images/pairwise-metrics_15_0.png

10.3.6. Minkowski

[8]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# p=2 is the default order and is written out for clarity; with p=2 the
# Minkowski distance coincides with the Euclidean distance.
D = DistanceMetric.get_metric('minkowski', p=2).pairwise(X)
plot_heatmap(D)
_images/pairwise-metrics_17_0.png

10.3.7. Mahalanobis

[9]:
import numpy as np
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# V is the covariance matrix of the features. np.cov treats each row as a
# variable, hence the transpose of the (samples x features) matrix X.
D = DistanceMetric.get_metric('mahalanobis', V=np.cov(X.T)).pairwise(X)
plot_heatmap(D)
_images/pairwise-metrics_19_0.png

10.3.8. Haversine distance

[10]:
from sklearn.metrics.pairwise import haversine_distances
import numpy as np
import pandas as pd

# Fixed random_state keeps the sampled subset of rows reproducible.
df = pd.read_csv('capitals.tsv', sep='\t').sample(n=10, random_state=37)
X = df[['latitude', 'longitude']].values

# haversine_distances expects [latitude, longitude] pairs in radians.
# NOTE(review): the columns are assumed to hold degrees; confirm against
# capitals.tsv. Passing degrees unconverted gives meaningless distances.
# The result is the great-circle distance on the unit sphere; multiply by
# the Earth's radius (about 6371 km) to obtain kilometres.
D = haversine_distances(np.radians(X))
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_21_0.png

10.4. Integer-valued vector space

10.4.1. Data

[11]:
import numpy as np

# Toy data: five 3-component vectors with 0/1 entries, one vector per row.
X = np.array([[1, 1, 1], [1, 1, 0], [1, 0, 1], [0, 0, 1], [0, 0, 0]])

10.4.2. Hamming

[12]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Hamming distances between the rows of X.
D = DistanceMetric.get_metric('hamming').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_26_0.png

10.4.3. Canberra

[13]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Canberra distances between the rows of X.
D = DistanceMetric.get_metric('canberra').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_28_0.png

10.4.4. Bray-Curtis

[14]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Bray-Curtis distances between the rows of X.
D = DistanceMetric.get_metric('braycurtis').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_30_0.png

10.5. Boolean-valued vector space

10.5.1. Data

[15]:
import numpy as np

# Five 3-component 0/1 vectors, one per row, used by the boolean metrics.
X = np.array(
    [[1, 1, 1],
     [1, 1, 0],
     [1, 0, 1],
     [0, 0, 1],
     [0, 0, 0]]
)

10.5.2. Jaccard

[16]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Jaccard distances between the rows of X.
D = DistanceMetric.get_metric('jaccard').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_35_0.png

10.5.3. Matching

[17]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise matching distances between the rows of X.
D = DistanceMetric.get_metric('matching').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_37_0.png

10.5.4. Dice

[18]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Dice distances between the rows of X.
D = DistanceMetric.get_metric('dice').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_39_0.png

10.5.5. Kulsinski

[19]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Kulsinski distances between the rows of X.
# NOTE(review): confirm the installed scikit-learn still accepts the
# 'kulsinski' identifier.
D = DistanceMetric.get_metric('kulsinski').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_41_0.png

10.5.6. Rogers-Tanimoto

[20]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Rogers-Tanimoto distances between the rows of X.
D = DistanceMetric.get_metric('rogerstanimoto').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_43_0.png

10.5.7. Russell-Rao

[21]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Russell-Rao distances between the rows of X.
D = DistanceMetric.get_metric('russellrao').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_45_0.png

10.5.8. Sokal-Michener

[22]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Sokal-Michener distances between the rows of X.
D = DistanceMetric.get_metric('sokalmichener').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_47_0.png

10.5.9. Sokal-Sneath

[23]:
# DistanceMetric lives in sklearn.metrics since scikit-learn 1.0; the old
# sklearn.neighbors import path was removed in 1.3.
from sklearn.metrics import DistanceMetric

# Pairwise Sokal-Sneath distances between the rows of X.
D = DistanceMetric.get_metric('sokalsneath').pairwise(X)
plot_heatmap(D, figsize=(5, 5))
_images/pairwise-metrics_49_0.png