# 10. Pairwise Metrics

## 10.1. Data

[1]:

from sklearn.datasets import load_iris

# Feature matrix of the iris dataset (one row per sample); every cell in the
# real-valued section below reads this global `X`.
X = load_iris().data

## 10.2. Plotting function

[2]:

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

def plot_heatmap(X, figsize=(7,7)):
    """Draw the matrix ``X`` as a seaborn heatmap on a freshly created figure.

    Parameters
    ----------
    X : 2-D array-like
        Values to render; passed unchanged to ``sns.heatmap``.
    figsize : tuple of (width, height), default (7, 7)
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    None
        The plot is produced as a side effect on the new figure.
    """
    # Only the Axes is needed; the Figure handle is intentionally discarded.
    _, ax = plt.subplots(figsize=figsize)
    sns.heatmap(X, ax=ax)


## 10.3. Real-valued vector space

### 10.3.1. Cosine similarity

[3]:

from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity of X against itself (a single argument means X vs. X),
# shown as a heatmap with the default figure size.
S = cosine_similarity(X)
plot_heatmap(S)


### 10.3.2. Cosine distance

[4]:

from sklearn.metrics.pairwise import cosine_distances

# Cosine distance of X against itself; scikit-learn defines this as
# 1 - cosine similarity.
D = cosine_distances(X)
plot_heatmap(D)


### 10.3.3. Euclidean distance

[5]:

from sklearn.metrics.pairwise import euclidean_distances

# Euclidean (L2) distance matrix of X against itself, shown as a heatmap.
D = euclidean_distances(X)
plot_heatmap(D)


### 10.3.4. Manhattan distance

[6]:

from sklearn.metrics.pairwise import manhattan_distances

# Manhattan (L1) distance matrix of X against itself, shown as a heatmap.
D = manhattan_distances(X)
plot_heatmap(D)


### 10.3.5. Kernel distance

[7]:

from sklearn.metrics.pairwise import pairwise_kernels

# Chi-squared kernel matrix of X against itself.
# NOTE(review): pairwise_kernels returns kernel (similarity) values, not
# distances, even though the result is bound to `D` like the distance cells.
# The chi2 kernel is documented for non-negative inputs - confirm X qualifies.
D = pairwise_kernels(X, metric='chi2')
plot_heatmap(D)


### 10.3.6. Minkowski

[8]:

from sklearn.neighbors import DistanceMetric

# Minkowski distance matrix of X against itself.
# NOTE(review): no order `p` is passed, so the library default applies
# (p=2, i.e. Euclidean, per scikit-learn docs - confirm for the installed
# version).
D = DistanceMetric.get_metric('minkowski').pairwise(X)
plot_heatmap(D)


### 10.3.7. Mahalanobis

[9]:

import numpy as np
from sklearn.neighbors import DistanceMetric

# Mahalanobis distance matrix of X against itself.
# np.cov treats each row as a variable by default, so X is transposed to get
# the covariance between the columns of X; that matrix is supplied as `V`.
D = DistanceMetric.get_metric('mahalanobis', V=np.cov(X.T)).pairwise(X)
plot_heatmap(D)


### 10.3.8. Haversine distance

[10]:

from sklearn.metrics.pairwise import haversine_distances
import pandas as pd

# NOTE(review): `df` is not defined anywhere in the visible source (pandas is
# imported above but never used to build it) - the cell that creates it
# appears to be missing.
# Rebinds the global X to a two-column array ordered [latitude, longitude].
X = df[['latitude', 'longitude']].values

# NOTE(review): scikit-learn's haversine_distances expects [latitude,
# longitude] in radians and returns distances on the unit sphere - confirm
# the units stored in `df` (wrap in np.radians if they are degrees).
D = haversine_distances(X)
plot_heatmap(D, figsize=(5, 5))


## 10.4. Integer-valued vector space

### 10.4.1. Data

[11]:

import numpy as np

# Five 3-component integer rows; rebinds the global X that the
# integer-valued metric cells below operate on.
X = np.array(
    [
        [1, 1, 1],
        [1, 1, 0],
        [1, 0, 1],
        [0, 0, 1],
        [0, 0, 0],
    ]
)


### 10.4.2. Hamming

[12]:

from sklearn.neighbors import DistanceMetric

# Hamming distance matrix of X against itself, drawn on a smaller 5x5 figure.
D = DistanceMetric.get_metric('hamming').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.4.3. Canberra

[13]:

from sklearn.neighbors import DistanceMetric

# Canberra distance matrix of X against itself, drawn on a smaller 5x5 figure.
D = DistanceMetric.get_metric('canberra').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.4.4. Bray-Curtis

[14]:

from sklearn.neighbors import DistanceMetric

# Bray-Curtis distance matrix of X against itself, drawn on a smaller 5x5
# figure.
D = DistanceMetric.get_metric('braycurtis').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


## 10.5. Boolean-valued vector space

### 10.5.1. Data

[15]:

import numpy as np

# Five 3-component rows of 0/1 flags; rebinds the global X used by the
# boolean metric cells below. The values are written as integers, so the
# array is not of bool dtype.
X = np.array(
    [
        [1, 1, 1],
        [1, 1, 0],
        [1, 0, 1],
        [0, 0, 1],
        [0, 0, 0],
    ]
)


### 10.5.2. Jaccard

[16]:

from sklearn.neighbors import DistanceMetric

# Jaccard distance matrix of X against itself, drawn on a smaller 5x5 figure.
# NOTE(review): X holds integer 0/1 values; presumably the metric treats
# non-zero entries as True - confirm.
D = DistanceMetric.get_metric('jaccard').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.3. Matching

[17]:

from sklearn.neighbors import DistanceMetric

# Matching distance matrix of X against itself, drawn on a smaller 5x5 figure.
D = DistanceMetric.get_metric('matching').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.4. Dice

[18]:

from sklearn.neighbors import DistanceMetric

# Dice distance matrix of X against itself, drawn on a smaller 5x5 figure.
D = DistanceMetric.get_metric('dice').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.5. Kulsinski

[19]:

from sklearn.neighbors import DistanceMetric

# Kulsinski distance matrix of X against itself, drawn on a smaller 5x5
# figure.
# NOTE(review): the 'kulsinski' metric name was deprecated in scikit-learn
# 1.1 and may not be accepted by newer releases - confirm against the
# installed version before relying on this cell.
D = DistanceMetric.get_metric('kulsinski').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.6. Rogers-Tanimoto

[20]:

from sklearn.neighbors import DistanceMetric

# Rogers-Tanimoto distance matrix of X against itself, drawn on a smaller
# 5x5 figure.
D = DistanceMetric.get_metric('rogerstanimoto').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.7. Russell-Rao

[21]:

from sklearn.neighbors import DistanceMetric

# Russell-Rao distance matrix of X against itself, drawn on a smaller 5x5
# figure.
D = DistanceMetric.get_metric('russellrao').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.8. Sokal-Michener

[22]:

from sklearn.neighbors import DistanceMetric

# Sokal-Michener distance matrix of X against itself, drawn on a smaller 5x5
# figure.
# NOTE(review): scikit-learn documents the same formula for 'sokalmichener'
# and 'rogerstanimoto' (2*NNEQ / (N + NNEQ)), so this heatmap is expected to
# match the Rogers-Tanimoto one - confirm.
D = DistanceMetric.get_metric('sokalmichener').pairwise(X)
plot_heatmap(D, figsize=(5, 5))


### 10.5.9. Sokal-Sneath

[23]:

from sklearn.neighbors import DistanceMetric

# Sokal-Sneath distance matrix of X against itself, drawn on a smaller 5x5
# figure.
D = DistanceMetric.get_metric('sokalsneath').pairwise(X)
plot_heatmap(D, figsize=(5, 5))