EPITA 2022 MLRF practice_01-04_color-histogram v2022-03-25_104727 by Joseph CHAZALON
The principle is very simple and illustrated below: from a set of pixels we count how many times each color appears, and we build a histogram of the frequencies of occurrence of the colors (frequencies rather than raw counts, because we normalize the values).
Here are some examples of the histograms we can compute from some bubbles:
Using such descriptors, we can very easily group similar bubbles with a reasonable confidence.
This part contains the following steps:
It is hard to compare the full RGB histogram of an image (a bubble) with the histogram of another, so we will first reduce the number of colors used to represent each image.
Color quantization is a practical application of vector quantization where each color is replaced by the closest color in a pre-defined palette.
We have two options here:
We will use K-Means clustering to discover a reduced set of representative colors.
Because K-Means is a costly algorithm, we will first sample our pixels (viewing them as plain 3-dimensional vectors) to avoid filling up our memory during KMeans fitting.
We will use the base image to facilitate the sampling, because otherwise we would have to select pixels from every bubble image and merge the results.
# deactivate buggy jupyter completion
%config Completer.use_jedi = False
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import os
# TODO
PATH_TO_RESOURCES = "." # FIXME set this to the path of the twinit resource directory
# prof
PATH_TO_RESOURCES = "/home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it"
# Load the base image and its mask.
# NOTE: `cv2.imread` does not raise when a file is missing or unreadable, it
# returns None. We check it explicitly so that a wrong PATH_TO_RESOURCES is
# reported clearly instead of failing later with an obscure error.
poster_path = os.path.join(PATH_TO_RESOURCES, "twin_it_200dpi.png")
poster = cv2.imread(poster_path)
if poster is None:
    raise FileNotFoundError("Cannot read image '%s' (check PATH_TO_RESOURCES)." % poster_path)
# let us convert the poster from BGR to RGB for what follows
# we will work with RGB images
poster = cv2.cvtColor(poster, cv2.COLOR_BGR2RGB)
# Read the mask image: a pixel is selected when its first channel is non-zero
mask_path = os.path.join(PATH_TO_RESOURCES, "mask_bubbles.png")
mask_image = cv2.imread(mask_path)
if mask_image is None:
    raise FileNotFoundError("Cannot read image '%s' (check PATH_TO_RESOURCES)." % mask_path)
poster_mask = mask_image[..., 0] > 0
# Check the shape and data type of our arrays
poster.shape, poster.dtype, poster_mask.shape, poster_mask.dtype
((5956, 4153, 3), dtype('uint8'), (5956, 4153), dtype('bool'))
WARNING: All our images should be in RGB format in this session! Convert them when loading them to avoid mistakes.
plt.subplot(1,2,1)
plt.imshow(poster) # no need for `cv2.cvtColor` because we already have an RGB image
plt.subplot(1,2,2)
plt.imshow(poster_mask)
<matplotlib.image.AxesImage at 0x7fcbc67c5eb0>
# TODO now sample 5000 pixels from the image
sample_pixels = np.array([]) # FIXME
# ...
sample_pixels.shape, sample_pixels.dtype
# The last line should return
# ((5000, 3), dtype('uint8'))
((0,), dtype('float64'))
# prof
# sample 5000 pixels from the image, using indexes
sample_indices = np.random.choice(np.count_nonzero(poster_mask), 5000, replace=False)
sample_pixels = poster[poster_mask][sample_indices]
sample_pixels.shape, sample_pixels.dtype
((5000, 3), dtype('uint8'))
# prof
# alternative version, much simpler but requires the use of the new RNGs (`choice` API is different)
pmasked = poster[poster_mask]
from numpy.random import default_rng
rng = default_rng()
sample_pixels = rng.choice(pmasked, size=5000, replace=False)
sample_pixels.shape, sample_pixels.dtype
((5000, 3), dtype('uint8'))
We are now ready to perform K-Means clustering.
from sklearn.cluster import KMeans
WARNING: K-Means is a RAM-hungry algorithm. Save your work regularly (and start now)!
# TODO compute the new color palette
kmeans = None # FIXME
# prof
# compute the new color palette
# `n_init` is given explicitly: its default value is not the same in all
# scikit-learn versions, so relying on it can change the palette (and emits a
# FutureWarning on some versions). 10 restarts is the historical default.
kmeans = KMeans(n_clusters=7, n_init=10, random_state=0)
kmeans.fit(sample_pixels)
kmeans.cluster_centers_ # Those are RGB colors because our training data (poster) is RGB!
array([[247.08082707, 206.29135338, 94.15601504], [ 88.12667191, 64.7631786 , 31.86939418], [149.84065934, 112.53021978, 78.73076923], [149.03492885, 200.80336352, 187.03622251], [243.76593279, 134.03012746, 83.54808806], [ 44.90154211, 21.53618031, 5.6227758 ], [246.33333333, 230.44632768, 165.74011299]])
The KMeans class provided by scikit-learn has two methods for transforming our data:
transform
predict
Make sure you understand the difference between those two functions.
# Some extra help about the LUT thing:
my_LUT = np.array([("a", "b"), # "cluster" 0
("c", "d")]) # "cluster" 1
my_label_image = np.array([[0,1,0,1,1]])
my_LUT[my_label_image]
# The result contains values from `my_LUT` and has the shape of `my_label_image`. Simply beautiful.
array([[['a', 'b'], ['c', 'd'], ['a', 'b'], ['c', 'd'], ['c', 'd']]], dtype='<U1')
# TODO
# Create a label map (for each pixel inside a bubble, get the id of the closest cluster)
label_map = None
# prof
# Create a label map (for each pixel inside a bubble, get the id of the closest cluster)
label_map = kmeans.predict(poster[poster_mask])
# (we give you the code for this, try to understand it)
# Create and show the LUT and its colors
color_lut = np.uint8(kmeans.cluster_centers_)
plt.bar(np.arange(len(color_lut)),
np.ones(len(color_lut)),
color=color_lut/255)
color_lut
array([[247, 206, 94], [ 88, 64, 31], [149, 112, 78], [149, 200, 187], [243, 134, 83], [ 44, 21, 5], [246, 230, 165]], dtype=uint8)
# TODO
# Now create a recolored image `test_recolored` containing only indexed colors (ie. in the LUT) using:
# - the original image `poster`,
# - the mask `poster_mask` of pixels belonging to bubbles (and therefore not to the background)
# - the LUT `color_lut`
# TIP: start by filling your image with white color.
test_recolored = np.zeros_like(poster)
# prof
white_color = (255,255,255)
test_recolored = np.full_like(poster, white_color) # fill the image with white
test_recolored[poster_mask] = color_lut[label_map] # apply the LUT
test_recolored.shape
(5956, 4153, 3)
# let us display zoomed portions of the images
plt.figure(figsize=(12,12))
plt.subplot(1,2,1)
plt.imshow(poster[:1000,:1000])
plt.axis("off")
plt.title("Original")
plt.subplot(1,2,2)
plt.imshow(test_recolored[:1000,:1000])
plt.axis("off")
plt.title("Recolored")
Text(0.5, 1.0, 'Recolored')
(you can write some observations here)
(prof) As we did not train the predictor on white color, we observe a significant color shift in the "twin it!" bubble. The color difference in other bubbles is hard to notice.
# TODO save the image and compare the sizes
cv2.imwrite("/tmp/recolored.png", test_recolored) # FIXME make sure the exported image is in BGR format here!
# Let us use some notebook magic to check the size of the image
!du -h /tmp/recolored.png
!du -h $PATH_TO_RESOURCES/twin_it_200dpi.png
16M /tmp/recolored.png 46M /home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it/twin_it_200dpi.png
# prof
# save the image and compare the sizes
cv2.imwrite("/tmp/recolored.png", cv2.cvtColor(test_recolored, cv2.COLOR_RGB2BGR))
# Let us use some notebook magic to check the size of the image
!du -h /tmp/recolored.png
!du -h $PATH_TO_RESOURCES/twin_it_200dpi.png
16M /tmp/recolored.png 46M /home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it/twin_it_200dpi.png
We can now load and convert all the bubbles. We need to:
We will need to ignore the areas where the pixels are black (0,0,0), because they do not belong to the bubble and the color reduction would otherwise change their value.
# TODO define a function to compute the mask of a bubble
def bubble2mask(bubble):
    """Placeholder to be completed: compute the mask of a bubble.

    `bubble`: image array of a bubble

    For now, returns an all-False boolean array with the same shape as `bubble`.
    """
    # `np.bool` was only an alias of the builtin `bool` and no longer exists in
    # recent NumPy releases (AttributeError); the builtin works everywhere.
    return np.zeros_like(bubble, dtype=bool) # FIXME
# prof
# define a function to compute the mask of a bubble
def bubble2mask(bubble):
    """Compute the mask of a bubble.

    `bubble`: image array whose last axis holds the channel values

    Returns a boolean array with the last axis removed, True wherever at least
    one channel value is strictly positive (i.e. the pixel is not pure black).
    """
    positive_channels = bubble > 0
    return positive_channels.any(axis=-1)
# List the bubble image files, sorted by name.
# Plain Python is used instead of the `!ls ... | sort` shell magic: it does not
# depend on a Unix shell, it copes with spaces in the path, and it yields an
# empty list (rather than an error message stored as a "file name") when
# nothing matches.
import glob
bubble_files = sorted(glob.glob(os.path.join(PATH_TO_RESOURCES, "bubbles_200dpi", "b*.png")))
bubble_files[:3]
['/home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it/bubbles_200dpi/b001.png', '/home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it/bubbles_200dpi/b002.png', '/home/jchazalo/git/jchazalo/cours-mlrf-preparation/resources/twin_it/bubbles_200dpi/b003.png']
# TODO load all the bubbles and convert them to RGB
# `bubbles` is a list of np.array elements (bubble images in RGB format)
bubbles = []
# prof
# load all the bubbles and convert them to RGB
bubbles = [cv2.cvtColor(cv2.imread(ff), cv2.COLOR_BGR2RGB) for ff in bubble_files]
len(bubbles), bubbles[0].shape, bubbles[0].dtype
(391, (392, 787, 3), dtype('uint8'))
# TODO reduce the color of all bubbles
# This is exactly what we did for the full poster, but applied to each bubble AND keeping background as black.
bubbles_quant = []
# prof
# reduce the color of all bubbles
bubbles_quant = []
for bb in bubbles:
mask = bubble2mask(bb)
recolored = np.zeros_like(bb)
label_map = kmeans.predict(bb[mask])
recolored[mask] = color_lut[label_map]
bubbles_quant.append(recolored)
# let us display the images
plt.figure(figsize=(8,16))
for ii in range(10):
plt.subplot(10,2,1+2*ii)
plt.imshow(bubbles[ii])
plt.axis("off")
if ii == 0:
plt.title("Original")
plt.subplot(10,2,2+2*ii)
plt.imshow(bubbles_quant[ii])
plt.axis("off")
if ii == 0:
plt.title("Recolored")
To compute the color histogram of a bubble, we do not need to recolorize it: we just need to compute its "label map" and count the number of pixels belonging to each cluster.
# TODO compute the color histogram for each bubble (number of pixel in each cluster) then normalize it.
# `color_histograms` is a list of np.array,
# each element being the histogram corresponding to a bubble in `bubbles`.
color_histograms = []
# prof
# compute the color histogram for each bubble
color_histograms = []
for bb in bubbles:
mask = bubble2mask(bb)
label_map = kmeans.predict(bb[mask])
bb_hist = np.bincount(label_map, minlength=kmeans.n_clusters) / np.count_nonzero(mask)
color_histograms.append(bb_hist)
# Show some color histograms with colors!
colors_for_bars = color_lut/255 # matplotlib colors are RGB values scaled to [0,1]
for ii in range(5):
plt.figure(figsize=(8,4))
plt.subplot(1,3,1)
plt.imshow(bubbles[ii])
plt.axis('off'); plt.title("Original")
plt.subplot(1,3,2)
plt.imshow(bubbles_quant[ii])
plt.axis('off'); plt.title("Recolored")
plt.subplot(1,3,3, aspect=len(color_histograms[0]))
plt.ylim(0, 1)
plt.bar(range(len(color_histograms[ii])),
color_histograms[ii]/color_histograms[ii].max(),
color=colors_for_bars)
plt.xticks([]); plt.title("Histogram")
plt.show()
Because color histograms are very compact, it is very fast to compute the distance matrix (even if the complexity is $O(n^2)$).
# TODO compute the distance matrix between each pair of bubbles
dist_mat = np.ones((len(bubbles), len(bubbles))) # distances will be between 0 (closest) and 1 (farthest)
# for... dist_mat[i,j] = ...
# prof
# compute the distance matrix between each pair of bubbles
# distances will be between 0 (closest) and 1 (farthest)
from scipy.spatial.distance import cosine as dist_cosine  # two-vector version of the same distance
from scipy.spatial.distance import pdist, squareform
max_elem = len(bubbles)
# `pdist` evaluates the cosine distance once per unordered pair in compiled
# code (instead of max_elem**2 Python-level calls), and `squareform` expands
# the result into the full symmetric matrix, with zeros on the diagonal.
dist_mat = squareform(pdist(np.asarray(color_histograms, dtype=float), metric="cosine"))
Correct the diagonal to avoid getting the same result over and over: we set the distance of one element against itself to the maximum distance.
dist_mat[np.diag_indices_from(dist_mat)] = 1
# TODO
# Compute the indices of elements with smallest distance to each bubble (ie. in a matrix row)
# `idx_of_best_matches_per_row` is a 2D np.array:
# for each row, the columns indicate the indices of the elements in `dist_mat` to sort the row (ascending order)
idx_of_best_matches_per_row = None
# prof
# Compute the indices of elements with smallest distance to each bubble (ie. in a matrix row)
idx_of_best_matches_per_row = np.argsort(dist_mat, axis=1)
# Display the best matches for some bubbles
max_res = 5
interesting_bubble_ids = [0, 1, 35, 36, 43, 44, 49, 50, 91, 92, 105, 106]
for ii in interesting_bubble_ids:
plt.figure(figsize=(12,8))
columns = max_res + 1
plt.subplot(1, columns, 1)
plt.imshow(bubbles[ii])
plt.axis("off"); plt.title("Bubble %d"%(ii,))
for jj in range(max_res):
bb_idx = idx_of_best_matches_per_row[ii, jj] # Read the id of each best match for current bubble
plt.subplot(1, columns, jj+2)
plt.imshow(bubbles[bb_idx])
plt.axis("off"); plt.title("b%d@%.3f" % (bb_idx, dist_mat[ii, bb_idx])) # display bubble id and dist.
plt.show()
TODO write some notes here
Advantages of color hist:
Limitations:
Instead of computing a distance matrix, it is also possible to aggregate the elements starting by the closest one, then iterating. The trick is to be able to compute the distance between a cluster and another cluster, and a simple solution is to average the descriptor of two clusters to form the descriptor of a new parent cluster.
The code below does all this work and produces a dendrogram.
# Adapted from https://github.com/jesolem/PCV/blob/master/PCV/clustering/hcluster.py
# licensed under the BSD 2-Clause "Simplified" License
from itertools import combinations
class ClusterNode:
    """Internal node of a hierarchical clustering tree, joining two sub-trees."""

    def __init__(self, vec, left, right, distance=0.0, count=1):
        self.vec = vec  # descriptor attached to this node
        self.left, self.right = left, right  # the two child sub-trees
        self.distance = distance  # distance value stored for this merge
        self.count = count  # only used for weighted average

    def extract_clusters(self, dist):
        """Return the list of sub-trees to keep when cutting the tree at `dist`.

        This node is returned as a whole when its own distance is below `dist`;
        otherwise the results of both children are concatenated (left first).
        """
        if not self.distance < dist:
            from_left = self.left.extract_clusters(dist)
            from_right = self.right.extract_clusters(dist)
            return from_left + from_right
        return [self]

    def get_cluster_elements(self):
        """Return the ids of the elements found under this node (left, then right)."""
        ids_left = self.left.get_cluster_elements()
        ids_right = self.right.get_cluster_elements()
        return ids_left + ids_right

    def get_height(self):
        """Return the height of this node: the sum of the heights of its children."""
        return sum((self.left.get_height(), self.right.get_height()))

    def get_depth(self):
        """Return the depth of this node: the largest child depth plus own distance."""
        deepest_child = max(self.left.get_depth(), self.right.get_depth())
        return deepest_child + self.distance

    def draw(self, draw, x, y, s, imlist, im):
        """Draw this node's connectors on `draw`, then draw both children.

        `draw`: image on which the lines are traced with `cv2.line`
        `x`, `y`: position of this node (`x` is advanced by `distance * s` for the children)
        `s`: scale factor applied to `distance`
        `imlist`, `im`: passed unchanged to the children
        """
        black = (0, 0, 0)
        # half extent of each child: 60 units per unit of child height, halved
        half_left = int(self.left.get_height() * 60 / 2)
        half_right = int(self.right.get_height() * 60 / 2)
        extent = half_left + half_right
        pos_left = (y - extent) + half_left
        pos_right = (y + extent) - half_right
        # line joining the positions of the two children, at the current `x`
        cv2.line(draw, (int(pos_left), int(x)), (int(pos_right), int(x)), black)
        # one line per child, whose length is this node's distance scaled by `s`
        x_children = x + self.distance * s
        cv2.line(draw, (int(pos_left), int(x)), (int(pos_left), int(x_children)), black)
        cv2.line(draw, (int(pos_right), int(x)), (int(pos_right), int(x_children)), black)
        # recurse into the left child, then the right one
        self.left.draw(draw, x_children, pos_left, s, imlist, im)
        self.right.draw(draw, x_children, pos_right, s, imlist, im)
class ClusterLeafNode:
    """Leaf of a hierarchical clustering tree: a single element and its descriptor."""

    def __init__(self, vec, id):
        self.id = id  # identifier of the element, also used to index `imlist` in `draw`
        self.vec = vec  # descriptor of the element

    def extract_clusters(self, dist):
        """Return a list containing only this leaf (`dist` is not used)."""
        return [self]

    def get_cluster_elements(self):
        """Return a list containing only the id of this leaf."""
        return [self.id]

    def get_height(self):
        """Return the height of a leaf, which is always 1."""
        return 1

    def get_depth(self):
        """Return the depth of a leaf, which is always 0."""
        return 0

    def draw(self, draw, x, y, s, imlist, im):
        """Paste the 60x60 thumbnail of this element into `im`.

        The thumbnail starts at index `x` along the first axis of `im` and is
        centered on `y` along the second axis. `draw` and `s` are not used.
        """
        thumbnail = cv2.resize(imlist[self.id], (60, 60))
        extent_0, extent_1 = thumbnail.shape[:2]
        start_0 = int(x)
        stop_0 = int(x + extent_0)
        start_1 = int(y - extent_1 // 2)
        stop_1 = int(y + extent_1 - extent_1 // 2)
        im[start_0:stop_0, start_1:stop_1, ...] = thumbnail
def hcluster(features, distfcn):
    """
    Cluster the rows of features using hierarchical clustering.

    The two closest clusters are merged repeatedly until a single one remains;
    the descriptor of a merged cluster is the plain (unweighted) average of the
    descriptors of its two children.

    `features`: sequence of feature vectors, one per element to cluster
                (e.g. a list of 1-D arrays, or a 2-D array with one row per element)
    `distfcn`: distance function to compare features (feature x feature -> float)

    Returns the root of the tree: a `ClusterNode`, or the single
    `ClusterLeafNode` when `features` contains only one element.

    Raises `ValueError` when no pair of clusters can be selected because
    `distfcn` returned no value below infinity (e.g. only NaN).
    Raises `IndexError` when `features` is empty.
    """
    # cache of distance calculations
    distances = {}

    # initialize with each row as a cluster
    node = [ClusterLeafNode(f, id=i) for i, f in enumerate(features)]

    while len(node) > 1:
        closest = float('Inf')
        # reset at every pass so that a pair selected during a previous pass
        # (whose nodes are gone from the list) can never be reused by mistake
        lowestpair = None

        # loop through every pair looking for the smallest distance
        for pair in combinations(node, 2):
            if pair not in distances:
                distances[pair] = distfcn(pair[0].vec, pair[1].vec)
            d = distances[pair]
            if d < closest:
                closest = d
                lowestpair = pair

        if lowestpair is None:
            # every comparison `d < inf` failed: distances are NaN or infinite
            raise ValueError("hcluster: `distfcn` returned no finite distance, "
                             "cannot choose which clusters to merge")
        ni, nj = lowestpair

        # average the two clusters
        # Each vector is halved before the sum: the true division converts the
        # values to floating point first, so small integer dtypes (e.g. uint8)
        # cannot overflow, and plain Python lists are averaged, not concatenated.
        new_vec = np.asarray(ni.vec) / 2.0 + np.asarray(nj.vec) / 2.0

        # create new node
        new_node = ClusterNode(new_vec, left=ni, right=nj, distance=closest)
        node.remove(ni)
        node.remove(nj)
        node.append(new_node)

    return node[0]
def draw_dendrogram(node, imlist, filename='dendrogram.jpg'):
    """
    Draw a cluster dendrogram and save to a file.
    `node`: result of the `hcluster` function
    `imlist`: list of BGR images for each element
    `filename`: path to output file

    Raises `OSError` when OpenCV reports that the file could not be written.
    """
    # image size: 60 columns per unit of tree height, fixed number of rows
    cols = node.get_height()*60
    rows = 1200

    # scale factor applied to the node distances, chosen so that the deepest
    # branch spans `rows-150`; a tree of depth 0 (e.g. a single leaf) has
    # nothing to scale, and dividing by its depth would fail
    depth = node.get_depth()
    s = float(rows-150)/depth if depth > 0 else 0.0

    # create image and draw object
    # (both names refer to the same white array: lines and thumbnails end up in one image)
    im = np.full((rows,cols, 3), 255, dtype=np.uint8)
    draw = im

    # initial line for start of tree
    cv2.line(draw, (cols//2, 0),(cols//2, 60), (0,0,0))

    # draw the nodes recursively
    node.draw(draw,60,(cols/2),s,imlist,im)

    # `cv2.imwrite` signals a failed write through its return value: do not
    # announce a file that was not produced
    if not cv2.imwrite(filename, draw):
        raise OSError("Could not write '%s'." % filename)
    print("Wrote '%s'." % filename)
# TODO generate the clusters using the features you computed, and a distance function of your choice
cluster = None # hcluster(...)
# prof
# generate the clusters
cluster = hcluster(color_histograms, distfcn=dist_cosine)
bubbles_bgr = [cv2.cvtColor(bb, cv2.COLOR_RGB2BGR) for bb in bubbles]
draw_dendrogram(cluster, bubbles_bgr, "my_dendrogram.jpg")
Wrote 'my_dendrogram.jpg'.
Do not forget to submit your notebooks (and maybe a scaled version of your dendrogram)!