1.1 MiB
Object detection¶
Tracking (following something over time) typically starts with object detection (localizing the thing in a given image)
Background subtraction
What is “background”?
Thresholding for binarized images
Connected component labeling
Image: Chao..Perán Sensors 2015
Connected component labeling¶
https://en.wikipedia.org/wiki/Connected-component_labeling
Image: https://commons.wikimedia.org/wiki/File:Two-pass_connected_component_labeling.svg
Blur and erosion¶
Operations to help thresholding¶
Before thresholding: Gaussian blur
Image: Ronneberger..Joffe Chromosome Research 2008
Note that this is an example of kernel-based image processing. This is also called "convolution".
Image: https://de.m.wikipedia.org/wiki/Datei:Halftone,_Gaussian_Blur.jpg
After thresholding: Erosion, dilation
See Wikipedia on Erosion. Note the similarities to the kernel operations.
Image: Cereser, PhD thesis 2016
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
import imageio # "pip install 'imageio[ffmpeg]'"
# Here we define a helper function which we call below to make sure
# an image is an "unsigned 8 bit integer". This way, we know they
# take only a single byte per pixel and have a value from 0..255.
def ensure_dtype_uint8(arr):
    """Return ``arr`` converted to unsigned 8-bit integers.

    Args:
        arr: An object with an ``astype`` method, e.g. a NumPy array.
            In this file it is called with the result of a threshold
            comparison.

    Returns:
        The result of ``arr.astype(np.uint8)``.
    """
    return arr.astype(np.uint8)
fname = 'short-movie20170810_182130.mp4'
reader = imageio.get_reader(fname)

# Only the first frame is needed here, so leave the loop after one
# iteration. The `else` branch runs only if the loop finished without
# hitting `break`, i.e. the reader produced no frames at all.
for frame in reader:
    frame0 = frame[:,:,1] # take only green channel
    break
else:
    raise RuntimeError("no frames could be read from {!r}".format(fname))

# Bare expressions: presumably the last line of a notebook cell, where
# the value is displayed. They have no effect in a plain script.
frame0.ndim
frame0.shape

# Show the first frame with an explicit grayscale colormap ...
plt.imshow(frame0,cmap='gray')
plt.colorbar();

# ... and again without specifying a colormap, for comparison.
plt.imshow(frame0)
plt.colorbar();
# get all frames into big 3D array
(height,width) = frame0.shape

reader.set_image_index(0) # return to start of file

# Keep only channel 1 of every frame (the same channel used for frame0)
# and stack the resulting 2D images; the first axis indexes frames.
all_frames = np.array([frame[:,:,1] for frame in reader])
print(all_frames.shape)
n_frames = len(all_frames)

# Per-pixel statistics across all frames (axis 0 is the frame axis).
mean_frame = np.mean(all_frames, axis=0)
median_frame = np.median(all_frames, axis=0)

plt.imshow(mean_frame, cmap="jet")
plt.colorbar();

plt.imshow(median_frame, cmap="jet")
plt.colorbar();
# Bounds of the rectangular sub-region that the plots below zoom in on.
start_row = 750
stop_row = 950
start_col = 400
stop_col = 800

# Work with the first frame of the stack.
frame = all_frames[0]

# The same crop of the single frame, the median image and the mean image.
plt.imshow(frame[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();

plt.imshow(median_frame[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();

plt.imshow(mean_frame[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();

# Absolute per-pixel difference between this frame and the median image.
frame_absdiff = np.abs(frame - median_frame)

plt.imshow(frame_absdiff[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();
threshold = 70

# Compare every pixel of the difference image against the threshold and
# print the dtype of the comparison result.
tmp = np.greater(frame_absdiff, threshold)
print(tmp.dtype)

# Bare expression: the dtype after conversion through the helper.
ensure_dtype_uint8(frame_absdiff > threshold).dtype

# Thresholded difference image, converted with the helper.
binarized = ensure_dtype_uint8(frame_absdiff > threshold)

plt.imshow(binarized[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();
# Smooth the frame with a Gaussian filter (sigma = 6.0) before differencing.
blurred = ndimage.gaussian_filter(frame, sigma=6.0)

plt.imshow(blurred[start_row:stop_row, start_col:stop_col], cmap='jet')
plt.colorbar();

# Threshold applied to the blurred difference image.
blur_threshold = 40

# Absolute difference between the blurred frame and the median image,
# then binarized with the threshold above.
blurred_absdiff = np.abs(blurred - median_frame)
binarized_blurred = ensure_dtype_uint8(blurred_absdiff > blur_threshold)

plt.imshow(
    binarized_blurred[start_row:stop_row, start_col:stop_col],
    cmap='jet',
    interpolation='nearest',
)
plt.colorbar();
# Apply binary erosion four times in a row to the blurred, binarized image.
eroded = binarized_blurred
for i in range(4):
    eroded = ndimage.binary_erosion(eroded)

# Convert the erosion result to uint8 with the file's helper before plotting.
plt.imshow(ensure_dtype_uint8(eroded)[start_row:stop_row,start_col:stop_col], cmap='gray')
plt.colorbar();