Bag of Features 图像检索算法

算法原理

引入：词袋模型（Bag of Words）

Bag of Words是文本分类中一种通俗易懂的策略。
一般来讲，如果我们要了解一段文本的主要内容，最行之有效的策略是抓取文本中的关键词，根据关键词出现的频率确定这段文本的中心思想。

Bag of Words中的Words是区分度较高的单词。
根据这些Words，我们就可以快速识别出文章内容，并对文章进行分类。

Bag of Features是对图像的一种类似的处理方法，抽出图像中的关键特征，根据这些特征来识别图像。

Bag of Features 算法流程

提取出关键特征，通常会使用SIFT特征。
将这些特征进行K-Means聚类，得到包含K个视觉词汇的词典。
对图像中的特征点进行量化，将特征点映射到视觉词汇上。
统计每个视觉词汇的出现频率，得到频率直方图。
构造特征到图像的倒排表，用于图像检索。
根据索引结果进行直方图匹配，返回最相似的图像。

代码实现

image文件夹下存放图像，query文件夹下存放查询图像。

import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial import distance
from tqdm import tqdm
from time import time
import pickle

os.environ["LOKY_MAX_CPU_COUNT"] = "4" # KMeans multithreading setting (presumably caps the CPU count loky/joblib reports — TODO confirm)

image_paths = [] # list of database image paths, filled by get_images()
sift_res = {} # SIFT feature dict: sift_res[img_path] = descriptors (only descriptors are stored, not keypoints)
k_means = None # K-Means model, set by create_vocabulary()
k = 50 # visual vocabulary size (number of clusters / histogram bins)
histograms = {} # histogram dict: histograms[img_path] = BoF histogram
sift = cv2.SIFT_create() # create the SIFT object

# Collect the paths of all .jpg images under a folder
def get_images(image_folder):
    """Recursively walk *image_folder* and record every ``.jpg`` file.

    The extension check is case-insensitive. Matching paths are appended
    to the module-level ``image_paths`` list; nothing is returned.
    """
    for dirpath, _subdirs, names in os.walk(image_folder):
        image_paths.extend(
            os.path.join(dirpath, name)
            for name in names
            if name.lower().endswith(".jpg")
        )

# 1. Extract SIFT features
def extract_sift_features(cache="cache/BOF_SIFT_desc.pkl"):
    """Fill ``sift_res`` with the SIFT descriptors of every image in ``image_paths``.

    If *cache* exists, the pickled dict is loaded into ``sift_res`` and no
    image is read. Otherwise every image is loaded in grayscale, its
    descriptors are computed with the module-level ``sift`` object, and the
    resulting dict is pickled to *cache*.

    Images that cannot be decoded, or that yield no descriptors, are
    reported and left out of ``sift_res`` so that later stacking of the
    descriptors does not fail on ``None`` entries.

    Args:
        cache: Path of the pickle file used to persist the descriptors.
    """
    print("1.提取SIFT特征")
    global sift_res
    if os.path.exists(cache):
        # NOTE: pickle can execute arbitrary code on load; only use cache
        # files produced by this script.
        with open(cache, "rb") as f:
            print(f"从缓存中加载SIFT特征: {cache}")
            sift_res = pickle.load(f)
        return

    for img_path in tqdm(image_paths):
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            tqdm.write(f"跳过无法读取的图片: {img_path}")
            continue
        _, desc = sift.detectAndCompute(img, None)  # keypoints are not needed
        if desc is None:
            # No keypoints were detected, so there is nothing to store.
            tqdm.write(f"跳过未检测到SIFT特征的图片: {img_path}")
            continue
        sift_res[img_path] = desc

    # Make sure the cache directory exists before writing; otherwise all the
    # work above would be lost to a FileNotFoundError.
    cache_dir = os.path.dirname(cache)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(cache, "wb") as f:
        print(f"保存SIFT特征到缓存: {cache}")
        pickle.dump(sift_res, f)

# 2. K-Means clustering: build the visual vocabulary
def create_vocabulary(cache="cache/BOF_kmeans.pkl"):
    """Train (or load) the K-Means model that defines the visual vocabulary.

    If *cache* exists and holds a model with ``k`` clusters, it is loaded
    into the module-level ``k_means``. A cached model with a different
    cluster count is ignored and retrained, because the histogram code bins
    words into exactly ``k`` bins. Otherwise all descriptors in ``sift_res``
    are stacked, clustered into ``k`` centres, and the fitted model is
    pickled to *cache*.

    Args:
        cache: Path of the pickle file used to persist the model.

    Raises:
        ValueError: If ``sift_res`` contains no descriptors to cluster.
    """
    print("2.创建视觉词典")
    global k_means
    if os.path.exists(cache):
        # NOTE: pickle can execute arbitrary code on load; only use cache
        # files produced by this script.
        with open(cache, "rb") as f:
            print(f"从缓存中加载K-Means模型: {cache}")
            cached_model = pickle.load(f)
        if getattr(cached_model, "n_clusters", k) == k:
            k_means = cached_model
            return
        print(f"缓存的K-Means模型与当前k={k}不一致，重新训练")

    st = time()
    # Skip images without descriptors (None or empty); np.vstack cannot stack None.
    descriptors_list = [
        desc for desc in sift_res.values() if desc is not None and len(desc) > 0
    ]
    if not descriptors_list:
        raise ValueError(
            "no SIFT descriptors available; run extract_sift_features() first"
        )
    all_descriptors = np.vstack(descriptors_list)  # pool every descriptor
    k_means = KMeans(n_clusters=k, random_state=0, n_init=10)
    k_means.fit(all_descriptors)
    print(f"K-Means聚类耗时: {time()-st:.2f}s")

    # Create the cache directory if needed so the fitted model is not lost.
    cache_dir = os.path.dirname(cache)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(cache, "wb") as f:
        print(f"保存K-Means模型到缓存: {cache}")
        pickle.dump(k_means, f)

# 3. Compute the BoF histogram
def compute_histogram(img_path):
    """Return the L2-normalised bag-of-features histogram of one image.

    Descriptors are taken from ``sift_res`` when available; otherwise the
    image is read in grayscale and SIFT descriptors are computed on the fly.
    Each descriptor is assigned to a visual word by ``k_means`` and the word
    counts are gathered into ``k`` bins.

    Args:
        img_path: Path of the image.

    Returns:
        A float array of length ``k``. It is all zeros when the image has
        no SIFT descriptors.

    Raises:
        OSError: If the image has to be read and cannot be decoded.
    """
    descriptors = sift_res.get(img_path)
    if descriptors is None:
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None on failure; fail with a clear message
            # instead of a cryptic OpenCV assertion further down.
            raise OSError(f"cannot read image: {img_path}")
        _, descriptors = sift.detectAndCompute(img, None)

    if descriptors is None or len(descriptors) == 0:
        # No keypoints: there is nothing to quantise.
        return np.zeros(k, dtype=float)

    words = k_means.predict(descriptors)  # visual-word index per descriptor
    hist, _ = np.histogram(words, bins=np.arange(k + 1))  # one bin per word
    norm = np.linalg.norm(hist)
    if norm == 0:
        return hist.astype(float)
    return hist / norm  # L2 normalisation

def compute_histograms(cache="cache/BOF_histogram.pkl"):
    """Compute the BoF histogram of every database image, using *cache*.

    Histograms found in *cache* are loaded into the module-level
    ``histograms`` dict and reused; only images in ``image_paths`` that are
    not in the cache yet are computed. The cache file is rewritten only
    when something new was computed or when no cache file existed.

    Delete the cache file after changing the vocabulary, since cached
    histograms are not checked against the current model.

    Args:
        cache: Path of the pickle file used to persist the histograms.
    """
    print("3.计算数据库的BoF直方图")
    global histograms

    cache_loaded = os.path.exists(cache)
    if cache_loaded:
        # NOTE: pickle can execute arbitrary code on load; only use cache
        # files produced by this script.
        with open(cache, "rb") as f:
            print(f"从缓存中加载数据库直方图: {cache}")
            histograms = pickle.load(f)

    # Previously every histogram was recomputed even after a cache hit.
    missing = [img_path for img_path in image_paths if img_path not in histograms]
    if cache_loaded and not missing:
        return

    for img_path in tqdm(missing):
        histograms[img_path] = compute_histogram(img_path)

    # Create the cache directory if needed before writing.
    cache_dir = os.path.dirname(cache)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(cache, "wb") as f:
        print(f"保存数据库直方图到缓存: {cache}")
        pickle.dump(histograms, f)

# 4. Histogram matching
def cos_sim(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Returns 0.0 when either vector has zero norm, instead of dividing by
    zero and producing NaN (which would also disturb ranking by similarity).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def get_topk(query_hist, topk=10):
    """Rank the database images by cosine similarity to *query_hist*.

    Every entry of the module-level ``histograms`` dict is scored with
    ``cos_sim``; the ``topk`` best ``(img_path, similarity)`` pairs are
    returned, most similar first.
    """
    print("4.直方图匹配")
    scored = [
        (img_path, cos_sim(query_hist, hist))
        for img_path, hist in tqdm(histograms.items())
    ]
    # Higher cosine similarity means a better match, hence descending order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:topk]

# Run the BoF pipeline: collect images, extract SIFT features,
# build the vocabulary, then compute the database histograms.
image_folder = "image"
get_images(image_folder)
extract_sift_features()
create_vocabulary()
compute_histograms()

# Match a query image against the database and print the ranking.
test_img = "query/A0C573_20151103073308_3029240562.jpg"
test_hist = compute_histogram(test_img)
topk = get_topk(test_hist, topk=10)
print("Top 10 相似图片:")
for rank, (filepath, sim) in enumerate(topk, start=1):
    print(f"{rank}. {filepath} - 相似度: {sim:.4f}")

测试结果（注：以下输出应来自脚本的另一版本，其中多了"构造倒排索引"一步，因此步骤编号和最后一行的输出格式与上文代码的打印内容不完全一致）：

(DL) PS D:\Repos\Course\CVLab> & C:/Users/CCLMSY/.conda/envs/DL/python.exe d:/Repos/Course/CVLab/BOF.py
1.提取SIFT特征
从缓存中加载SIFT特征: cache/BOF_SIFT_desc.pkl
2.创建视觉词典
从缓存中加载K-Means模型: cache/BOF_kmeans.pkl
3.计算数据库的BoF直方图
从缓存中加载数据库直方图: cache/BOF_histogram.pkl
4.构造倒排索引
100%|██████████████████████████████████████████████████| 945/945 [00:00<00:00, 23779.80it/s] 
5.直方图匹配
100%|██████████████████████████████████████████████████| 946/946 [00:00<00:00, 157646.77it/s] 
query/A0C573_20151029074136_6562078379.jpg 匹配到的最相似图片是 image\A0C573\A0C573_20151029074136_6562078379.jpg