一、相关知识点
二、利用KNN完成香蕉和苹果的识别
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
def preprocess(img: np.ndarray) -> np.ndarray:
    """Resize an image and flatten it into a 1-D feature vector.

    The image is resized to the module-level ``target_size`` (which must be
    defined before this function is called) and then flattened, because the
    KNN estimator only accepts one feature row per sample.

    Args:
        img: Image array as returned by ``cv2.imread``.

    Returns:
        The resized image flattened to one dimension.

    Raises:
        ValueError: If ``img`` is None, which is how ``cv2.imread`` reports a
            missing or undecodable file.
    """
    if img is None:
        # Fail early with a readable message instead of an opaque OpenCV
        # assertion from cv2.resize.
        raise ValueError(
            "preprocess() received None; cv2.imread returns None when the "
            "file is missing or cannot be decoded as an image"
        )

    # NOTE: an earlier experiment also extracted edge features here
    # (grayscale -> 5x5 Gaussian blur -> Sobel-style cv2.filter2D -> resize ->
    # flatten) and concatenated them with the raw pixels. It is disabled, so
    # only the raw pixel values are returned.
    resized = cv2.resize(img, target_size)
    return resized.flatten()
# # sobel算子,用于提取边缘
# # 垂直梯度算子
# kernel_ver = np.array([[-1, 0, 1],
# [-2, 0, 2],
# [-1, 0, 1]])
# # 水平梯度算子
# kernel_hor = np.array([[-1, -2, -1],
# [0, 0, 0],
# [1, 2, 1]])
# 1. Read every image and convert it into a flat feature vector.
images = []
target_size = (100, 100)  # read by preprocess(); must be set before it is called
# os.listdir() returns entries in arbitrary order, so sort to make the sample
# order reproducible and consistent with the (also sorted) label files below.
# NOTE(review): this assumes image and label files share base names so that
# sorted order lines them up - confirm against the dataset layout.
for file_name in sorted(os.listdir('../data/fruit/img')):
    img = cv2.imread(f'../data/fruit/img/{file_name}')
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f'could not read image: ../data/fruit/img/{file_name}')
    images.append(preprocess(img))
print(len(images))

# 2. Stack into a NumPy array, one row of features per sample.
X = np.array(images, dtype=np.float32)

# 3. Read the labels: the first character of each label file is the class.
labels = []
for file_name in sorted(os.listdir('../data/fruit/label')):
    with open(f'../data/fruit/label/{file_name}', 'r', encoding='utf-8') as f:
        labels.append(f.read(1))
print(len(labels))
if len(labels) != len(images):
    raise ValueError(
        f'found {len(images)} images but {len(labels)} labels; '
        'every image needs exactly one label file'
    )
# An ndarray keeps the element-wise comparison with y_pred below explicit.
y = np.array(labels)

# ----------------- Training
# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Standardize. The scaler is fitted on the training split only, so test
# statistics do not leak into training, and the same transform is reused for
# the test split and for the single sample predicted at the end.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Try several neighbour counts and report the test accuracy of each.
for i in range(3, 15):
    estimator = KNeighborsClassifier(n_neighbors=i, weights='distance')
    estimator.fit(X_train, y_train)  # X passed to fit must be 2-D
    # Evaluate
    y_pred = estimator.predict(X_test)
    # Accuracy
    accuracy = estimator.score(X_test, y_test)
    print(accuracy)
    # Positions (within the test split) of the misclassified samples.
    misclassified_indices = np.where(y_pred != y_test)[0]
    print(misclassified_indices)

# Final model, then classify one standalone image.
estimator = KNeighborsClassifier(n_neighbors=5)
estimator.fit(X_train, y_train)  # X passed to fit must be 2-D
# Raw string: the backslashes in a Windows path must not be read as escapes.
sample_path = r'F:\py_MachineLearning\MachineLearning\MachineLearning\img.png'
sample = cv2.imread(sample_path)
if sample is None:
    raise ValueError(f'could not read image: {sample_path}')
sample = preprocess(sample)
# One row of features, scaled exactly like the training data.
sample = scaler.transform(sample.reshape(1, -1).astype(np.float32))
sample_pred = estimator.predict(sample)
print("测试:", sample_pred)