计算机实战分享 2:平板电脑与手机价格影响因素分析(机器学习实战)——附完整代码与数据,可直接运行

发布于:2024-05-10 ⋅ 阅读:(22) ⋅ 点赞:(0)

直接看运行演示:

直接看数据:

数据分析:

 

 

 训练预测:

 代码:

# Standard library
import os
import re
from datetime import datetime

# Third-party
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

# Use the SimHei font so Chinese labels are displayed correctly, and keep the
# minus sign displayable on the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the raw table; the file is decoded as GBK.
data = pd.read_csv("data(1)(2).csv", encoding='gbk')

# One pie chart per categorical column, showing the share of rows taken by
# each distinct value. `value_counts` is rebound on every pass, so after the
# loop it holds the counts of the last column.
PIE_COLORS = ['red', 'yellow', 'green', 'blue', 'orange']
for column in ('品牌', '系统', '运行内存'):
    # Frequency of each distinct value in the column.
    value_counts = data[column].value_counts()
    plt.figure(figsize=(8, 8))
    plt.pie(
        value_counts,
        labels=value_counts.index,
        autopct='%1.1f%%',
        startangle=90,
        colors=PIE_COLORS,
    )
    plt.title(f'{column} Distribution')
    plt.show()




# Drop every row that has a missing value in any column. The fillna(0) that
# used to follow could never change anything after dropna(), so it is gone.
data = data.dropna()

# Replace each categorical column with integer codes so it can take part in
# the correlation analysis. pd.factorize numbers the distinct values in order
# of first appearance.
CATEGORICAL_COLUMNS = [
    '品牌', '商品名称', '屏幕尺寸', '系统', '运行内存', 'CPU类型',
    '前置摄像头像素', '后置摄像头像素',
]
for column in CATEGORICAL_COLUMNS:
    data[column] = pd.factorize(data[column])[0]

# Target column first, then the encoded features. '屏幕尺寸' used to be listed
# twice here, which duplicated it in the correlation matrix and in ceshi.csv.
# .copy() makes `datas` independent of `data` so the price assignment below
# does not write through a slice.
# NOTE(review): ceshi.csv now has a single '屏幕尺寸' column -- confirm no
# reader relies on the old duplicated layout.
datas = data[[' good_price'] + CATEGORICAL_COLUMNS].copy()
print(datas.head(10))

# Turn the price into a real number. Going through str makes this work
# whether the CSV column was parsed as text or as numbers; blank or
# non-numeric entries become NaN and their rows are dropped. Previously only
# blank strings were filtered and the column was left as text, so .corr()
# had no numeric target to work with.
datas[' good_price'] = pd.to_numeric(
    datas[' good_price'].astype(str).str.strip(),
    errors='coerce',
)
datas = datas.dropna(subset=[' good_price'])
datas.to_csv('ceshi.csv', encoding='utf-8')
# Use the SimHei font so Chinese labels are displayed correctly, and keep the
# minus sign displayable on the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Pairwise correlation between the columns of `datas`, drawn as an annotated
# heatmap on a tall 20x50 figure.
correlation_matrix = datas.corr()
heatmap_fig, heatmap_ax = plt.subplots(figsize=(20, 50))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=.5,
    ax=heatmap_ax,
)
plt.title('Correlation Heatmap')
plt.show()

# Price column of `data` as an array.
values = data[' good_price'].values




# Rank every column of `datas` by the absolute value of its correlation with
# the price column, strongest first. The price column itself is part of the
# ranking because it is also a row of the correlation matrix.
correlation_matrix = datas.corr()
target_correlations = correlation_matrix[' good_price']
correlation_with_target = target_correlations.abs().sort_values(ascending=False)

# Names of the columns whose absolute correlation exceeds the threshold.
is_correlated = correlation_with_target > 0.00001
selected_features = correlation_with_target[is_correlated].index

# Print the ranking.
print("特征与目标列的相关性排序:")
print(correlation_with_target)

# Horizontal bar chart of the ranking.
plt.figure(figsize=(10, 6))
sns.barplot(
    x=correlation_with_target.values,
    y=correlation_with_target.index,
    palette='viridis',
)
plt.title('Feature Correlation with  good_price')
plt.xlabel('Correlation')
plt.ylabel('Feature')
plt.show()


from sklearn import preprocessing
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from datetime import datetime
import time
import math
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from scipy import stats, integrate
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.model_selection import learning_curve
from sklearn.model_selection import validation_curve
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error  # 评价指标
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor,AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor
from lightgbm import LGBMRegressor
from date_process import data_read_csv
# Features and targets are supplied by the project's date_process module.
train_x, train_y = data_read_csv()

# Shuffled 80/20 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    np.array(train_x),
    np.array(train_y),
    test_size=0.2,
    shuffle=True,
    random_state=15,
)
print(f'x_train.shape {x_train.shape}')
print(f'x_test.shape {x_test.shape}')

# Ensemble model.
# NOTE(review): the estimator is an AdaBoostRegressor with default settings,
# not an SVM; the `svm` / `svm_pred` names are kept because the plotting code
# further down reads `svm_pred`.
svm = AdaBoostRegressor().fit(x_train, y_train)
svm_pred = svm.predict(x_test)
print(svm_pred)

# Score the predictions against the held-out targets with the project's
# metric helper, which is unpacked into six values.
from metra import metric
scores = metric(np.array(svm_pred), np.array(y_test))
mae, mse, rmse, mape, mspe, r2 = scores
print('mae, mse, rmse, mape, mspe,r2')
print(*scores)
# Seaborn styling for the prediction-vs-truth plot.
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")

# One row per test sample: its position, the prediction and the true value.
# NOTE(review): this rebinds `data`, which held the table loaded from the CSV
# at the top of the script.
x = range(len(y_test))
data = pd.DataFrame({
    'x': x,
    'y_pred': svm_pred.flatten(),
    'y_true': y_test.flatten(),
})

# Draw the predicted and the true series as two labelled lines.
for series in ('y_pred', 'y_true'):
    sns.lineplot(x='x', y=series, data=data, linewidth=1, label=series)

# Title and axis labels.
# NOTE(review): the x axis is the sample position, although it is labelled
# 'Date'.
plt.title('Prediction vs True')
plt.xlabel('Date')
plt.ylabel('Values')

# Write the figure to disk, then display it.
plt.savefig('AdaBoost.png')
plt.show()

完整数据与代码下载:

https://download.csdn.net/download/mqdlff_python/89290286 


网站公告

今日签到

点亮在社区的每一天
去签到