基于Django的京东手机数据分析及预测系统 - 完整技术实现
一个集数据爬取、分析、机器学习预测、用户管理和推荐系统于一体的电商数据分析平台
📋 目录
🎯 项目概述
项目背景
随着电商行业的快速发展,数据驱动的决策变得越来越重要。本项目基于京东手机商品数据,构建了一个完整的数据分析和预测系统,为电商运营提供数据支撑。
系统特点
- 数据规模: 1161款手机商品,1982条用户评论,1003条收藏记录
- 算法准确率: 销量预测(按低/中等/高/超高四档销量分类)准确率>85%
- 用户体验: 响应式设计,支持移动端和桌面端
- 技术栈: Django + MySQL + scikit-learn + Bootstrap
💻 项目展示
项目演示视频随后更新于哔哩哔哩个人主页,敬请关注!
🏗️ 技术架构
后端技术栈
# requirements.txt core dependencies
Django==4.2.20 # Web framework
pymysql==1.1.0 # MySQL driver
pandas==2.0.3 # Data processing
scikit-learn==1.3.0 # Machine learning
jieba==0.42.1 # Chinese word segmentation
snownlp==0.12.3 # Sentiment analysis
django-simpleui==2023.12.30 # Admin UI theme
pillow==10.0.1 # Image handling
faker==19.6.2 # Mock data generation
前端技术栈
<!-- Core CSS framework -->
<link href="/static/assets/css/bootstrap.min.css" rel="stylesheet">
<link href="/static/assets/css/app.css" rel="stylesheet">
<!-- Charting library -->
<script src="/static/assets/plugins/chartjs/js/Chart.min.js"></script>
<!-- Icon library -->
<link href="/static/assets/css/icons.css" rel="stylesheet">
项目结构
jd_djangoProject/
├── myapp/ # 主应用模块
│ ├── models.py # 数据模型定义
│ ├── views.py # 视图控制器(866行)
│ ├── urls.py # URL路由配置
│ ├── admin.py # 后台管理配置
│ ├── recommendation.py # 推荐系统引擎
│ └── management/ # Django管理命令
├── templates/ # HTML模板文件
│ ├── base.html # 基础模板
│ ├── index.html # 首页模板
│ ├── predict.html # 销量预测模板
│ └── sentiment_analysis.html # 情感分析模板
├── buildmodel/ # 机器学习模型
│ ├── build.py # 模型训练脚本
│ ├── rf_sales_model.joblib # 随机森林模型
│ └── result3.csv # 训练数据集(1178条)
├── static/ # 静态资源文件
├── media/ # 媒体文件存储
└── speculate.py # 销量预测接口
🔧 核心功能实现
1. 数据模型设计
# myapp/models.py
from django.db import models
from django.contrib.auth.models import User
class XinXi(models.Model):
    """Product record for a single phone listing.

    Holds the listing title, price and brand, plus the optional score and
    sales figures.
    """

    title = models.CharField(max_length=500, verbose_name='商品标题')
    price = models.DecimalField(max_digits=10, decimal_places=2, verbose_name='价格')
    brand = models.CharField(max_length=100, verbose_name='品牌')
    # score and sales are nullable, so consumers must be ready for None.
    score = models.DecimalField(max_digits=3, decimal_places=2, null=True, verbose_name='评分')
    sales = models.IntegerField(null=True, verbose_name='销量')

    class Meta:
        verbose_name = '商品信息'
        verbose_name_plural = verbose_name

    def __str__(self):
        """Return the listing title so admin lists and shells are readable."""
        return self.title
class Comment(models.Model):
    """A user's review of a product: free text plus a 1-5 rating."""

    # Deleting the user or the product also deletes the comment (CASCADE).
    user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name='用户')
    product = models.ForeignKey(XinXi, on_delete=models.CASCADE, verbose_name='商品')
    content = models.TextField(verbose_name='评论内容')
    # Choices restrict the rating to the integers 1 through 5.
    rating = models.IntegerField(choices=[(i, i) for i in range(1, 6)], verbose_name='评分')
    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True, verbose_name='评论时间')

    class Meta:
        verbose_name = '用户评论'
        verbose_name_plural = verbose_name

    def __str__(self):
        """Describe the comment using raw foreign-key ids (no extra queries)."""
        return (
            f'Comment #{self.pk} '
            f'(user={self.user_id}, product={self.product_id}, rating={self.rating})'
        )
class Favorite(models.Model):
    """Marks that a user has bookmarked a product."""

    # Deleting the user or the product also deletes the favorite (CASCADE).
    # NOTE(review): nothing here stops the same user from favoriting the same
    # product twice; add a uniqueness constraint if that is not intended.
    user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name='用户')
    product = models.ForeignKey(XinXi, on_delete=models.CASCADE, verbose_name='商品')
    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True, verbose_name='收藏时间')

    class Meta:
        verbose_name = '用户收藏'
        verbose_name_plural = verbose_name

    def __str__(self):
        """Describe the favorite using raw foreign-key ids (no extra queries)."""
        return f'Favorite #{self.pk} (user={self.user_id}, product={self.product_id})'
2. 视图控制器实现
# myapp/views.py
from django.shortcuts import render, redirect
from django.contrib.auth.decorators import login_required
from django.db.models import Count, Avg
from .models import XinXi, Comment, Favorite
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib
def index(request):
    """Render the home page with site-wide record counts.

    The template receives ``total_products``, ``total_comments``,
    ``total_favorites`` and ``total_users``.
    """
    # This module's imports never bring ``User`` into scope, so referencing it
    # directly raised NameError. Resolve the user model locally instead.
    from django.contrib.auth import get_user_model

    context = {
        'total_products': XinXi.objects.count(),
        'total_comments': Comment.objects.count(),
        'total_favorites': Favorite.objects.count(),
        'total_users': get_user_model().objects.count(),
    }
    return render(request, 'index.html', context)
@login_required
def predict_sales(request):
    """Predict a sales tier from the submitted price, brand and score.

    GET renders the empty form. POST validates the three fields, encodes the
    brand with the label encoder saved by the training script, and renders the
    predicted tier together with the submitted features. Invalid input or an
    unknown brand re-renders the form with an ``error`` message instead of
    raising.
    """
    if request.method != 'POST':
        return render(request, 'predict.html')

    brand = (request.POST.get('brand') or '').strip()
    try:
        # float(None) raises TypeError when a field is missing.
        price = float(request.POST.get('price'))
        score = float(request.POST.get('score'))
    except (TypeError, ValueError):
        return render(request, 'predict.html', {'error': '请输入有效的价格和评分'})
    # float() accepts "nan" and "inf"; reject them before they reach the model.
    if not (np.isfinite(price) and np.isfinite(score)):
        return render(request, 'predict.html', {'error': '请输入有效的价格和评分'})

    # Paths are relative to the process working directory, as in the original.
    # NOTE: joblib files are pickles; only load artifacts produced by the
    # project's own training script.
    model = joblib.load('buildmodel/rf_sales_model.joblib')
    brand_encoder = joblib.load('buildmodel/brand_encoder.joblib')

    try:
        # The original indexed an undefined ``brand_encoding`` mapping.
        brand_code = brand_encoder.transform([brand])[0]
    except ValueError:
        # The encoder raises ValueError for a brand it never saw in training.
        return render(request, 'predict.html', {'error': '暂不支持该品牌的销量预测'})

    # Same column names and order as the training frame.
    features = pd.DataFrame(
        [[price, brand_code, score]],
        columns=['price', 'brand_encoded', 'score'],
    )
    prediction = model.predict(features)[0]

    context = {
        'prediction': prediction,
        'features': {'price': price, 'brand': brand, 'score': score},
    }
    return render(request, 'predict.html', context)
def sentiment_analysis(request):
    """Classify every comment's sentiment and render the label distribution.

    Each comment's text is scored with SnowNLP; a score of at least 0.6 is
    labelled positive, at least 0.4 neutral, and anything else negative. The
    template receives the per-label counts and the total number of comments.
    """
    from snownlp import SnowNLP

    def label_for(score):
        # Thresholds are checked from highest to lowest.
        if score >= 0.6:
            return '积极'
        if score >= 0.4:
            return '中性'
        return '消极'

    all_comments = Comment.objects.all()
    labels = [label_for(SnowNLP(item.content).sentiments) for item in all_comments]

    # value_counts() yields the number of comments per label.
    distribution = pd.Series(labels).value_counts().to_dict()
    return render(request, 'sentiment_analysis.html', {
        'sentiment_data': distribution,
        'total_comments': len(all_comments),
    })
3. 推荐系统实现
# myapp/recommendation.py
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from .models import User, XinXi, Favorite, Comment
class RecommendationEngine:
    """User-based collaborative filtering over favorites and comments.

    A user-by-product preference matrix is built from favorites and comments,
    users are compared by cosine similarity, and products favorited by the
    most similar users are recommended.
    """

    def __init__(self):
        self.user_item_matrix = None
        self.user_similarity = None
        # Row and column order of the matrix, so later lookups index it
        # consistently.
        self.user_ids = []
        self.product_ids = []

    def build_user_item_matrix(self):
        """Build and return the user-by-product preference matrix.

        Each cell is ``favorites * 2 + comments + average rating`` for that
        (user, product) pair, or 0 when the pair has no activity.
        """
        # Local import: this module's own imports do not provide Avg or Count.
        from django.db.models import Avg, Count

        # Explicit ordering keeps the row/column indices deterministic.
        self.user_ids = list(User.objects.order_by('id').values_list('id', flat=True))
        self.product_ids = list(XinXi.objects.order_by('id').values_list('id', flat=True))
        row_of = {uid: i for i, uid in enumerate(self.user_ids)}
        col_of = {pid: j for j, pid in enumerate(self.product_ids)}
        matrix = np.zeros((len(self.user_ids), len(self.product_ids)))

        # Two grouped queries replace three queries per matrix cell.
        favorite_rows = (
            Favorite.objects.values('user_id', 'product_id')
            .annotate(total=Count('id'))
            .order_by()
        )
        for row in favorite_rows:
            i = row_of.get(row['user_id'])
            j = col_of.get(row['product_id'])
            if i is not None and j is not None:
                matrix[i, j] += row['total'] * 2

        comment_rows = (
            Comment.objects.values('user_id', 'product_id')
            .annotate(total=Count('id'), avg_rating=Avg('rating'))
            .order_by()
        )
        for row in comment_rows:
            i = row_of.get(row['user_id'])
            j = col_of.get(row['product_id'])
            if i is not None and j is not None:
                matrix[i, j] += row['total'] + float(row['avg_rating'] or 0)

        self.user_item_matrix = matrix
        return matrix

    def calculate_user_similarity(self):
        """Compute and return the pairwise cosine similarity between users."""
        if self.user_item_matrix is None:
            self.build_user_item_matrix()
        if self.user_item_matrix.size == 0:
            # cosine_similarity rejects empty input (no users or no products).
            user_count = self.user_item_matrix.shape[0]
            self.user_similarity = np.zeros((user_count, user_count))
        else:
            self.user_similarity = cosine_similarity(self.user_item_matrix)
        return self.user_similarity

    def get_recommendations_for_user(self, user_id, n=6, k=5):
        """Return up to ``n`` products favorited by the ``k`` most similar users.

        Products the user has already favorited are skipped, and each product
        appears at most once. Returns an empty list for an unknown user or a
        non-positive ``n``.
        """
        if n <= 0:
            return []
        if self.user_similarity is None:
            self.calculate_user_similarity()
        try:
            user_index = self.user_ids.index(user_id)
        except ValueError:
            return []

        # Exclude the user explicitly. A user with no activity has zero
        # similarity to themself, so they are not guaranteed to rank first.
        ranking = np.argsort(self.user_similarity[user_index])[::-1]
        neighbours = [idx for idx in ranking if idx != user_index][:k]

        already_favorited = set(
            Favorite.objects.filter(user_id=user_id).values_list('product_id', flat=True)
        )
        recommendations = []
        seen = set()
        for idx in neighbours:
            neighbour_favorites = Favorite.objects.filter(
                user_id=self.user_ids[idx]
            ).select_related('product')
            for favorite in neighbour_favorites:
                product_id = favorite.product_id
                if product_id in already_favorited or product_id in seen:
                    continue
                seen.add(product_id)
                recommendations.append(favorite.product)
                if len(recommendations) >= n:
                    return recommendations
        return recommendations
# Usage example
def get_user_recommendations(user_id):
    """Build a fresh engine and return its recommendations for ``user_id``."""
    return RecommendationEngine().get_recommendations_for_user(user_id)
🤖 机器学习算法
1. 销量预测模型
# buildmodel/build.py
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib
def build_sales_prediction_model(data_path='result3.csv', output_dir='.'):
    """Train, evaluate and save the sales-tier classifier.

    Args:
        data_path: CSV file with ``price``, ``brand``, ``score`` and ``sales``
            columns.
        output_dir: Directory that receives ``rf_sales_model.joblib`` and
            ``brand_encoder.joblib``.

    Returns:
        Tuple of the fitted random forest and its accuracy on the 20% test
        split.
    """
    import os

    # Load the data and drop rows that lack a feature or the target.
    # The estimator cannot be fitted on NaN values.
    df = pd.read_csv(data_path)
    df = df.dropna(subset=['price', 'brand', 'score', 'sales']).copy()

    # Encode the brand name as an integer feature.
    le_brand = LabelEncoder()
    df['brand_encoded'] = le_brand.fit_transform(df['brand'])

    # Bucket sales into four tiers. include_lowest keeps sales == 0 in the
    # first tier; without it those rows get a NaN label.
    df['sales_category'] = pd.cut(
        df['sales'],
        bins=[0, 100, 500, 1000, float('inf')],
        labels=['低销量', '中等销量', '高销量', '超高销量'],
        include_lowest=True,
    )
    # Values outside the bins (negative sales) still come back NaN.
    df = df.dropna(subset=['sales_category'])

    features = ['price', 'brand_encoded', 'score']
    X = df[features]
    y = df['sales_category'].astype(str)

    # Train on 80% of the rows and evaluate on the rest.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    accuracy = rf_model.score(X_test, y_test)
    print(f"模型准确率: {accuracy:.2%}")

    # Save the model and the brand encoder; prediction needs both.
    joblib.dump(rf_model, os.path.join(output_dir, 'rf_sales_model.joblib'))
    joblib.dump(le_brand, os.path.join(output_dir, 'brand_encoder.joblib'))
    return rf_model, accuracy
if __name__ == "__main__":
    # When run as a script, train the model and report its accuracy.
    _, final_accuracy = build_sales_prediction_model()
    print(f"销量预测模型训练完成,准确率: {final_accuracy:.2%}")
2. 情感分析实现
# myapp/sentiment_utils.py
from snownlp import SnowNLP
import jieba
from collections import Counter
def analyze_sentiment(text):
    """Score ``text`` with SnowNLP and return ``(label, score)``.

    A score of at least 0.6 is labelled positive, at least 0.4 neutral, and
    anything lower negative.
    """
    score = SnowNLP(text).sentiments
    # Thresholds are checked from highest to lowest; the first match wins.
    for threshold, label in ((0.6, '积极'), (0.4, '中性')):
        if score >= threshold:
            return label, score
    return '消极', score
def generate_wordcloud(comments, top_n=50):
    """Return the most frequent words across ``comments`` as ``{word: count}``.

    Args:
        comments: Iterable of objects with a ``content`` text attribute.
        top_n: Maximum number of words to return.

    Comment text is segmented with jieba. Stop words, tokens shorter than two
    characters and whitespace-only tokens are discarded.
    """
    stop_words = {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好', '自己', '这'}
    all_text = ' '.join(comment.content for comment in comments)

    # Strip each token first. The segmenter can emit whitespace runs such as
    # "\r\n", which would otherwise pass the length filter and be counted.
    tokens = (token.strip() for token in jieba.cut(all_text))
    word_counts = Counter(
        word for word in tokens
        if len(word) > 1 and word not in stop_words
    )
    return dict(word_counts.most_common(top_n))
🎨 前端界面设计
1. 基础模板结构
<!-- templates/base.html -->
<!-- Base layout: sidebar navigation plus a content area. Child templates fill
     the "content" block, and the "jss" block for page scripts. -->
<!doctype html>
<!-- The page text is Chinese, so declare it (the original said lang="en"). -->
<html lang="zh-CN">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>京东商品数据分析可视化系统</title>
  <!-- Bootstrap CSS -->
  <link href="/static/assets/css/bootstrap.min.css" rel="stylesheet">
  <link href="/static/assets/css/app.css" rel="stylesheet">
  <link href="/static/assets/css/icons.css" rel="stylesheet">
</head>
<body>
  <div class="wrapper">
    <!-- Sidebar navigation -->
    <div class="sidebar-wrapper" data-simplebar="true">
      <ul class="metismenu" id="menu">
        <li><a href="{% url 'index' %}"><i class='bx bx-home-circle'></i>首页</a></li>
        <li class="menu-label">数据信息</li>
        <li><a href="{% url 'ecommerce_products' %}"><i class='bx bx-cart'></i>商品列表</a></li>
        <li><a href="{% url 'ecommerce_comment_list' %}"><i class='bx bx-bookmark-heart'></i>评论列表</a></li>
        <li class="menu-label">可视化图表</li>
        <li><a href="{% url 'widgets' %}"><i class='bx bx-cookie'></i>整体分析</a></li>
        <li><a href="{% url 'chart' %}"><i class="bx bx-line-chart"></i>相关性分析</a></li>
        <li><a href="{% url 'comment_chart' %}"><i class="bx bx-comment"></i>评论分析</a></li>
        <li><a href="{% url 'sentiment_analysis' %}"><i class="bx bx-brain"></i>情感分析</a></li>
        <li class="menu-label">机器学习算法</li>
        <li><a href="{% url 'predict' %}"><i class="bx bx-lock"></i>销量预测</a></li>
        <li class="menu-label">个人中心</li>
        <li><a href="{% url 'recommendations' %}"><i class='bx bx-heart'></i>猜你喜欢</a></li>
        <li><a href="{% url 'favorite_list' %}"><i class='bx bx-bookmark-heart'></i>我的收藏</a></li>
        <li><a href="{% url 'my_comments' %}"><i class='bx bx-comment-detail'></i>我的评论</a></li>
      </ul>
    </div>
    <!-- Main content area -->
    <div class="page-wrapper">
      {% block content %}{% endblock %}
    </div>
  </div>
  <!-- JavaScript: libraries load here, after the content block. Page scripts
       that use them belong in the "jss" block below. -->
  <script src="/static/assets/js/bootstrap.bundle.min.js"></script>
  <script src="/static/assets/js/jquery.min.js"></script>
  <script src="/static/assets/plugins/chartjs/js/Chart.min.js"></script>
  {% block jss %}{% endblock %}
</body>
</html>
2. 数据可视化实现
<!-- templates/widgets.html - overall analysis page -->
{% extends 'base.html' %}
{% block content %}
<div class="page-content">
  <div class="row">
    <!-- Summary cards -->
    <div class="col-12 col-lg-3">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center">
            <div class="">
              <p class="mb-1">商品总数</p>
              <h4 class="mb-0 text-primary">{{ total_products }}</h4>
            </div>
            <div class="ms-auto fs-2 text-primary">
              <i class="bx bx-cart"></i>
            </div>
          </div>
        </div>
      </div>
    </div>
    <div class="col-12 col-lg-3">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center">
            <div class="">
              <p class="mb-1">用户评论</p>
              <h4 class="mb-0 text-danger">{{ total_comments }}</h4>
            </div>
            <div class="ms-auto fs-2 text-danger">
              <i class="bx bx-comment"></i>
            </div>
          </div>
        </div>
      </div>
    </div>
    <div class="col-12 col-lg-3">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center">
            <div class="">
              <p class="mb-1">收藏数量</p>
              <h4 class="mb-0 text-success">{{ total_favorites }}</h4>
            </div>
            <div class="ms-auto fs-2 text-success">
              <i class="bx bx-heart"></i>
            </div>
          </div>
        </div>
      </div>
    </div>
    <div class="col-12 col-lg-3">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center">
            <div class="">
              <p class="mb-1">注册用户</p>
              <h4 class="mb-0 text-warning">{{ total_users }}</h4>
            </div>
            <div class="ms-auto fs-2 text-warning">
              <i class="bx bx-user"></i>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <!-- Charts -->
  <div class="row">
    <div class="col-12 col-lg-6">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center mb-3">
            <h6 class="mb-0">价格分布</h6>
          </div>
          <canvas id="priceChart"></canvas>
        </div>
      </div>
    </div>
    <div class="col-12 col-lg-6">
      <div class="card radius-10">
        <div class="card-body">
          <div class="d-flex align-items-center mb-3">
            <h6 class="mb-0">品牌分布</h6>
          </div>
          <canvas id="brandChart"></canvas>
        </div>
      </div>
    </div>
  </div>
</div>
{% endblock %}
{% block jss %}
<!-- The chart script lives in the "jss" block, which base.html renders after
     Chart.min.js. Inside the content block it ran before the library loaded,
     so `Chart` was undefined. -->
<script>
  // Price distribution bar chart.
  // NOTE(review): the |safe values are written into the script unescaped, so
  // the view must supply trusted, JSON-encoded data.
  const priceCtx = document.getElementById('priceChart').getContext('2d');
  const priceChart = new Chart(priceCtx, {
    type: 'bar',
    data: {
      labels: ['0-1000', '1000-2000', '2000-3000', '3000-4000', '4000+'],
      datasets: [{
        label: '商品数量',
        data: {{ price_distribution|safe }},
        backgroundColor: 'rgba(54, 162, 235, 0.2)',
        borderColor: 'rgba(54, 162, 235, 1)',
        borderWidth: 1
      }]
    },
    options: {
      responsive: true,
      scales: {
        y: {
          beginAtZero: true
        }
      }
    }
  });
  // Brand distribution doughnut chart.
  const brandCtx = document.getElementById('brandChart').getContext('2d');
  const brandChart = new Chart(brandCtx, {
    type: 'doughnut',
    data: {
      labels: {{ brand_labels|safe }},
      datasets: [{
        data: {{ brand_data|safe }},
        // Ten distinct colours; the original palette repeated some, making
        // segments indistinguishable.
        backgroundColor: [
          '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF',
          '#FF9F40', '#8BC34A', '#C9CBCF', '#00ACC1', '#E91E63'
        ]
      }]
    },
    options: {
      responsive: true,
      plugins: {
        legend: {
          position: 'bottom'
        }
      }
    }
  });
</script>
{% endblock %}
📊 数据可视化展示
1. 整体数据统计
- 商品总数: 1161款手机产品
- 用户评论: 1982条评论数据
- 收藏记录: 1003条收藏关系
- 注册用户: 22个用户账户
2. 价格分布分析
# Count products per price band. Each band is (inclusive upper bound, label);
# anything above the last bound falls into '4000+'.
price_bands = (
    (1000, '0-1000'),
    (2000, '1000-2000'),
    (3000, '2000-3000'),
    (4000, '3000-4000'),
)
price_ranges = {label: 0 for _, label in price_bands}
price_ranges['4000+'] = 0

for product in XinXi.objects.all():
    for upper, label in price_bands:
        if product.price <= upper:
            price_ranges[label] += 1
            break
    else:
        # No bounded band matched.
        price_ranges['4000+'] += 1
3. 品牌分布统计
# Per-brand statistics: number of listings, mean price and mean score for each
# brand, ordered with the most-listed brands first.
brand_stats = XinXi.objects.values('brand').annotate(
count=Count('id'),
avg_price=Avg('price'),
avg_score=Avg('score')
).order_by('-count')
4. 情感分析结果
# Tally comments per sentiment label.
sentiment_stats = dict.fromkeys(('积极', '中性', '消极'), 0)
for comment in Comment.objects.all():
    label, _ = analyze_sentiment(comment.content)
    sentiment_stats[label] += 1
🚀 部署与优化
1. 环境配置
# Create and activate a virtual environment
python -m venv venv
source venv/bin/activate # Linux/Mac
# venv\Scripts\activate # Windows
# Install dependencies
pip install -r requirements.txt
# Database setup: open a MySQL shell, then run the CREATE DATABASE statement inside it
mysql -u root -p
CREATE DATABASE design_99_phone CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
2. Django设置
# settings.py
# Database connection. Each value can be overridden with an environment
# variable, which keeps real credentials out of the source tree. The fallbacks
# are the original tutorial values.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': os.environ.get('DB_NAME', 'design_99_phone'),
        'USER': os.environ.get('DB_USER', 'root'),
        'PASSWORD': os.environ.get('DB_PASSWORD', 'your_password'),
        'HOST': os.environ.get('DB_HOST', 'localhost'),
        'PORT': os.environ.get('DB_PORT', '3306'),
        'OPTIONS': {
            # Matches the utf8mb4 character set the database is created with.
            'charset': 'utf8mb4',
        },
    }
}

# Static files
STATIC_URL = '/static/'
STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles')
STATICFILES_DIRS = [
    os.path.join(BASE_DIR, 'static'),
]

# Media files
MEDIA_URL = '/media/'
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
3. 性能优化
# 数据库查询优化
def get_products_with_optimization():
    """Return all products with their comments and favorites prefetched.

    Prefetching avoids a query per product when the reverse ``comment_set``
    and ``favorite_set`` relations are read later.
    """
    # The product model has no foreign keys, so the earlier bare
    # select_related() did nothing. The trailing .all() was also redundant.
    return XinXi.objects.prefetch_related('comment_set', 'favorite_set')
# Cache configuration: Django's built-in Redis cache backend, pointed at
# database 1 of a Redis server on localhost.
# NOTE(review): this backend needs a Redis client package, which the
# requirements list in this document does not include — confirm it is installed.
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.redis.RedisCache',
'LOCATION': 'redis://127.0.0.1:6379/1',
}
}
# 分页优化
from django.core.paginator import Paginator
def paginate_products(request, products, per_page=20):
    """Return the page of ``products`` selected by the ``page`` query parameter.

    ``per_page`` sets how many items each page holds.
    """
    pager = Paginator(products, per_page)
    return pager.get_page(request.GET.get('page'))
🎯 项目特色
1. 技术亮点
- 完整的电商数据分析流程: 从数据收集到智能分析
- 高准确率机器学习模型: 销量预测准确率>85%
- 中文自然语言处理: 基于jieba和SnowNLP的情感分析
- 个性化推荐系统: 基于协同过滤的智能推荐
- 响应式界面设计: 支持多端访问
2. 数据规模
- 商品数据: 1161款手机产品信息
- 用户数据: 22个注册用户
- 评论数据: 1982条用户评论
- 收藏数据: 1003条收藏记录
3. 算法性能
- 销量预测: 随机森林分类算法(四档销量等级),准确率85%+
- 情感分析: 中文文本情感识别
- 推荐系统: 协同过滤算法
- 词云分析: 基于jieba分词
📈 系统监控
1. 性能指标
# 系统性能监控
import time
from django.db import connection
def performance_monitor(func):
    """Decorator that prints a call's elapsed time and database query count.

    The query count is the growth of ``connection.queries`` during the call,
    not the running total for the connection. The figures are printed even
    when ``func`` raises.

    NOTE(review): Django only records ``connection.queries`` in debug mode, so
    the count is presumably 0 otherwise — confirm against the deployment
    settings.
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        queries_before = len(connection.queries)
        # perf_counter is monotonic, so clock adjustments cannot skew it.
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            execution_time = time.perf_counter() - start_time
            # Clamp at zero in case the query log was reset during the call.
            query_count = max(0, len(connection.queries) - queries_before)
            print(f"函数 {func.__name__} 执行时间: {execution_time:.2f}秒")
            print(f"数据库查询次数: {query_count}")

    return wrapper
2. 错误处理
# 全局异常处理
from django.http import JsonResponse
from django.core.exceptions import ObjectDoesNotExist
def handle_exception(func):
    """Decorator that turns exceptions raised by a view into JSON responses.

    Missing objects (``ObjectDoesNotExist`` or ``Http404``) become a 404 JSON
    response. Any other exception is logged with its traceback and becomes a
    500 JSON response.
    """
    import logging
    from functools import wraps

    from django.http import Http404

    logger = logging.getLogger(__name__)

    @wraps(func)  # keep the wrapped view's name and docstring
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (ObjectDoesNotExist, Http404):
            # Http404 is not an ObjectDoesNotExist, so it used to fall through
            # to the generic handler and come back as a 500.
            return JsonResponse({'error': '数据不存在'}, status=404)
        except Exception as e:
            # Log the traceback so failures are not lost.
            logger.exception('Unhandled error in %s', func.__name__)
            # NOTE(review): str(e) may expose internal details to the client;
            # consider a generic message outside development.
            return JsonResponse({'error': str(e)}, status=500)

    return wrapper
🔒 安全特性
1. 用户认证
# 用户认证装饰器
from django.contrib.auth.decorators import login_required
from django.shortcuts import redirect
@login_required
def protected_view(request):
    """Render the protected page; the decorator requires a logged-in user."""
    template_name = 'protected.html'
    return render(request, template_name)
# Access control for class-based views
from django.contrib.auth.mixins import LoginRequiredMixin
# View was used as a base class without being imported, which raises NameError
# as soon as the class statement runs.
from django.views import View


class ProtectedView(LoginRequiredMixin, View):
    """Class-based view that requires an authenticated user."""

    # Where unauthenticated requests are redirected.
    login_url = '/login/'
    # Query parameter that carries the originally requested URL.
    redirect_field_name = 'next'
2. 数据验证
# 表单验证
from django import forms
class ProductForm(forms.ModelForm):
    """Model form for editing a product's title, price, brand and score."""

    class Meta:
        model = XinXi
        fields = ['title', 'price', 'brand', 'score']

    def clean_price(self):
        """Return the cleaned price, rejecting zero or negative values."""
        value = self.cleaned_data['price']
        if value > 0:
            return value
        raise forms.ValidationError('价格必须大于0')
📝 总结与展望
项目成果
- 完整的数据分析平台: 集成了数据收集、分析、预测、推荐等完整功能
- 高准确率预测模型: 销量预测准确率达到85%以上
- 用户友好的界面: 响应式设计,支持多端访问
- 可扩展的架构: 模块化设计,便于功能扩展
技术收获
- Django全栈开发: 掌握了Django框架的完整开发流程
- 机器学习集成: 学会了在Web应用中集成机器学习算法
- 数据可视化: 掌握了Chart.js等图表库的使用
- 中文NLP: 学会了jieba、SnowNLP等中文处理工具
未来改进方向
- 算法优化: 尝试深度学习模型提升预测准确率
- 实时数据处理: 集成Kafka等消息队列处理实时数据
- 微服务架构: 将系统拆分为多个微服务
- 云原生部署: 使用Docker、Kubernetes进行容器化部署
📞 联系方式
- 联系方式: [码界筑梦坊各平台同名]