99-基于Python的京东手机数据分析及预测系统
基于Django的京东手机数据分析及预测系统 - 完整技术实现
一个集数据爬取、分析、机器学习预测、用户管理和推荐系统于一体的电商数据分析平台
📋 目录
- 项目概述
- 技术架构
- 核心功能实现
- 数据模型设计
- 机器学习算法
- 前端界面设计
- 部署与优化
- 项目特色
- 总结与展望
🎯 项目概述
项目背景
随着电商行业的快速发展,数据驱动的决策变得越来越重要。本项目基于京东手机商品数据,构建了一个完整的数据分析和预测系统,为电商运营提供数据支撑。
系统特点
- 数据规模: 1161款手机商品,1982条用户评论,1003条收藏记录
- 算法准确率: 销量预测(按销量区间划分为低/中等/高/超高四档的分类任务)在测试集上的准确率>85%
- 用户体验: 响应式设计,支持移动端和桌面端
- 技术栈: Django + MySQL + scikit-learn + Bootstrap
💻 项目展示
项目演示视频随后更新于哔哩哔哩个人主页,敬请关注!
🏗️ 技术架构
后端技术栈
# requirements.txt 核心依赖
Django==4.2.20 # Web框架
pymysql==1.1.0 # MySQL驱动
pandas==2.0.3 # 数据处理
scikit-learn==1.3.0 # 机器学习
jieba==0.42.1 # 中文分词
snownlp==0.12.3 # 情感分析
django-simpleui==2023.12.30 # 后台美化
pillow==10.0.1 # 图片处理
faker==19.6.2 # 模拟数据
前端技术栈
<!-- Core CSS framework -->
<link href="/static/assets/css/bootstrap.min.css" rel="stylesheet">
<link href="/static/assets/css/app.css" rel="stylesheet">
<!-- Chart library -->
<script src="/static/assets/plugins/chartjs/js/Chart.min.js"></script>
<!-- Icon library -->
<link href="/static/assets/css/icons.css" rel="stylesheet">
项目结构
jd_djangoProject/
├── myapp/ # 主应用模块
│ ├── models.py # 数据模型定义
│ ├── views.py # 视图控制器(866行)
│ ├── urls.py # URL路由配置
│ ├── admin.py # 后台管理配置
│ ├── recommendation.py # 推荐系统引擎
│ └── management/ # Django管理命令
├── templates/ # HTML模板文件
│ ├── base.html # 基础模板
│ ├── index.html # 首页模板
│ ├── predict.html # 销量预测模板
│ └── sentiment_analysis.html # 情感分析模板
├── buildmodel/ # 机器学习模型
│ ├── build.py # 模型训练脚本
│ ├── rf_sales_model.joblib # 随机森林模型
│ └── result3.csv # 训练数据集(1178条)
├── static/ # 静态资源文件
├── media/ # 媒体文件存储
└── speculate.py # 销量预测接口
🔧 核心功能实现
1. 数据模型设计
# myapp/models.py
from django.db import models
from django.contrib.auth.models import User


class XinXi(models.Model):
    """Product record: title, price, brand, rating score and sales count."""

    title = models.CharField(max_length=500, verbose_name='商品标题')
    price = models.DecimalField(max_digits=10, decimal_places=2, verbose_name='价格')
    brand = models.CharField(max_length=100, verbose_name='品牌')
    # score and sales are nullable: a product may have no rating or sales yet.
    score = models.DecimalField(max_digits=3, decimal_places=2, null=True, verbose_name='评分')
    sales = models.IntegerField(null=True, verbose_name='销量')

    class Meta:
        verbose_name = '商品信息'
        verbose_name_plural = verbose_name

    def __str__(self):
        return self.title


class Comment(models.Model):
    """A user's review of a product, with a 1-5 integer rating."""

    user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name='用户')
    product = models.ForeignKey(XinXi, on_delete=models.CASCADE, verbose_name='商品')
    content = models.TextField(verbose_name='评论内容')
    rating = models.IntegerField(choices=[(i, i) for i in range(1, 6)], verbose_name='评分')
    created_at = models.DateTimeField(auto_now_add=True, verbose_name='评论时间')

    class Meta:
        verbose_name = '用户评论'
        verbose_name_plural = verbose_name

    def __str__(self):
        return f'{self.user} -> {self.product} ({self.rating})'


class Favorite(models.Model):
    """Marks a product as bookmarked by a user."""

    user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name='用户')
    product = models.ForeignKey(XinXi, on_delete=models.CASCADE, verbose_name='商品')
    created_at = models.DateTimeField(auto_now_add=True, verbose_name='收藏时间')

    class Meta:
        verbose_name = '用户收藏'
        verbose_name_plural = verbose_name

    def __str__(self):
        return f'{self.user} -> {self.product}'
2. 视图控制器实现
# myapp/views.py
from django.shortcuts import render, redirect
from django.contrib.auth.decorators import login_required
from django.db.models import Count, Avg
from .models import XinXi, Comment, Favorite
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import functools

import joblib
from django.contrib.auth.models import User


@functools.lru_cache(maxsize=None)
def _load_artifact(path):
    """Load a joblib artifact once per process and reuse it across requests.

    A retrained file on disk is only picked up after the process restarts
    (or after ``_load_artifact.cache_clear()``).
    """
    return joblib.load(path)


def index(request):
    """Render the home page with the total product/comment/favorite/user counts."""
    context = {
        'total_products': XinXi.objects.count(),
        'total_comments': Comment.objects.count(),
        'total_favorites': Favorite.objects.count(),
        'total_users': User.objects.count(),
    }
    return render(request, 'index.html', context)


@login_required
def predict_sales(request):
    """Predict the sales category for a price / brand / score submitted via POST.

    GET renders the empty form. On POST the inputs are validated, the brand is
    encoded with the label encoder saved next to the model by the training
    script, and the prediction is rendered. Invalid input re-renders the form
    with an ``error`` entry in the context instead of raising.
    """
    if request.method != 'POST':
        return render(request, 'predict.html')

    brand = request.POST.get('brand')
    try:
        # float(None) raises TypeError when a field is missing from the form.
        price = float(request.POST.get('price'))
        score = float(request.POST.get('score'))
    except (TypeError, ValueError):
        return render(request, 'predict.html', {'error': '请输入有效的价格和评分'})

    model = _load_artifact('buildmodel/rf_sales_model.joblib')
    brand_encoder = _load_artifact('buildmodel/brand_encoder.joblib')
    try:
        # The encoder rejects brands it did not see during training.
        brand_code = brand_encoder.transform([brand])[0]
    except (TypeError, ValueError):
        return render(request, 'predict.html', {'error': '暂不支持该品牌的销量预测'})

    # Use the same column names the model was trained on.
    features = pd.DataFrame(
        [[price, brand_code, score]],
        columns=['price', 'brand_encoded', 'score'],
    )
    prediction = model.predict(features)[0]

    context = {
        'prediction': prediction,
        'features': {'price': price, 'brand': brand, 'score': score},
    }
    return render(request, 'predict.html', context)


def sentiment_analysis(request):
    """Classify every comment as 积极 / 中性 / 消极 and render the distribution."""
    from snownlp import SnowNLP

    # Only the text is needed, so skip building full Comment instances.
    contents = list(Comment.objects.values_list('content', flat=True))

    sentiments = []
    for content in contents:
        sentiment_score = SnowNLP(content).sentiments
        if sentiment_score >= 0.6:
            sentiments.append('积极')
        elif sentiment_score >= 0.4:
            sentiments.append('中性')
        else:
            sentiments.append('消极')

    sentiment_counts = pd.Series(sentiments, dtype='object').value_counts()
    context = {
        # Plain ints so the template never renders NumPy scalar reprs.
        'sentiment_data': {label: int(count) for label, count in sentiment_counts.items()},
        'total_comments': len(contents),
    }
    return render(request, 'sentiment_analysis.html', context)
3. 推荐系统实现
# myapp/recommendation.py
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from django.db.models import Avg, Count

from .models import User, XinXi, Favorite, Comment


class RecommendationEngine:
    """User-based collaborative filtering built from favorites and comments."""

    def __init__(self):
        self.user_item_matrix = None
        self.user_similarity = None
        # Primary keys of the users behind each matrix row, in row order.
        self._user_ids = None

    @staticmethod
    def _load_user_ids():
        """Return user primary keys in a fixed (pk) order."""
        return list(User.objects.order_by('pk').values_list('pk', flat=True))

    def build_user_item_matrix(self):
        """Build the user x product preference matrix.

        Each cell is ``favorites * 2 + comments + average rating`` for that
        (user, product) pair. Two grouped queries replace the original
        per-pair lookups.

        Returns:
            numpy.ndarray of shape (number of users, number of products).
        """
        user_ids = self._load_user_ids()
        product_ids = list(XinXi.objects.order_by('pk').values_list('pk', flat=True))
        row_of = {pk: i for i, pk in enumerate(user_ids)}
        col_of = {pk: j for j, pk in enumerate(product_ids)}
        matrix = np.zeros((len(user_ids), len(product_ids)))

        favorite_rows = (
            Favorite.objects.values('user_id', 'product_id')
            .annotate(total=Count('id'))
            .order_by()
        )
        for entry in favorite_rows:
            i = row_of.get(entry['user_id'])
            j = col_of.get(entry['product_id'])
            # Skip rows created after the id lists were read.
            if i is not None and j is not None:
                matrix[i, j] += 2 * entry['total']

        comment_rows = (
            Comment.objects.values('user_id', 'product_id')
            .annotate(total=Count('id'), avg_rating=Avg('rating'))
            .order_by()
        )
        for entry in comment_rows:
            i = row_of.get(entry['user_id'])
            j = col_of.get(entry['product_id'])
            if i is not None and j is not None:
                matrix[i, j] += entry['total'] + (entry['avg_rating'] or 0)

        self._user_ids = user_ids
        self.user_item_matrix = matrix
        return matrix

    def calculate_user_similarity(self):
        """Compute pairwise cosine similarity between the matrix rows."""
        if self.user_item_matrix is None:
            self.build_user_item_matrix()
        if self.user_item_matrix.size == 0:
            # No users or no products: nothing to compare.
            user_count = self.user_item_matrix.shape[0]
            self.user_similarity = np.zeros((user_count, user_count))
        else:
            self.user_similarity = cosine_similarity(self.user_item_matrix)
        return self.user_similarity

    def get_recommendations_for_user(self, user_id, n=6):
        """Return up to ``n`` products favorited by the most similar users.

        Products the target user already favorited are excluded and each
        product appears at most once. An unknown ``user_id`` yields ``[]``.
        """
        if n <= 0:
            return []
        if self.user_similarity is None:
            self.calculate_user_similarity()
        if self._user_ids is None:
            self._user_ids = self._load_user_ids()

        try:
            user_index = self._user_ids.index(user_id)
        except ValueError:
            return []
        if user_index >= len(self.user_similarity):
            return []

        # Drop the target user explicitly: a user with an all-zero row has
        # similarity 0 with themself and would not sort first.
        ranked = np.argsort(self.user_similarity[user_index])[::-1]
        similar_users = [idx for idx in ranked if idx != user_index][:5]

        # Seed with the user's own favorites so they are never recommended.
        seen_product_ids = set(
            Favorite.objects.filter(user_id=user_id).values_list('product_id', flat=True)
        )
        recommendations = []
        for similar_user_idx in similar_users:
            neighbour_favorites = Favorite.objects.filter(
                user_id=self._user_ids[similar_user_idx]
            ).select_related('product')
            for favorite in neighbour_favorites:
                if favorite.product_id in seen_product_ids:
                    continue
                seen_product_ids.add(favorite.product_id)
                recommendations.append(favorite.product)
                if len(recommendations) >= n:
                    return recommendations
        return recommendations


# Usage example
def get_user_recommendations(user_id):
    """Build a fresh engine and return the default recommendations for ``user_id``."""
    engine = RecommendationEngine()
    return engine.get_recommendations_for_user(user_id)
🤖 机器学习算法
1. 销量预测模型
# buildmodel/build.py
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib


def build_sales_prediction_model():
    """Train and save the random-forest sales-category classifier.

    Reads ``result3.csv``, encodes the brand column, bins ``sales`` into four
    categories, fits the model on an 80/20 split and writes both the model
    and the brand encoder to disk.

    Returns:
        tuple: ``(fitted model, accuracy on the held-out 20%)``.
    """
    # Load data
    df = pd.read_csv('result3.csv')

    # Rows missing any input or the target cannot be used for fitting.
    df = df.dropna(subset=['price', 'brand', 'score', 'sales']).copy()

    # Feature engineering
    le_brand = LabelEncoder()
    df['brand_encoded'] = le_brand.fit_transform(df['brand'])

    # Bin sales into categories. include_lowest=True keeps sales == 0 in the
    # first bin; otherwise the left edge is open and those rows become NaN.
    df['sales_category'] = pd.cut(
        df['sales'],
        bins=[0, 100, 500, 1000, float('inf')],
        labels=['低销量', '中等销量', '高销量', '超高销量'],
        include_lowest=True,
    )
    # Values outside every bin (e.g. negative sales) still map to NaN.
    df = df.dropna(subset=['sales_category'])

    # Feature selection
    features = ['price', 'brand_encoded', 'score']
    X = df[features]
    y = df['sales_category'].astype(str)

    # Train
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    # Evaluate
    accuracy = rf_model.score(X_test, y_test)
    print(f"模型准确率: {accuracy:.2%}")

    # Save the model and the label encoder
    joblib.dump(rf_model, 'rf_sales_model.joblib')
    joblib.dump(le_brand, 'brand_encoder.joblib')

    return rf_model, accuracy


if __name__ == "__main__":
    model, accuracy = build_sales_prediction_model()
    print(f"销量预测模型训练完成,准确率: {accuracy:.2%}")
2. 情感分析实现
# myapp/sentiment_utils.py
from snownlp import SnowNLP
import jieba
from collections import Counter


def analyze_sentiment(text):
    """Return ``(label, score)`` for ``text`` using SnowNLP's sentiment score.

    The label is '积极' for scores >= 0.6, '中性' for scores >= 0.4 and
    '消极' otherwise.
    """
    sentiment_score = SnowNLP(text).sentiments
    for floor, label in ((0.6, '积极'), (0.4, '中性')):
        if sentiment_score >= floor:
            return label, sentiment_score
    return '消极', sentiment_score


def generate_wordcloud(comments):
    """Return the 50 most frequent words across ``comments`` as ``{word: count}``.

    Comment contents are joined with spaces and segmented with jieba; stop
    words and single-character tokens are ignored.
    """
    corpus = ' '.join(comment.content for comment in comments)

    stop_words = {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好', '自己', '这'}

    frequencies = Counter()
    for token in jieba.cut(corpus):
        if token not in stop_words and len(token) > 1:
            frequencies[token] += 1

    return dict(frequencies.most_common(50))
🎨 前端界面设计
1. 基础模板结构
<!-- templates/base.html -->
<!doctype html>
<!-- The page content is Chinese, so declare it for browsers and screen readers. -->
<html lang="zh-CN">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>京东商品数据分析可视化系统</title>
    <!-- Bootstrap CSS -->
    <link href="/static/assets/css/bootstrap.min.css" rel="stylesheet">
    <link href="/static/assets/css/app.css" rel="stylesheet">
    <link href="/static/assets/css/icons.css" rel="stylesheet">
</head>
<body>
<div class="wrapper">
    <!-- Sidebar navigation -->
    <div class="sidebar-wrapper" data-simplebar="true">
        <ul class="metismenu" id="menu">
            <li><a href="{% url 'index' %}"><i class='bx bx-home-circle'></i>首页</a></li>
            <li class="menu-label">数据信息</li>
            <li><a href="{% url 'ecommerce_products' %}"><i class='bx bx-cart'></i>商品列表</a></li>
            <li><a href="{% url 'ecommerce_comment_list' %}"><i class='bx bx-bookmark-heart'></i>评论列表</a></li>
            <li class="menu-label">可视化图表</li>
            <li><a href="{% url 'widgets' %}"><i class='bx bx-cookie'></i>整体分析</a></li>
            <li><a href="{% url 'chart' %}"><i class="bx bx-line-chart"></i>相关性分析</a></li>
            <li><a href="{% url 'comment_chart' %}"><i class="bx bx-comment"></i>评论分析</a></li>
            <li><a href="{% url 'sentiment_analysis' %}"><i class="bx bx-brain"></i>情感分析</a></li>
            <li class="menu-label">机器学习算法</li>
            <li><a href="{% url 'predict' %}"><i class="bx bx-lock"></i>销量预测</a></li>
            <li class="menu-label">个人中心</li>
            <li><a href="{% url 'recommendations' %}"><i class='bx bx-heart'></i>猜你喜欢</a></li>
            <li><a href="{% url 'favorite_list' %}"><i class='bx bx-bookmark-heart'></i>我的收藏</a></li>
            <li><a href="{% url 'my_comments' %}"><i class='bx bx-comment-detail'></i>我的评论</a></li>
        </ul>
    </div>
    <!-- Main content area -->
    <div class="page-wrapper">
        {% block content %}{% endblock %}
    </div>
</div>
<!-- JavaScript: page scripts that use these libraries belong in the jss block below -->
<script src="/static/assets/js/bootstrap.bundle.min.js"></script>
<script src="/static/assets/js/jquery.min.js"></script>
<script src="/static/assets/plugins/chartjs/js/Chart.min.js"></script>
{% block jss %}{% endblock %}
</body>
</html>
2. 数据可视化实现
<!-- templates/widgets.html - 整体分析页面 -->
{% extends 'base.html' %}
{% block content %}
<div class="page-content">
    <div class="row">
        <!-- Summary cards -->
        <div class="col-12 col-lg-3">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center">
                        <div class="">
                            <p class="mb-1">商品总数</p>
                            <h4 class="mb-0 text-primary">{{ total_products }}</h4>
                        </div>
                        <div class="ms-auto fs-2 text-primary"><i class="bx bx-cart"></i></div>
                    </div>
                </div>
            </div>
        </div>
        <div class="col-12 col-lg-3">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center">
                        <div class="">
                            <p class="mb-1">用户评论</p>
                            <h4 class="mb-0 text-danger">{{ total_comments }}</h4>
                        </div>
                        <div class="ms-auto fs-2 text-danger"><i class="bx bx-comment"></i></div>
                    </div>
                </div>
            </div>
        </div>
        <div class="col-12 col-lg-3">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center">
                        <div class="">
                            <p class="mb-1">收藏数量</p>
                            <h4 class="mb-0 text-success">{{ total_favorites }}</h4>
                        </div>
                        <div class="ms-auto fs-2 text-success"><i class="bx bx-heart"></i></div>
                    </div>
                </div>
            </div>
        </div>
        <div class="col-12 col-lg-3">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center">
                        <div class="">
                            <p class="mb-1">注册用户</p>
                            <h4 class="mb-0 text-warning">{{ total_users }}</h4>
                        </div>
                        <div class="ms-auto fs-2 text-warning"><i class="bx bx-user"></i></div>
                    </div>
                </div>
            </div>
        </div>
    </div>
    <!-- Charts -->
    <div class="row">
        <div class="col-12 col-lg-6">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center mb-3">
                        <h6 class="mb-0">价格分布</h6>
                    </div>
                    <canvas id="priceChart"></canvas>
                </div>
            </div>
        </div>
        <div class="col-12 col-lg-6">
            <div class="card radius-10">
                <div class="card-body">
                    <div class="d-flex align-items-center mb-3">
                        <h6 class="mb-0">品牌分布</h6>
                    </div>
                    <canvas id="brandChart"></canvas>
                </div>
            </div>
        </div>
    </div>
</div>
{% endblock %}
{% block jss %}
<!-- In the jss block so this runs after base.html has loaded Chart.js;
     inside the content block it would run before Chart is defined. -->
<script>
// Price distribution chart
const priceCtx = document.getElementById('priceChart').getContext('2d');
const priceChart = new Chart(priceCtx, {
    type: 'bar',
    data: {
        labels: ['0-1000', '1000-2000', '2000-3000', '3000-4000', '4000+'],
        datasets: [{
            label: '商品数量',
            data: {{ price_distribution|safe }},
            backgroundColor: 'rgba(54, 162, 235, 0.2)',
            borderColor: 'rgba(54, 162, 235, 1)',
            borderWidth: 1
        }]
    },
    options: {
        responsive: true,
        scales: {
            y: {
                beginAtZero: true
            }
        }
    }
});

// Brand distribution chart
const brandCtx = document.getElementById('brandChart').getContext('2d');
const brandChart = new Chart(brandCtx, {
    type: 'doughnut',
    data: {
        labels: {{ brand_labels|safe }},
        datasets: [{
            data: {{ brand_data|safe }},
            backgroundColor: [
                '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF',
                '#FF9F40', '#FF6384', '#C9CBCF', '#4BC0C0', '#FF6384'
            ]
        }]
    },
    options: {
        responsive: true,
        plugins: {
            legend: {
                position: 'bottom'
            }
        }
    }
});
</script>
{% endblock %}
📊 数据可视化展示
1. 整体数据统计
- 商品总数: 1161款手机产品
- 用户评论: 1982条评论数据
- 收藏记录: 1003条收藏关系
- 注册用户: 22个用户账户
2. 价格分布分析
# 价格区间统计
# Histogram of products per price band; each upper bound is inclusive.
price_ranges = dict.fromkeys(
    ['0-1000', '1000-2000', '2000-3000', '3000-4000', '4000+'], 0
)

for product in XinXi.objects.all():
    for upper_bound, label in (
        (1000, '0-1000'),
        (2000, '1000-2000'),
        (3000, '2000-3000'),
        (4000, '3000-4000'),
    ):
        if product.price <= upper_bound:
            break
    else:
        # Above every bounded band.
        label = '4000+'
    price_ranges[label] += 1
3. 品牌分布统计
# Brand statistics: one row per brand with its product count, average price
# and average score, ordered by product count descending.
brand_stats = XinXi.objects.values('brand').annotate(count=Count('id'),avg_price=Avg('price'),avg_score=Avg('score')
).order_by('-count')
4. 情感分析结果
# 情感分析统计
# Tally how many comments fall into each sentiment label.
sentiment_stats = dict.fromkeys(['积极', '中性', '消极'], 0)

for comment in Comment.objects.all():
    sentiment, score = analyze_sentiment(comment.content)
    sentiment_stats[sentiment] = sentiment_stats[sentiment] + 1
🚀 部署与优化
1. 环境配置
# Create and activate a virtual environment
python -m venv venv
source venv/bin/activate  # Linux/Mac
# venv\Scripts\activate   # Windows

# Install dependencies
pip install -r requirements.txt

# Create the database (prompts for the MySQL root password)
mysql -u root -p -e "CREATE DATABASE design_99_phone CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;"
2. Django设置
# settings.py
# MySQL connection; utf8mb4 matches the character set the database is created with.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'design_99_phone',
        'USER': 'root',
        'PASSWORD': 'your_password',
        'HOST': 'localhost',
        'PORT': '3306',
        'OPTIONS': {
            'charset': 'utf8mb4',
        },
    },
}

# Static files
STATIC_URL = '/static/'
STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles')
STATICFILES_DIRS = [
    os.path.join(BASE_DIR, 'static'),
]

# Media files
MEDIA_URL = '/media/'
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
3. 性能优化
# 数据库查询优化
def get_products_with_optimization():
    """Return all products with their comments and favorites prefetched.

    The reverse relations are fetched in bulk rather than once per product.
    ``select_related()`` was dropped because ``XinXi`` has no forward foreign
    keys for it to follow, and the trailing ``.all()`` was redundant.
    """
    return XinXi.objects.prefetch_related('comment_set', 'favorite_set')


# Cache configuration
# Default cache: Redis on localhost, database 1.
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.redis.RedisCache',
        'LOCATION': 'redis://127.0.0.1:6379/1',
    },
}

# Pagination
from django.core.paginator import Paginator


def paginate_products(request, products, per_page=20):
    """Return the page of ``products`` selected by the ``page`` query parameter.

    Args:
        request: Current request; ``request.GET['page']`` picks the page.
        products: QuerySet or other sliceable sequence to paginate.
        per_page: Number of items per page.

    Returns:
        The page object from ``Paginator.get_page``.
    """
    # An unordered QuerySet can repeat or skip rows between pages, so fall
    # back to primary-key order. Plain sequences have no ``ordered`` attribute.
    if getattr(products, 'ordered', True) is False:
        products = products.order_by('pk')

    paginator = Paginator(products, per_page)
    return paginator.get_page(request.GET.get('page'))
🎯 项目特色
1. 技术亮点
- 完整的电商数据分析流程: 从数据收集到智能分析
- 高准确率机器学习模型: 销量预测准确率>85%
- 中文自然语言处理: 基于jieba和SnowNLP的情感分析
- 个性化推荐系统: 基于协同过滤的智能推荐
- 响应式界面设计: 支持多端访问
2. 数据规模
- 商品数据: 1161款手机产品信息
- 用户数据: 22个注册用户
- 评论数据: 1982条用户评论
- 收藏数据: 1003条收藏记录
3. 算法性能
- 销量预测: 随机森林算法,准确率85%+
- 情感分析: 中文文本情感识别
- 推荐系统: 协同过滤算法
- 词云分析: 基于jieba分词
📈 系统监控
1. 性能指标
# 系统性能监控
import time
import functools

from django.db import connection


def performance_monitor(func):
    """Decorator that prints the wall-clock time and DB query count of each call.

    The query count is the number of entries added to ``connection.queries``
    during the call. Django only records queries there when ``DEBUG`` is
    true, so the count is 0 otherwise. Figures are printed even when the
    wrapped function raises.
    """
    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        queries_before = len(connection.queries)
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # Execution time of this call only
            execution_time = time.perf_counter() - start_time
            # Queries issued by this call, not the connection's running total
            query_count = len(connection.queries) - queries_before
            print(f"函数 {func.__name__} 执行时间: {execution_time:.2f}秒")
            print(f"数据库查询次数: {query_count}")
    return wrapper
2. 错误处理
# 全局异常处理
from django.http import JsonResponse
import functools
import logging

from django.core.exceptions import ObjectDoesNotExist


def handle_exception(func):
    """Decorator that turns exceptions raised by a view into JSON error responses.

    ``ObjectDoesNotExist`` becomes a 404 response; any other exception is
    logged with its traceback and becomes a 500 response carrying the
    exception message.
    """
    @functools.wraps(func)  # keep the wrapped view's name and docstring
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except ObjectDoesNotExist:
            return JsonResponse({'error': '数据不存在'}, status=404)
        except Exception as e:
            # Log the traceback so unexpected failures are not lost.
            logging.getLogger(__name__).exception(
                "Unhandled error in %s", func.__name__
            )
            return JsonResponse({'error': str(e)}, status=500)
    return wrapper
🔒 安全特性
1. 用户认证
# 用户认证装饰器
from django.contrib.auth.decorators import login_required
from django.shortcuts import redirect, render


@login_required
def protected_view(request):
    """Render ``protected.html``; ``login_required`` restricts it to logged-in users."""
    return render(request, 'protected.html')


# Access control
from django.contrib.auth.mixins import LoginRequiredMixin
from django.views import View


class ProtectedView(LoginRequiredMixin, View):
    """Class-based view guarded by ``LoginRequiredMixin``."""

    # Login page URL and the query parameter that carries the return URL.
    login_url = '/login/'
    redirect_field_name = 'next'
2. 数据验证
# 表单验证
from django import forms


class ProductForm(forms.ModelForm):
    """Model form for ``XinXi`` exposing title, price, brand and score."""

    class Meta:
        model = XinXi
        fields = ['title', 'price', 'brand', 'score']

    def clean_price(self):
        """Return the cleaned price, rejecting zero and negative values."""
        value = self.cleaned_data['price']
        if value > 0:
            return value
        raise forms.ValidationError('价格必须大于0')
📝 总结与展望
项目成果
- 完整的数据分析平台: 集成了数据收集、分析、预测、推荐等完整功能
- 高准确率预测模型: 销量预测准确率达到85%以上
- 用户友好的界面: 响应式设计,支持多端访问
- 可扩展的架构: 模块化设计,便于功能扩展
技术收获
- Django全栈开发: 掌握了Django框架的完整开发流程
- 机器学习集成: 学会了在Web应用中集成机器学习算法
- 数据可视化: 掌握了Chart.js等图表库的使用
- 中文NLP: 学会了jieba、SnowNLP等中文处理工具
未来改进方向
- 算法优化: 尝试深度学习模型提升预测准确率
- 实时数据处理: 集成Kafka等消息队列处理实时数据
- 微服务架构: 将系统拆分为多个微服务
- 云原生部署: 使用Docker、Kubernetes进行容器化部署
📞 联系方式
- 联系方式: [码界筑梦坊各平台同名]