Node+Cheerio+Express+Vue+Request完成页面抓取

发布于:2025-05-20 ⋅ 阅读:(18) ⋅ 点赞:(0)

        本文实现了一个基于 Express 框架的简单爬虫应用,用于从指定的网页中抓取数据,并将数据整理后通过 HTTP 接口返回。以下是对代码的详细解析:

界面实现

后端

模块导入

const express = require('express');
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');

  • express:用于创建 web 服务器和路由。
  • request:用于发送 HTTP 请求,获取网页内容。
  • cheerio:用于解析 HTML 文档,类似于 jQuery 的语法。
  • fs:用于文件系统操作(虽然代码中未使用)。

常量定义

// Page that the /data route scrapes.
const url = 'https://www.maigoo.com/top/427945.html';

// Module-level buffers that the extractor functions push their rows into.
const ScoreData = [];
const TuiJianDataMessage = [];
const JingDianMessage = [];

// Aggregated payload object that ZhengHeData() fills and returns.
const objWithUrl = {};
  • url:目标网页的 URL。
  • objWithUrl:用于存储最终整合后的数据。
  • ScoreData、TuiJianDataMessage、JingDianMessage:分别用于存储从网页中提取的不同类型的数据。

数据提取函数

/**
 * Extracts the ranking list from the parsed page into the module-level
 * `ScoreData` array, one entry per `.pbox` under `.md_citiao >.rowlist`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `ScoreData`.
 */
function ScoreFunction($) {
    // Empty the shared buffer in place. The previous `this.ScoreData = []`
    // never touched the module-level array (and throws in strict mode, where
    // `this` is undefined), so rows from an earlier request could leak into
    // the next response.
    ScoreData.length = 0;
    $('.md_citiao >.rowlist').find('.pbox').each(function (index, value) {
        const box = $(value);
        ScoreData.push({
            id: index + 1,
            paiming: box.find('.pitem>.item>.num').text(),
            detailPage: box.find('.pitem>.item>.md_title>.font18').attr('href'),
            title: box.find('.pitem>.item>.md_title>a>i').text(),
            // One string per matched tag element.
            tese_em: box.find('.pitem>.item>.md_title>.tese_em').map(function () {
                return $(this).text();
            }).get(),
            score: box.find('.pitem>.item>.attention>.att').text(),
        });
    });
}
  • ScoreFunction:从网页中提取评分相关的数据,并将其存储在 ScoreData 数组中。
/**
 * Extracts the recommendation links from the parsed page into the
 * module-level `TuiJianDataMessage` array, one entry per `.dhidden` element
 * under `.md_citiao >.citiaobtnlist`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `TuiJianDataMessage`.
 */
function TuiJianData($) {
    // Empty the shared buffer in place; the previous `this.TuiJianData = []`
    // did not reset it (and throws in strict mode, where `this` is undefined).
    TuiJianDataMessage.length = 0;
    $('.md_citiao >.citiaobtnlist').find('.dhidden').each(function (index, value) {
        const link = $(value).find('a');
        TuiJianDataMessage.push({
            id: index + 1,
            name: link.text(),
            url: link.attr('href'),
        });
    });
}
  • TuiJianData:从网页中提取推荐数据,并将其存储在 TuiJianDataMessage 数组中。
/**
 * Extracts the scenic-spot detail cards from the parsed page into the
 * module-level `JingDianMessage` array, one entry per `.pbox` under
 * `.mod_cont>.md_citiao`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `JingDianMessage`.
 */
function JingDianData($) {
    // Empty the shared buffer in place; the previous
    // `this.JingDianMessage = []` did not reset it (and throws in strict
    // mode, where `this` is undefined).
    JingDianMessage.length = 0;
    $('.mod_cont>.md_citiao').find('.pbox').each(function (index, value) {
        const box = $(value);
        const titleLink = box.find('.md_title>.title>a');
        JingDianMessage.push({
            id: index + 1,
            title: titleLink.text(),
            jingdianUrl: titleLink.attr('href'),
            // One string per matched `.sc_tese` element.
            tese_em: box.find('.md_title>.sc_tese').map(function () {
                return $(this).text();
            }).get(),
            pic: box.find('.pic>.img>a>img').attr('src'),
            // Placeholder text is returned when the card has no gallery link.
            looktukuUrl: box.find('.pic>.img>.looktuku').attr('href') || '暂无图集',
            desc: box.find('.content').text(),
            descUrl: box.find('.content>a').attr('href'),
            address: box.find('.address').text(),
        });
    });
}
  • JingDianData:从网页中提取景点相关的数据,并将其存储在 JingDianMessage 数组中。

数据整合函数

/**
 * Moves the first `limit` rows of each scrape buffer into `objWithUrl` and
 * returns that object.
 *
 * @param {number} [limit=10] - Maximum number of rows kept per list.
 * @returns {{ScoreData: Array, TuiJianDataMessage: Array, JingDianMessage: Array}}
 *   The shared `objWithUrl` object, refreshed with the current rows.
 */
function ZhengHeData(limit = 10) {
    // Take the head of the buffer and discard the rest. Using only
    // `splice(0, 10)` left any rows beyond the tenth in the module-level
    // array, where they were served at the front of the next response.
    const drain = (list) => {
        const head = list.splice(0, limit);
        list.length = 0;
        return head;
    };
    objWithUrl.ScoreData = drain(ScoreData);
    objWithUrl.TuiJianDataMessage = drain(TuiJianDataMessage);
    objWithUrl.JingDianMessage = drain(JingDianMessage);
    return objWithUrl;
}
  • ZhengHeData:将提取的数据整合到一个对象中,并返回该对象。

路由处理

/**
 * GET /data — fetches the target page, runs the three extractors and replies
 * with `{ code, msg, data }`. `code` is 1 on success and 0 on any failure.
 */
router.get('/data', (req, res) => {
    request(url, function (error, response, body) {
        const failure = {
            code: 0,
            msg: '数据获取失败!',
            data: null
        };

        if (error || !response || response.statusCode !== 200) {
            res.send(failure);
            return;
        }

        // Parsing runs inside an async callback, where an uncaught exception
        // would take the whole process down; report it as a normal failure.
        let data;
        try {
            const $ = cheerio.load(body);
            ScoreFunction($);
            TuiJianData($);
            JingDianData($);
            data = ZhengHeData();
        } catch (parseError) {
            console.error('Failed to parse scraped page:', parseError);
            res.send(failure);
            return;
        }

        res.send({
            code: 1,
            msg: '数据获取成功!',
            data: data
        });
    });
});
  • /data 路由:当访问 /data 时,发送 HTTP 请求获取网页内容,解析并提取数据,最后将整合后的数据返回给客户端。

模块导出

module.exports = router;
  • 将路由模块导出,以便在其他文件中使用。

完整代码

const express = require('express');
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs')

// Page that the /data route scrapes.
const url = 'https://www.maigoo.com/top/427945.html';

// Module-level buffers that the extractor functions push their rows into.
const ScoreData = [];
const TuiJianDataMessage = [];
const JingDianMessage = [];

// Aggregated payload object that ZhengHeData() fills and returns.
const objWithUrl = {};

/**
 * Extracts the ranking list from the parsed page into the module-level
 * `ScoreData` array, one entry per `.pbox` under `.md_citiao >.rowlist`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `ScoreData`.
 */
function ScoreFunction($) {
    // Empty the shared buffer in place. The previous `this.ScoreData = []`
    // never touched the module-level array (and throws in strict mode, where
    // `this` is undefined), so rows from an earlier request could leak into
    // the next response.
    ScoreData.length = 0;
    $('.md_citiao >.rowlist').find('.pbox').each(function (index, value) {
        const box = $(value);
        ScoreData.push({
            id: index + 1,
            paiming: box.find('.pitem>.item>.num').text(),
            detailPage: box.find('.pitem>.item>.md_title>.font18').attr('href'),
            title: box.find('.pitem>.item>.md_title>a>i').text(),
            // One string per matched tag element.
            tese_em: box.find('.pitem>.item>.md_title>.tese_em').map(function () {
                return $(this).text();
            }).get(),
            score: box.find('.pitem>.item>.attention>.att').text(),
        });
    });
}

/**
 * Extracts the recommendation links from the parsed page into the
 * module-level `TuiJianDataMessage` array, one entry per `.dhidden` element
 * under `.md_citiao >.citiaobtnlist`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `TuiJianDataMessage`.
 */
function TuiJianData($) {
    // Empty the shared buffer in place; the previous `this.TuiJianData = []`
    // did not reset it (and throws in strict mode, where `this` is undefined).
    TuiJianDataMessage.length = 0;
    $('.md_citiao >.citiaobtnlist').find('.dhidden').each(function (index, value) {
        const link = $(value).find('a');
        TuiJianDataMessage.push({
            id: index + 1,
            name: link.text(),
            url: link.attr('href'),
        });
    });
}

/**
 * Extracts the scenic-spot detail cards from the parsed page into the
 * module-level `JingDianMessage` array, one entry per `.pbox` under
 * `.mod_cont>.md_citiao`.
 *
 * @param {Function} $ - Cheerio root function returned by `cheerio.load()`.
 * @returns {void} Results are written to `JingDianMessage`.
 */
function JingDianData($) {
    // Empty the shared buffer in place; the previous
    // `this.JingDianMessage = []` did not reset it (and throws in strict
    // mode, where `this` is undefined).
    JingDianMessage.length = 0;
    $('.mod_cont>.md_citiao').find('.pbox').each(function (index, value) {
        const box = $(value);
        const titleLink = box.find('.md_title>.title>a');
        JingDianMessage.push({
            id: index + 1,
            title: titleLink.text(),
            jingdianUrl: titleLink.attr('href'),
            // One string per matched `.sc_tese` element.
            tese_em: box.find('.md_title>.sc_tese').map(function () {
                return $(this).text();
            }).get(),
            pic: box.find('.pic>.img>a>img').attr('src'),
            // Placeholder text is returned when the card has no gallery link.
            looktukuUrl: box.find('.pic>.img>.looktuku').attr('href') || '暂无图集',
            desc: box.find('.content').text(),
            descUrl: box.find('.content>a').attr('href'),
            address: box.find('.address').text(),
        });
    });
}

/**
 * Moves the first `limit` rows of each scrape buffer into `objWithUrl` and
 * returns that object.
 *
 * @param {number} [limit=10] - Maximum number of rows kept per list.
 * @returns {{ScoreData: Array, TuiJianDataMessage: Array, JingDianMessage: Array}}
 *   The shared `objWithUrl` object, refreshed with the current rows.
 */
function ZhengHeData(limit = 10) {
    // Take the head of the buffer and discard the rest. Using only
    // `splice(0, 10)` left any rows beyond the tenth in the module-level
    // array, where they were served at the front of the next response.
    const drain = (list) => {
        const head = list.splice(0, limit);
        list.length = 0;
        return head;
    };
    objWithUrl.ScoreData = drain(ScoreData);
    objWithUrl.TuiJianDataMessage = drain(TuiJianDataMessage);
    objWithUrl.JingDianMessage = drain(JingDianMessage);
    return objWithUrl;
}

/**
 * GET /data — fetches the target page, runs the three extractors and replies
 * with `{ code, msg, data }`. `code` is 1 on success and 0 on any failure.
 */
router.get('/data', (req, res) => {
    request(url, function (error, response, body) {
        const failure = {
            code: 0,
            msg: '数据获取失败!',
            data: null
        };

        if (error || !response || response.statusCode !== 200) {
            res.send(failure);
            return;
        }

        // Parsing runs inside an async callback, where an uncaught exception
        // would take the whole process down; report it as a normal failure.
        let data;
        try {
            const $ = cheerio.load(body);
            ScoreFunction($);
            TuiJianData($);
            JingDianData($);
            data = ZhengHeData();
        } catch (parseError) {
            console.error('Failed to parse scraped page:', parseError);
            res.send(failure);
            return;
        }

        res.send({
            code: 1,
            msg: '数据获取成功!',
            data: data
        });
    });
});


module.exports = router;

前端

数据获取与初始化

在 created 钩子中,通过 shuJuList API 获取数据,并将数据分别赋值给 ScoreData、TuiJianDataMessage、JingDianMessage。如果数据获取失败,捕获错误并输出到控制台。

async created() {
  try {
    const res = await shuJuList();
    this.ScoreData = res.data.data.ScoreData;
    this.TuiJianDataMessage = res.data.data.TuiJianDataMessage;
    this.JingDianMessage = res.data.data.JingDianMessage;
  } catch (error) {
    console.error("数据获取失败:", error);
  }
}

ECharts 初始化

在 mounted 钩子中初始化两个 ECharts 图表(饼图的具体配置见下文的 initEcharts / initPieChart 方法)。

mounted() {
  this.initEcharts1();
  this.initEcharts2();
}

ECharts 配置与渲染

initEcharts 方法用于初始化饼图,initPieChart 方法用于配置和渲染具体的饼图。通过传入容器 ID、标题、数据和颜色数组,动态生成饼图。

methods: {
  initEcharts() {
    this.initPieChart('echarts-container1', '景区售票占比', this.getScenicData(), ['#ff7f50', '#87cefa', '#da70d6', '#32cd32']);
    this.initPieChart('echarts-container2', '人数占比', this.getVisitorData(), ['#ff69b4', '#add8e6', '#90ee90', '#ffa07a']);
  },
  initPieChart(containerId, titleText, data, colors) {
    var myChart = echarts.init(document.getElementById(containerId));
    var option = {
      title: {
        text: titleText,
        left: 'center',
      },
      tooltip: {
        trigger: 'item',
        formatter: '{a} <br/>{b}: {c} ({d}%)',
      },
      legend: {
        orient: 'vertical',
        left: 10,
        data: data.map(item => item.name),
      },
      series: [
        {
          name: '数据',
          type: 'pie',
          radius: '55%',
          center: ['50%', '60%'],
          data: data,
          roseType: 'area',
          itemStyle: {
            color: function (params) {
              return colors[params.dataIndex];
            },
          },
          emphasis: {
            itemStyle: {
              shadowBlur: 10,
              shadowOffsetX: 0,
              shadowColor: 'rgba(0, 0, 0, 0.5)',
            },
          },
        },
      ],
    };
    myChart.setOption(option);
  }
}

数据获取方法

getScenicData、getVisitorData 方法用于获取景区售票和人数占比的数据。这些数据可以是从 API 获取的,也可以是本地模拟的。

methods: {
  getScenicData() {
    return [
      { name: '景区A', value: 40 },
      { name: '景区B', value: 30 },
      { name: '景区C', value: 20 },
      { name: '景区D', value: 10 },
    ];
  },
  getVisitorData() {
    return [
      { name: '游客A', value: 50 },
      { name: '游客B', value: 30 },
      { name: '游客C', value: 15 },
      { name: '游客D', value: 5 },
    ];
  }
}

样式与布局

在模板中,使用 el-row、el-col 进行布局,左侧展示排名和推荐景点,右侧展示详细信息与 ECharts 图表。通过 el-carousel 实现轮播效果,展示景点的详细信息。

<template>
  <div class="dashboard-container">
    <el-row :gutter="20">
      <!-- Left column: ranking list and recommended spots -->
      <el-col :span="8">
        <div class="card ranking-list">
          <h3 class="card-title">景点综合排名</h3>
          <div v-for="item in ScoreData" :key="item.id" class="ranking-item">
            <span class="rank-number">{{ item.paiming }}</span>
            <div class="content">
              <h4>
                <a :href="item.detailPage" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
              </h4>
              <div class="tags">
                <span v-for="(tag, index) in item.tese_em" :key="index" class="tag">{{ tag }}</span>
              </div>
              <div class="score">{{ item.score }}</div>
            </div>
          </div>
        </div>
        <div class="card recommendation">
          <h3 class="card-title">推荐景点</h3>
          <div class="tags-group">
            <a v-for="item in TuiJianDataMessage" :key="item.id" :href="item.url" target="_blank" rel="noopener noreferrer" class="recommend-tag">{{ item.name }}</a>
          </div>
        </div>
      </el-col>
      <!-- Right column: detail carousel and charts -->
      <el-col :span="16">
        <!-- loop/autoplay are bound with ":" so they receive the boolean false;
             as plain attributes they received the truthy string "false". -->
        <el-carousel :loop="false" :autoplay="false" motion-blur>
          <!-- NOTE(review): "height" is documented as an el-carousel attribute, not an
               el-carousel-item one; confirm whether it should move to the parent. -->
          <el-carousel-item v-for="item in JingDianMessage" class="card detail-card" height="530px" :key="item.id">
            <h3 class="card-title">{{ item.title }}</h3>
            <div class="detail-content">
              <img :src="item.pic" class="detail-image" />
              <div class="detail-info">
                <div class="tags">
                  <!-- tese_em may be empty, so fall back to '' before splitting and drop blank tags -->
                  <span v-for="(tag, index) in (item.tese_em[0] || '').split('\t').filter(tag => tag.trim())" :key="index" class="tag">{{ tag.trim() }}</span>
                </div>
                <p class="desc">{{ item.desc.replace(/\n|\t/g, "") }}</p>
                <div class="address">
                  <i class="el-icon-location"></i>
                  {{ item.address }}
                </div>
                <!-- The backend sends the placeholder text '暂无图集' when there is no gallery URL -->
                <a v-if="item.looktukuUrl !== '暂无图集'" :href="item.looktukuUrl" target="_blank" rel="noopener noreferrer" class="image-link">查看图集</a>
                <a :href="item.jingdianUrl" target="_blank" rel="noopener noreferrer" class="image-link">景点详情</a>
              </div>
            </div>
          </el-carousel-item>
        </el-carousel>
        <div class="card dashboard-echarts">
          <div class="echarts" id="echarts-container1"></div>
          <div class="echarts" id="echarts-container2"></div>
        </div>
      </el-col>
    </el-row>
  </div>
</template>

样式优化

通过 CSS 对卡片、排名列表、推荐标签、详细信息等元素进行样式优化,确保页面布局美观且易于阅读。

/* Page wrapper */
.dashboard-container {
  padding: 20px;
}
/* Shared white panel used by every section */
.card {
  background: #fff;
  border-radius: 8px;
  padding: 20px;
  margin-bottom: 20px;
  box-shadow: 0 2px 12px 0 rgba(0, 0, 0, 0.1);
}
/* One row of the ranking list: rank number next to the content */
.ranking-item {
  display: flex;
  align-items: center;
  margin-bottom: 10px;
}
.rank-number {
  font-size: 18px;
  font-weight: bold;
  margin-right: 10px;
}
/* Wrapping row of feature tags */
.tags {
  display: flex;
  flex-wrap: wrap;
}
.tag {
  background: #f0f0f0;
  padding: 2px 8px;
  border-radius: 4px;
  margin-right: 5px;
  margin-bottom: 5px;
}
/* Link chips in the recommendation panel */
.recommend-tag {
  display: inline-block;
  padding: 5px 10px;
  background: #f0f0f0;
  border-radius: 4px;
  margin-right: 10px;
  margin-bottom: 10px;
}
/* Detail card image and text */
.detail-image {
  width: 100%;
  height: auto;
  border-radius: 8px;
}
.detail-info {
  margin-top: 10px;
}
.image-link {
  display: inline-block;
  margin-right: 10px;
  color: #409eff;
  text-decoration: none;
}
/* Chart host element; ECharts needs an explicit height to render */
.echarts {
  width: 100%;
  height: 300px;
}

完整代码 

<template>
  <div class="dashboard-container">
    <el-row :gutter="20">
      <!-- Left column: ranking list and recommended spots -->
      <el-col :span="8">
        <div class="card ranking-list">
          <h3 class="card-title">景点综合排名</h3>
          <div v-for="item in ScoreData" :key="item.id" class="ranking-item">
            <span class="rank-number">{{ item.paiming }}</span>
            <div class="content">
              <h4>
                <a
                  :href="item.detailPage"
                  target="_blank"
                  rel="noopener noreferrer"
                  >{{ item.title }}</a
                >
              </h4>
              <div class="tags">
                <span
                  v-for="(tag, index) in item.tese_em"
                  :key="index"
                  class="tag"
                  >{{ tag }}</span
                >
              </div>
              <div class="score">{{ item.score }}</div>
            </div>
          </div>
        </div>

        <div class="card recommendation">
          <h3 class="card-title">推荐景点</h3>
          <div class="tags-group">
            <a
              v-for="item in TuiJianDataMessage"
              :key="item.id"
              :href="item.url"
              target="_blank"
              rel="noopener noreferrer"
              class="recommend-tag"
              >{{ item.name }}</a
            >
          </div>
        </div>
      </el-col>

      <!-- Right column: detail carousel and charts -->
      <el-col :span="16">
        <!-- loop/autoplay are bound with ":" so they receive the boolean false;
             as plain attributes they received the truthy string "false". -->
        <el-carousel :loop="false" :autoplay="false" motion-blur>
          <!-- NOTE(review): "height" is documented as an el-carousel attribute, not an
               el-carousel-item one; confirm whether it should move to the parent. -->
          <el-carousel-item
            v-for="item in JingDianMessage"
            class="card detail-card"
            height="530px"
            :key="item.id"
          >
            <h3 class="card-title">{{ item.title }}</h3>
            <div class="detail-content">
              <img :src="item.pic" class="detail-image" />
              <div class="detail-info">
                <div class="tags">
                  <!-- tese_em may be empty, so fall back to '' before splitting and drop blank tags -->
                  <span
                    v-for="(tag, index) in (item.tese_em[0] || '').split('\t').filter(tag => tag.trim())"
                    :key="index"
                    class="tag"
                    >{{ tag.trim() }}</span
                  >
                </div>
                <p class="desc">{{ item.desc.replace(/\n|\t/g, "") }}</p>
                <div class="address">
                  <i class="el-icon-location"></i>
                  {{ item.address }}
                </div>
                <!-- The backend sends the placeholder text '暂无图集' when there is no gallery URL -->
                <a
                  v-if="item.looktukuUrl !== '暂无图集'"
                  :href="item.looktukuUrl"
                  target="_blank"
                  rel="noopener noreferrer"
                  class="image-link"
                  >查看图集</a
                >
                <a
                  :href="item.jingdianUrl"
                  target="_blank"
                  rel="noopener noreferrer"
                  class="image-link"
                  >景点详情</a
                >
              </div>
            </div>
          </el-carousel-item>
        </el-carousel>
        <div class="card dashboard-echarts">
          <div class="echarts" id="echarts-container1"></div>
          <div class="echarts" id="echarts-container2"></div>
        </div>
      </el-col>
    </el-row>
  </div>
</template>

<script>
import * as echarts from "echarts";
import { shuJuList } from "../../api/shuju.js";

/**
 * Dashboard component: shows the scraped ranking, recommendations and
 * scenic-spot details, plus two pie charts fed with static sample data.
 */
export default {
  data() {
    return {
      ScoreData: [],
      TuiJianDataMessage: [],
      JingDianMessage: [],
      echartsOneXdata: [],
    };
  },
  /**
   * Loads the scraped lists through `shuJuList()` and copies them onto the
   * component state. Failures are logged and leave the lists empty.
   */
  async created() {
    try {
      const res = await shuJuList();
      // The backend replies with `data: null` when scraping fails, so check
      // the payload before reading its fields.
      const payload = res.data.data;
      if (!payload) {
        console.error("数据获取失败:", res.data.msg);
        return;
      }
      this.ScoreData = payload.ScoreData || [];
      this.TuiJianDataMessage = payload.TuiJianDataMessage || [];
      this.JingDianMessage = payload.JingDianMessage || [];
    } catch (error) {
      console.error("数据获取失败:", error);
    }
  },
  // Single `mounted` hook. The object previously declared `mounted` twice;
  // the later one silently replaced the first, which called the undefined
  // methods `initEcharts1` / `initEcharts2`.
  mounted() {
    this.initEcharts();
  },
  methods: {
    /** Renders the two pie charts into their fixed container elements. */
    initEcharts() {
      this.initPieChart('echarts-container1', '景区售票占比', this.getScenicData(), ['#ff7f50', '#87cefa', '#da70d6', '#32cd32']);
      this.initPieChart('echarts-container2', '人数占比', this.getVisitorData(), ['#ff69b4', '#add8e6', '#90ee90', '#ffa07a']);
    },
    /**
     * Creates a rose-type pie chart inside the element with the given id.
     *
     * @param {string} containerId - id of the DOM element that hosts the chart.
     * @param {string} titleText - chart title.
     * @param {Array<{name: string, value: number}>} data - pie slices.
     * @param {string[]} colors - slice colors, reused cyclically.
     */
    initPieChart(containerId, titleText, data, colors) {
      const container = document.getElementById(containerId);
      if (!container) {
        // Without a host element there is nothing to draw into.
        console.warn(`ECharts container #${containerId} not found`);
        return;
      }

      // Initialise the ECharts instance on the prepared DOM node.
      const myChart = echarts.init(container);

      // Chart options and data.
      const option = {
        title: {
          text: titleText,
          left: 'center',
        },
        tooltip: {
          trigger: 'item',
          formatter: '{a} <br/>{b}: {c} ({d}%)',
        },
        legend: {
          orient: 'vertical',
          left: 10,
          data: data.map(item => item.name),
        },
        series: [
          {
            name: '数据',
            type: 'pie',
            radius: '55%',
            center: ['50%', '60%'],
            data: data,
            roseType: 'area', // rose (Nightingale) chart
            itemStyle: {
              // Wrap around so extra slices still get a color from the list.
              color: function (params) {
                return colors[params.dataIndex % colors.length];
              },
            },
            emphasis: {
              itemStyle: {
                shadowBlur: 10,
                shadowOffsetX: 0,
                shadowColor: 'rgba(0, 0, 0, 0.5)',
              },
            },
          },
        ],
      };

      // Render the chart with the options above.
      myChart.setOption(option);
    },
    /** Static sample data: ticket-sales share per scenic spot. */
    getScenicData() {
      return [
        { value: 30, name: '西湖' },
        { value: 20, name: '宋城' },
        { value: 25, name: '灵隐寺' },
        { value: 15, name: '断桥残雪' },
      ];
    },
    /** Static sample data: visitor count per scenic spot. */
    getVisitorData() {
      return [
        { value: 20000, name: '西湖' },
        { value: 15000, name: '宋城' },
        { value: 10000, name: '灵隐寺' },
        { value: 5000, name: '断桥残雪' },
      ];
    },
  },
};
</script>

<style lang="scss" scoped>
// Root wrapper; every rule of this component is nested beneath it.
.dashboard-container {
  padding: 10px;
  background: #f0f2f5;
  min-height: 65vh;

  .dashboard-title {
    text-align: center;
    color: #333;
    margin-bottom: 30px;
    font-size: 28px;
  }

  // Shared white panel used by every section.
  .card {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 20px;
    box-shadow: 0 2px 12px 0 rgba(0, 0, 0, 0.1);

    // Compiles to `.card-title`: panel heading with a blue accent bar.
    &-title {
      color: #333;
      margin-bottom: 15px;
      font-size: 18px;
      border-left: 4px solid #409eff;
      padding-left: 10px;
    }
  }

  // Ranking panel: fixed height with its own vertical scrollbar.
  .ranking-list {
    height: 50vh;
    overflow-y: auto;
    .ranking-item {
      display: flex;
      align-items: center;
      padding: 15px 0;
      border-bottom: 1px solid #eee;

      .rank-number {
        font-size: 24px;
        color: #409eff;
        min-width: 50px;
        text-align: center;
      }

      .content {
        flex: 1;

        h4 {
          margin: 0 0 8px;
          font-size: 16px;

          a {
            color: #333;
            text-decoration: none;

            &:hover {
              color: #409eff;
            }
          }
        }

        .tags {
          margin-bottom: 8px;

          .tag {
            display: inline-block;
            background: #f4f4f5;
            color: #909399;
            padding: 5px 10px;
            border-radius: 4px;
            margin-right: 8px;
            margin-bottom: 5px;
            font-size: 12px;
          }
        }

        .score {
          color: #f56c6c;
          font-weight: bold;
        }
      }
    }
  }

  // Recommendation panel: pill-shaped link chips.
  .recommendation {
    .tags-group {
      display: flex;
      flex-wrap: wrap;
      gap: 10px;

      .recommend-tag {
        background: #409eff;
        color: white;
        padding: 8px 15px;
        border-radius: 20px;
        text-decoration: none;
        transition: all 0.3s;

        &:hover {
          background: darken(#409eff, 10%);
          transform: translateY(-2px);
        }
      }
    }
  }

  // Carousel slide: image on the left, text on the right.
  .detail-card {
    .detail-content {
      display: flex;
      gap: 20px;
      box-sizing: border-box;

      .detail-image {
        width: 300px;
        height: 200px;
        border-radius: 6px;
        object-fit: cover;
      }

      .detail-info {
        flex: 1;

        // Description clamped to four lines with an ellipsis.
        .desc {
          color: #666;
          line-height: 1.6;
          margin: 10px 0;
          width: 560px;
          display: -webkit-box;
          -webkit-line-clamp: 4;
          -webkit-box-orient: vertical;
          text-overflow: ellipsis;
          overflow: hidden;
        }

        .address {
          color: #999;
          margin-top: 15px;

          i {
            margin-right: 5px;
          }
        }

        .image-link {
          display: inline-block;
          margin-top: 10px;
          color: #409eff;
          text-decoration: none;
          margin-right: 20px;
          &:hover {
            text-decoration: underline;
          }
        }
      }
    }
  }

  // Chart panel: the two chart hosts sit side by side.
  .dashboard-echarts {
    margin-top: 20px;
    display: flex;
    justify-content: space-between;
    .echarts {
      width: 48%;
      height: 400px;
    }
  }
}
</style>

项目地址

koa_system:🔥🔥🔥 Koa2 + React 商城项目(前端:React + Antd;前端:Vue2 + Element-plus;后端:Koa2 + Sequelize) https://gitee.com/ah-ah-bao/koa_system

欢迎大家点击查看,方便的话点一个star~ 


网站公告

今日签到

点亮在社区的每一天
去签到