项目需要未来40天的气象数据。找了一圈气象 API,不是收费就是不支持未来40天的预报,于是干脆写了个爬虫,自动爬取气象网站上的数据。以前都是用 Python 写爬虫,这次重新拾起 Java 来写,别有一番风味。
目标气象网站:西安天气预报40天_西安天气预报40天查询,西安未来40天天气预报 - 东方天气(https://tianqi.eastday.com/xian/40/)
第三方依赖(代码中还用到了 Lombok 的 @Slf4j、@Getter、@Setter 注解,需另外引入 Lombok 及 SLF4J 依赖):
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.1.0</version>
</dependency>
代码:
package com.test.main;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line scraper that downloads the 40-day forecast page for Xi'an, extracts the
 * low/high temperature of every listed day and writes the result to an .xlsx workbook.
 *
 * <p>Usage: {@code weatherSpider [outputPath]}. When no argument is given the workbook is
 * written to {@link #DEFAULT_OUTPUT_PATH}.
 */
@Slf4j
public class weatherSpider {

    /** Page that is scraped for the forecast. */
    private static final String FORECAST_URL = "https://tianqi.eastday.com/xian/40/";

    /** Workbook location used when no path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\Users\\Xylon\\Desktop\\weather_forecast.xlsx";

    /** Browser-like user agent sent with the request, to try to get past access restrictions. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3";

    /** Timeout passed to jsoup's {@code Connection.timeout}, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * How many days before today the first scraped entry is dated.
     * NOTE(review): the scraper assumes the page's first temperature cell belongs to the day
     * two days before today; confirm against the current page layout.
     */
    private static final int FIRST_ENTRY_DAYS_AGO = 2;

    /** Format of the date column ({@code yyyyMMdd}); the formatter is immutable and reusable. */
    private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("yyyyMMdd");

    /** Separator between the low and the high temperature inside a scraped cell. */
    private static final String RANGE_SEPARATOR = "~";

    /**
     * Fetches the forecast page, parses it and exports the entries to Excel.
     *
     * @param args optional; {@code args[0]} overrides the output workbook path
     * @throws RuntimeException if the page cannot be downloaded or the workbook cannot be written
     */
    public static void main(String[] args) {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        try {
            Document document = fetchForecastPage(FORECAST_URL);
            LocalDate firstDate = LocalDate.now().minusDays(FIRST_ENTRY_DAYS_AGO);
            List<WeatherInfo> weatherInfos = parseForecast(document, firstDate);
            if (weatherInfos.isEmpty()) {
                log.warn("No forecast entries were found at {}; the workbook will only contain the header row",
                        FORECAST_URL);
            }
            writeWeatherToExcel(weatherInfos, outputPath);
        } catch (IOException e) {
            throw new RuntimeException(
                    "Failed to export the forecast from " + FORECAST_URL + " to " + outputPath, e);
        }
    }

    /** Downloads and parses the HTML document at {@code url}. */
    private static Document fetchForecastPage(String url) throws IOException {
        return Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .get();
    }

    /**
     * Turns every {@code div.temp} element that contains a {@code span} into one forecast entry.
     *
     * <p>Entries are dated consecutively starting at {@code firstDate}. Only the first span of
     * each element is read. Elements without a span are ignored and do not consume a date. A span
     * whose text cannot be parsed is logged and skipped, but still consumes its date so that the
     * following entries stay aligned.
     *
     * @param document  the downloaded forecast page
     * @param firstDate the date assigned to the first entry
     * @return the parsed entries in page order; never {@code null}
     */
    private static List<WeatherInfo> parseForecast(Document document, LocalDate firstDate) {
        List<WeatherInfo> weatherInfos = new ArrayList<>();
        LocalDate date = firstDate;
        Elements tempElements = document.select("div.temp");
        for (Element tempElement : tempElements) {
            Element spanElement = tempElement.selectFirst("span");
            if (spanElement == null) {
                continue;
            }
            String spanText = spanElement.text();
            System.out.println(spanText);

            String formattedDate = date.format(DATE_FORMAT);
            Optional<WeatherInfo> entry = parseEntry(formattedDate, spanText);
            if (entry.isPresent()) {
                WeatherInfo weatherInfo = entry.get();
                System.out.println("日期:" + formattedDate + " 温度:" + weatherInfo.getLowTemp()
                        + " ~ " + weatherInfo.getHighTemp());
                weatherInfos.add(weatherInfo);
            } else {
                log.warn("Skipping entry for {}: unexpected temperature text '{}'", formattedDate, spanText);
            }
            date = date.plusDays(1);
        }
        return weatherInfos;
    }

    /**
     * Parses a cell text of the form {@code low~high<unit>} (for example {@code 3~12℃}).
     *
     * @param date     the already formatted date of the entry
     * @param spanText the raw text of the temperature cell
     * @return the entry, or an empty Optional when the text has no separator or no upper bound
     */
    private static Optional<WeatherInfo> parseEntry(String date, String spanText) {
        // Split once; a text without the separator yields a single part.
        String[] parts = spanText.split(RANGE_SEPARATOR);
        if (parts.length < 2) {
            return Optional.empty();
        }
        String lowTemp = parts[0].trim();
        String highTemp = stripTrailingUnit(parts[1].trim());
        if (highTemp.isEmpty()) {
            return Optional.empty();
        }
        WeatherInfo weatherInfo = new WeatherInfo();
        weatherInfo.setDate(date);
        weatherInfo.setLowTemp(lowTemp);
        weatherInfo.setHighTemp(highTemp);
        return Optional.of(weatherInfo);
    }

    /**
     * Removes the trailing non-digit characters (the unit suffix) from {@code value}.
     * A value that already ends in a digit is returned unchanged, so a missing unit no longer
     * costs the last digit.
     */
    private static String stripTrailingUnit(String value) {
        int end = value.length();
        while (end > 0 && !Character.isDigit(value.charAt(end - 1))) {
            end--;
        }
        return value.substring(0, end);
    }

    /**
     * Writes the entries to a new .xlsx workbook with one sheet named "Weather Forecast".
     *
     * <p>Row 0 holds the headers {@code Date}, {@code LowTemp}, {@code HighTemp}; each entry
     * follows as one row of string cells. An existing file at {@code filePath} is overwritten.
     *
     * @param weatherInfos the entries to write, in row order
     * @param filePath     the destination file
     * @throws IOException if the file cannot be created or written; the failure is propagated
     *                     instead of being printed and ignored
     */
    private static void writeWeatherToExcel(List<WeatherInfo> weatherInfos, String filePath) throws IOException {
        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("Weather Forecast");

            // Header row.
            Row headerRow = sheet.createRow(0);
            String[] headers = {"Date", "LowTemp", "HighTemp"};
            for (int i = 0; i < headers.length; i++) {
                Cell cell = headerRow.createCell(i);
                cell.setCellValue(headers[i]);
            }

            // One data row per entry, starting right below the header.
            int rowIndex = 1;
            for (WeatherInfo weatherInfo : weatherInfos) {
                Row row = sheet.createRow(rowIndex++);
                row.createCell(0).setCellValue(weatherInfo.getDate());
                row.createCell(1).setCellValue(weatherInfo.getLowTemp());
                row.createCell(2).setCellValue(weatherInfo.getHighTemp());
            }

            try (FileOutputStream fileOut = new FileOutputStream(filePath)) {
                workbook.write(fileOut);
            }
        }
        System.out.println("Weather data has been written to " + filePath);
    }

    /** One forecast entry; all values are kept as the text that is written to the workbook. */
    @Setter
    @Getter
    static class WeatherInfo {
        /** Entry date formatted as {@code yyyyMMdd}. */
        private String date;
        /** Lower bound of the temperature range as scraped. */
        private String lowTemp;
        /** Upper bound of the temperature range with the unit suffix removed. */
        private String highTemp;
    }
}