案例:利用爬虫从三个网站获取数据,拼接生成完整的姓名。
网站一:姓氏
网站二:男生名字
网站三:女生名字
将姓氏分别与男生名字、女生名字进行拼接,得到完整的男生姓名和女生姓名。
//导包
import java.io.*;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
/**
 * Scrapes three web pages (family names, boys' given names, girls' given names)
 * and combines the results into random, unique full names.
 */
public class test {

    /** Page scraped for family names. */
    private static final String FAMILY_NAME_URL =
            "https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&from=kg0";

    /**
     * Page scraped for boys' given names. The address used to carry an unrelated
     * "//oracle.com/..." path pasted after ".html"; that tail has been removed.
     */
    private static final String BOY_NAME_URL = "http://www.haoming8.cn/baobao/10881.html";

    /** Page scraped for girls' given names (same pasted tail removed as above). */
    private static final String GIRL_NAME_URL = "http://www.haoming8.cn/baobao/7641.html";

    /**
     * Four Han characters followed by a comma (ASCII or full-width, U+FF0C) or an
     * ideographic full stop (U+3002); group 1 holds the four characters. The earlier
     * pattern ended in an unescaped '.', which matches any character, so every
     * five-character window of the page was accepted.
     * NOTE(review): assumes the page separates the groups with one of these marks - confirm.
     */
    private static final String FAMILY_NAME_REGEX = "([\\u4E00-\\u9FA5]{4})[,\\uFF0C\\u3002]";

    /**
     * Two Han characters followed by an enumeration comma or a full stop; group 1 is the
     * two-character name. The quantifier sits inside the group: with it outside, group 1
     * only kept the last repetition, i.e. a single character.
     */
    private static final String BOY_NAME_REGEX = "([\\u4E00-\\u9FA5]{2})(、|。)";

    /** Five two-character tokens separated by single spaces; used with group 0 (the whole match). */
    private static final String GIRL_NAME_REGEX = "(.. ){4}..";

    /** Finds the charset parameter of a Content-Type header value. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("(?i)charset\\s*=\\s*\"?([^\\s;\"]+)");

    /**
     * Downloads the three pages, extracts the name lists and prints ten boys' and ten
     * girls' full names.
     *
     * @param args unused
     * @throws IOException if a page cannot be downloaded
     */
    public static void main(String[] args) throws IOException {
        String familyPage = webcrawler(FAMILY_NAME_URL);
        String boyPage = webcrawler(BOY_NAME_URL);
        String girlPage = webcrawler(GIRL_NAME_URL);

        ArrayList<String> familyGroups = getData(familyPage, FAMILY_NAME_REGEX, 1);
        ArrayList<String> boyMatches = getData(boyPage, BOY_NAME_REGEX, 1);
        ArrayList<String> girlGroups = getData(girlPage, GIRL_NAME_REGEX, 0);

        // Every character of a matched group is treated as one family name.
        ArrayList<String> familynamelist = new ArrayList<>();
        for (String group : familyGroups) {
            for (int i = 0; i < group.length(); i++) {
                familynamelist.add(String.valueOf(group.charAt(i)));
            }
        }

        // Drop repeated given names while keeping first-seen order.
        ArrayList<String> boynamelist = new ArrayList<>(new LinkedHashSet<>(boyMatches));

        // Each match is a space-separated run of names; flatten it.
        ArrayList<String> girlnamelist = new ArrayList<>();
        for (String group : girlGroups) {
            Collections.addAll(girlnamelist, group.split(" "));
        }

        // Generate the data: unique full names.
        getinfos(familynamelist, boynamelist, girlnamelist, 10, 10);
    }

    /**
     * Builds unique random full names, prints the boys' set and then the girls' set to
     * standard output, and returns all of them.
     *
     * <p>Unlike the earlier implementation, the input lists are not shuffled or otherwise
     * modified, the result is returned instead of {@code null}, and an unsatisfiable
     * request fails fast instead of looping forever.
     *
     * @param familynamelist family names
     * @param boynamelist    boys' given names
     * @param girlnamelist   girls' given names
     * @param boycount       number of distinct boys' full names to produce
     * @param girlcount      number of distinct girls' full names to produce
     * @return the boys' names followed by the girls' names; order within each part is unspecified
     * @throws IllegalArgumentException if a count is negative or larger than the number of
     *                                  distinct full names the lists can form
     */
    public static ArrayList<String> getinfos(ArrayList<String> familynamelist, ArrayList<String> boynamelist,
            ArrayList<String> girlnamelist, int boycount, int girlcount) {
        HashSet<String> boyhs = pickUniqueNames(familynamelist, boynamelist, boycount);
        HashSet<String> girlhs = pickUniqueNames(familynamelist, girlnamelist, girlcount);

        System.out.println(boyhs);
        System.out.println(girlhs);

        ArrayList<String> all = new ArrayList<>(boyhs.size() + girlhs.size());
        all.addAll(boyhs);
        all.addAll(girlhs);
        return all;
    }

    /**
     * Draws (family name, given name) pairs at random without repeating a pair until
     * {@code count} distinct concatenations have been collected.
     */
    private static HashSet<String> pickUniqueNames(List<String> familyNames, List<String> givenNames, int count) {
        if (count < 0) {
            throw new IllegalArgumentException("count must not be negative: " + count);
        }
        HashSet<String> picked = new HashSet<>();
        if (count == 0) {
            return picked;
        }

        // Pair k stands for familyNames[k / givenNames.size()] + givenNames[k % givenNames.size()].
        long remaining = (long) familyNames.size() * givenNames.size();
        // Sparse Fisher-Yates shuffle over the pair indices: only displaced slots are stored,
        // so no pair is drawn twice and the loop ends after at most one pass over all pairs.
        Map<Long, Long> displaced = new HashMap<>();
        ThreadLocalRandom random = ThreadLocalRandom.current();

        while (picked.size() < count) {
            if (remaining == 0) {
                throw new IllegalArgumentException("cannot build " + count + " unique names from "
                        + familyNames.size() + " family names and " + givenNames.size() + " given names");
            }
            long slot = random.nextLong(remaining);
            long last = remaining - 1;
            long pair = displaced.getOrDefault(slot, slot);
            displaced.put(slot, displaced.getOrDefault(last, last));
            remaining = last;

            String family = familyNames.get((int) (pair / givenNames.size()));
            String given = givenNames.get((int) (pair % givenNames.size()));
            // Different pairs may concatenate to the same text; the set keeps names unique.
            picked.add(family + given);
        }
        return picked;
    }

    /**
     * Collects one capture group from every match of a regular expression.
     *
     * @param str   text to search
     * @param regex regular expression to apply
     * @param index capture group to keep from each match; 0 is the whole match
     * @return the selected group of each match, in match order
     */
    public static ArrayList<String> getData(String str, String regex, int index) {
        ArrayList<String> list = new ArrayList<>();
        Matcher matcher = Pattern.compile(regex).matcher(str);
        while (matcher.find()) {
            list.add(matcher.group(index));
        }
        return list;
    }

    /**
     * Downloads the resource at the given address and returns its content as text.
     *
     * <p>The bytes are decoded with the charset named in the response's Content-Type,
     * falling back to UTF-8, rather than with the platform default charset. The stream
     * is closed even when reading fails.
     *
     * @param net address of the resource
     * @return everything read from the resource
     * @throws IOException if the address is malformed or the resource cannot be read
     */
    public static String webcrawler(String net) throws IOException {
        URLConnection conn = new URL(net).openConnection();
        StringBuilder sb = new StringBuilder();
        try (InputStream in = conn.getInputStream();
             Reader reader = new InputStreamReader(in, charsetOf(conn.getContentType()))) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
        }
        return sb.toString();
    }

    /** Returns the charset named in a Content-Type value, or UTF-8 when none is usable. */
    private static Charset charsetOf(String contentType) {
        if (contentType != null) {
            Matcher m = CHARSET_PARAM.matcher(contentType);
            if (m.find()) {
                try {
                    return Charset.forName(m.group(1));
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: use the default below.
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}
登录操作:
正确的用户名和密码保存在文件中。程序先从文件中读取它们,再与用户输入的用户名和密码进行比较,判断登录是否成功。
/**
 * Console login check: the expected user name and password are read from a file and
 * compared with what the user types.
 */
public class test {

    /**
     * File holding the expected credentials on its first line, in the form
     * {@code <key>=<username>&<key>=<password>}. A forward slash is used so the relative
     * path resolves on Windows and on Unix-like systems alike.
     */
    private static final String CREDENTIALS_FILE = "src/a.txt";

    /**
     * Reads the expected credentials, prompts for a user name and a password on standard
     * input, and prints whether they match.
     *
     * @param args unused
     * @throws IOException if the credentials file cannot be read, is empty, or does not
     *                     have the expected form
     */
    public static void main(String[] args) throws IOException {
        String line;
        // The file is decoded as UTF-8 instead of the platform default charset, and the
        // reader is closed even if reading fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(CREDENTIALS_FILE), StandardCharsets.UTF_8))) {
            line = br.readLine();
        }
        if (line == null) {
            throw new IOException("credentials file is empty: " + CREDENTIALS_FILE);
        }

        String[] userInfo = line.split("&");
        if (userInfo.length < 2) {
            throw new IOException("credentials file is malformed (expected two '&'-separated fields): "
                    + CREDENTIALS_FILE);
        }
        String rightusername = valueOfField(userInfo[0]);
        String rightpassword = valueOfField(userInfo[1]);

        // Not closed on purpose: closing the scanner would also close System.in.
        Scanner sc = new Scanner(System.in);
        System.out.println("请输入用户名:");
        // The first typed line is the user name. An extra nextLine() used to sit here and
        // silently discarded it, so the user name was taken from the second line.
        String username = sc.nextLine();
        System.out.println("请输入密码:");
        String password = sc.nextLine();

        if (rightusername.equals(username) && rightpassword.equals(password)) {
            System.out.println("登陆成功");
        } else {
            System.out.println("登陆失败");
        }
    }

    /** Returns the text after the first '=' of a {@code key=value} field. */
    private static String valueOfField(String field) throws IOException {
        int separator = field.indexOf('=');
        if (separator < 0) {
            // The field content is left out of the message because it may hold the password.
            throw new IOException("credentials file is malformed (missing '='): " + CREDENTIALS_FILE);
        }
        return field.substring(separator + 1);
    }
}