diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/Makefile" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/Makefile"
new file mode 100644
index 0000000..b89f3ad
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/Makefile"
@@ -0,0 +1,12 @@
+bighomework : main.o getcontent.o dfs.o datechange.o dealwith.o # link the objects only; each .o rule below already tracks its own .cpp and head.h
+	g++ -std=c++11 -o bighomework main.o getcontent.o dfs.o datechange.o dealwith.o
+main.o : main.cpp head.h # -std=c++11 is passed to every compile step (the sources call ofstream::open with a std::string), not only to the link
+	g++ -std=c++11 -c main.cpp
+dfs.o : dfs.cpp head.h
+	g++ -std=c++11 -c dfs.cpp
+datechange.o : datechange.cpp head.h
+	g++ -std=c++11 -c datechange.cpp
+getcontent.o : getcontent.cpp head.h
+	g++ -std=c++11 -c getcontent.cpp
+dealwith.o : dealwith.cpp head.h
+	g++ -std=c++11 -c dealwith.cpp
\ No newline at end of file
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/add" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/add"
new file mode 100644
index 0000000..76d4bb8
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/add"
@@ -0,0 +1 @@
+add
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/datechange.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/datechange.cpp"
new file mode 100644
index 0000000..e96b522
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/datechange.cpp"
@@ -0,0 +1,35 @@
+#include "head.h"
+// Parses the ASCII digits in text[first, last) as a non-negative number.
+// Returns -1 when the range is empty, longer than max_digits, or contains a
+// byte that is not '0'..'9'.
+static int datechange_field(const string &text, string::size_type first,
+                            string::size_type last, string::size_type max_digits)
+{
+    if (last <= first || last - first > max_digits)
+        return -1;
+    int value = 0;
+    for (string::size_type i = first; i < last; i++)
+    {
+        if (text[i] < '0' || text[i] > '9')
+            return -1;
+        value = value * 10 + (text[i] - '0');
+    }
+    return value;
+}
+
+// Converts a date written as "<year>年<month>月<day>日" into the integer
+// yyyymmdd, e.g. "2015年3月5日" -> 20150305.
+//
+// date: text that starts with the year digits, has the month number directly
+//       before "月" and the day number between "月" and "日".
+// Returns yyyymmdd, or 0 when any of the three fields cannot be read.
+//
+// Only ASCII digits and the positions of the two markers are inspected, and
+// the marker width is taken from the literal itself, so the result does not
+// depend on how many bytes the character encoding uses for each marker.
+// Malformed input no longer leaves part of a scratch buffer uninitialized.
+int datechange(string date)
+{
+    const string month_mark = "月";
+    const string day_mark = "日";
+
+    // Year: the run of digits at the very start of the text.
+    string::size_type year_end = 0;
+    while (year_end < date.length() && date[year_end] >= '0' && date[year_end] <= '9')
+        year_end++;
+
+    const string::size_type month_end = date.find(month_mark);
+    if (month_end == string::npos)
+        return 0;
+
+    // Month: walk backwards from the marker over the digits that precede it,
+    // never reaching into the year digits.
+    string::size_type month_begin = month_end;
+    while (month_begin > year_end && date[month_begin - 1] >= '0' && date[month_begin - 1] <= '9')
+        month_begin--;
+
+    // Day: everything between the month marker and the day marker.
+    const string::size_type day_begin = month_end + month_mark.length();
+    const string::size_type day_end = date.find(day_mark, day_begin);
+    if (day_end == string::npos)
+        return 0;
+
+    const int year = datechange_field(date, 0, year_end, 4);
+    const int month = datechange_field(date, month_begin, month_end, 2);
+    const int day = datechange_field(date, day_begin, day_end, 2);
+    if (year < 0 || month < 0 || day < 0)
+        return 0;
+    return year * 10000 + month * 100 + day;
+}
\ No newline at end of file
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dealwith.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dealwith.cpp"
new file mode 100644
index 0000000..cf2f410
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dealwith.cpp"
@@ -0,0 +1,157 @@
+#include "head.h"
+void dealwith12(string command,string file,int Len,int loc) // Extracts one record from the page text `file` and writes it, one field per line, to ../data/<digits>.html; the digits are read from `command` starting at offset loc+Len.
+{ // NOTE(review): several search strings below look damaged in this copy (empty "" patterns, one literal split over two lines) - presumably markup was stripped; confirm against the original source before changing any logic.
+ string name = "../data/"; // output path prefix
+ string stringdata; // text of the field currently being extracted
+ int intdata; // never used in this function
+ int Loc,Loc1,Loc2; // Loc: label position; Loc1/Loc2: positions of the two delimiters found after it
+ int i = 0;
+ while(command[loc+Len+i] <= '9'&&command[loc+Len+i] >= '0') // copy the run of ASCII digits at command[loc+Len ...] into the file name
+ {
+ name += command[loc+Len+i];
+ i++;
+ }
+ name += ".html";
+ ofstream foutdata;
+ foutdata.open(name); // the result of open() is not checked
+ foutdata << name << endl; // first output line is the output path itself
+ Loc = file.find("
失踪人姓名 | ");
+ Loc1 = file.find("",Loc+1); // every field below repeats this pattern: find the label, find two delimiters after it, take the text between them
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4); // skips 4 bytes after Loc1; the find() results are not checked for npos
+ foutdata << stringdata << endl; // field labelled "失踪人姓名"
+ Loc = file.find("性别 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ if(stringdata == "男") // the "性别" field is written as 0 when it equals "男", otherwise as 1
+ foutdata << 0 << endl;
+ else
+ foutdata << 1 << endl;
+ Loc = file.find("失踪人籍贯 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ foutdata << stringdata << endl; // field labelled "失踪人籍贯", written as text
+ Loc = file.find("出生日期 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ int birthday;
+ birthday = datechange(stringdata); // the "出生日期" text is converted to an int by datechange()
+ foutdata << birthday << endl;
+ Loc = file.find("失踪日期 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ int lostday;
+ lostday = datechange(stringdata); // the "失踪日期" text is converted the same way
+ foutdata << lostday << endl;
+ Loc = file.find("失踪时身高 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ int height = 0;
+ for(int j = 0;j < Loc2-Loc1-4;j++) // folds the field into a decimal number; bytes are not checked to be digits
+ {
+ height = height*10 + int(stringdata[j]-48); // 48 is '0'
+ }
+ foutdata << height << endl;
+ Loc = file.find("失踪地点 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ foutdata << stringdata << endl; // field labelled "失踪地点", written as text
+ Loc = file.find("可能去向 | ");
+ Loc1 = file.find("",Loc+1);
+ Loc2 = file.find(" | ",Loc1+1);
+ stringdata = file.substr(Loc1+4,Loc2-Loc1-4);
+ foutdata << stringdata << endl; // field labelled "可能去向", written as text
+ Loc = file.find("详细信息 | "); // NOTE(review): this Loc is not used afterwards
+ Loc1 = file.find(" | ",Loc1+1); // search continues from the previous Loc1, not from Loc
+ stringdata = file.substr(Loc1+3,Loc2-Loc1-3); // NOTE(review): Loc2 still holds the value found for the "可能去向" field - confirm this is intended
+ string prestringdata;
+ for(int k = 0 ; k < Loc2-Loc1-3;k++)
+ {
+ if(!(stringdata[k] < 48&&stringdata[k] >= 0&&stringdata[k] > 57&&stringdata[k] < 128)) // NOTE(review): a byte cannot be both < 48 and > 57, so the inner test is never true and every byte is copied
+ {
+ prestringdata += stringdata[k];
+ }
+ }
+ ofstream foutinput;
+ ifstream finca;
+ string _part,all;
+ foutinput.open("input.txt");
+ foutinput << prestringdata << endl; // endl flushes, so the text is on disk before the script starts
+ system("python take_apart.py"); // presumably the script reads input.txt and writes cache.txt - confirm against take_apart.py
+ finca.open("cache.txt");
+ while(getline(finca,_part)) // join all lines of cache.txt without separators
+ {
+ all += _part;
+ }
+ foutdata << all << endl; // last output line: the concatenated contents of cache.txt
+}
+void dealwith3(string command, string file, int Len, int loc) // Writes fields taken from free-text page content `file` to ../data/<digits>.html; the digits are read from `command` starting at offset loc+Len. The only call visible in this diff is commented out.
+{ // NOTE(review): one search string below is split over two lines in this copy - presumably markup was stripped; confirm against the original source before changing any logic.
+ string name = "../data/"; // output path prefix
+ string article = ""; // empty search pattern
+ string stringdata; // text of the field currently being extracted
+ string information; // label text searched for in `file`
+ int intdata,len1; // intdata is never used; len1 holds the byte length of `information`
+ int Loc,Loc1,Loc2;
+ int i = 0;
+ while(command[loc+Len+i] <= '9'&&command[loc+Len+i] >= '0') // copy the run of ASCII digits at command[loc+Len ...] into the file name
+ {
+ name += command[loc+Len+i];
+ i++;
+ }
+ name += ".html";
+ ofstream foutdata;
+ foutdata.open(name); // the result of open() is not checked
+ foutdata << name << endl; // first output line is the output path itself
+ Loc = file.find(article); // article is empty here, so this yields position 0
+ information = "姓名: ";
+ Loc1 = file.find(information);
+ len1 = information.length();
+ Loc2 = file.find(",",Loc1+1);
+ stringdata = file.substr(Loc1+len1,Loc2-Loc1-len1); // text between the "姓名: " label and the next ","
+ foutdata << stringdata << endl;
+ Loc1 = file.find(" ",Loc2+1);
+ stringdata = file.substr(Loc1+1,1); // takes a single byte after the space
+ if(stringdata == "男") // NOTE(review): a 1-byte substring cannot equal a multi-byte "男", so this branch looks unreachable and 1 is always written
+ foutdata << 0 << endl;
+ else
+ foutdata << 1 << endl;
+ // native place
+ information = "出生于";
+ Loc1 = file.find("出生于",Loc1+1);
+ len1 = information.length();
+ Loc2 = file.find("日",Loc1+1);
+ stringdata = file.substr(Loc1+len1,Loc2-Loc1-len1+1); // NOTE(review): this value is overwritten below without being written out
+
+ Loc1 = file.find("
",Loc+1);
+ stringdata = file.substr(Loc1+3,Loc2-Loc1-3); // Loc2 is still the position of "日" found above
+ string prestringdata;
+ for(int k = 0 ; k < Loc2-Loc1-3;k++)
+ {
+ if(!(stringdata[k] < 48&&stringdata[k] >= 0&&stringdata[k] > 57&&stringdata[k] < 128)) // NOTE(review): a byte cannot be both < 48 and > 57, so the inner test is never true and every byte is copied
+ {
+ prestringdata += stringdata[k];
+ }
+ }
+ ofstream foutinput;
+ ifstream finca;
+ string _part,all;
+ foutinput.open("input.txt");
+ foutinput << prestringdata << endl; // endl flushes, so the text is on disk before the script starts
+ system("python take_apart.py"); // presumably the script reads input.txt and writes cache.txt - confirm against take_apart.py
+ finca.open("cache.txt");
+ while(getline(finca,_part)) // join all lines of cache.txt without separators
+ {
+ all += _part;
+ }
+ foutdata << all << endl; // last output line: the concatenated contents of cache.txt
+
+
+}
\ No newline at end of file
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp"
new file mode 100644
index 0000000..3d60183
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp"
@@ -0,0 +1,18 @@
+#include "head.h"
+
+// Crawls the site starting from _root. URLs are taken from the front of
+// url_to_be_crawled; each one is fetched with getContent() the first time it
+// is seen and then recorded in url_already_be_crawled. The number of fetched
+// URLs is printed once the pending container is empty.
+void crawled::deep_search(void)
+{
+    url_to_be_crawled.push(_root);
+    for (;;)
+    {
+        if (url_to_be_crawled.empty())
+            break;
+
+        current_url = url_to_be_crawled.front();
+        url_to_be_crawled.pop();
+
+        const bool seen_before =
+            url_already_be_crawled.find(current_url) != url_already_be_crawled.end();
+        if (!seen_before)
+        {
+            number += 1;
+            getContent(current_url);
+        }
+        // The URL is marked as crawled only after getContent() has returned.
+        url_already_be_crawled.insert(current_url);
+    }
+    cout << number << endl;
+}
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp"
new file mode 100644
index 0000000..b612327
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp"
@@ -0,0 +1,88 @@
+#include "head.h"
+// Downloads `url` with wget, hands person/family detail pages to dealwith12(),
+// and queues every not-yet-crawled link found in the downloaded page.
+//
+// url: address to fetch. It ends up in a shell command, so it is single-quoted
+//      and placed after "--" to keep crawled text from being interpreted as
+//      shell syntax or as a wget option.
+//
+// Side effects: overwrites url_has_been_crawled.txt (wget output) and
+// all_different_urls.txt (links queued by this call), pushes onto
+// url_to_be_crawled, and terminates the process when the downloaded file
+// cannot be opened.
+void crawled::getContent(string url)
+{
+    const string infloc = "https://www.zgszrkdak.cn/home/person/show/id/";
+    const string infloc2 = "https://www.zgszrkdak.cn/home/family/show/id/";
+
+    // Single-quote the URL for the shell; an embedded ' is written as '\''.
+    string quoted_url = "'";
+    for (string::size_type i = 0; i < url.length(); i++)
+    {
+        if (url[i] == '\'')
+            quoted_url += "'\\''";
+        else
+            quoted_url += url[i];
+    }
+    quoted_url += "'";
+    const string command =
+        "wget -O url_has_been_crawled.txt --tries=2 --timeout=3 -- " + quoted_url;
+
+    // sleep() takes whole seconds, so sleep(0.4) truncated to no pause at all;
+    // usleep() gives the intended 0.4 s delay between requests.
+    usleep(400000);
+    system(command.c_str());
+
+    ifstream fin;
+    ofstream fout;
+    fin.open("url_has_been_crawled.txt");
+    fout.open("all_different_urls.txt");
+    if (fin.is_open() == false)
+    {
+        cerr << "Can't open file!\n";
+        exit(EXIT_FAILURE);
+    }
+
+    // Read the whole page into one string; line breaks are dropped.
+    string file, file_part;
+    while (getline(fin, file_part))
+    {
+        file += file_part;
+    }
+
+    // Detail pages: tell dealwith12() where the numeric id starts. The prefix
+    // that actually matched is used, so family pages get a valid offset too.
+    string::size_type prefix_pos = command.find(infloc);
+    string::size_type prefix_len = infloc.length();
+    if (prefix_pos == string::npos)
+    {
+        prefix_pos = command.find(infloc2);
+        prefix_len = infloc2.length();
+    }
+    if (prefix_pos != string::npos)
+    {
+        dealwith12(command, file, static_cast<int>(prefix_len), static_cast<int>(prefix_pos));
+    }
+    // News index pages (".../home/news/index/classid/") are fetched but not
+    // extracted: the dealwith3() call for them is disabled.
+
+    // Collect every quoted href="..." / href='...' value.
+    const string href_marker = "href=";
+    string::size_type href_pos = file.find(href_marker);
+    while (href_pos != string::npos)
+    {
+        const string::size_type quote_pos = href_pos + href_marker.length();
+        if (quote_pos >= file.length())
+            break; // "href=" is the last thing in the page
+
+        const char quote = file[quote_pos];
+        if (quote == '"' || quote == '\'')
+        {
+            const string::size_type value_begin = quote_pos + 1;
+            const string::size_type value_end = file.find(quote, value_begin);
+            if (value_end == string::npos)
+                break; // unterminated attribute: nothing reliable follows
+
+            string url_to_be_finded = file.substr(value_begin, value_end - value_begin);
+
+            // Links that already contain the site root are queued as they are.
+            if (url_to_be_finded.find(_root) != string::npos)
+            {
+                if (url_already_be_crawled.find(url_to_be_finded) == url_already_be_crawled.end())
+                {
+                    fout << url_to_be_finded << endl;
+                    url_to_be_crawled.push(url_to_be_finded);
+                }
+            }
+
+            // Links without a scheme are resolved against the site root.
+            // "https://" is checked as well as "http://" so absolute https
+            // links are no longer rewritten into "<root>/https:".
+            if (url_to_be_finded.find("http://") == string::npos &&
+                url_to_be_finded.find("https://") == string::npos)
+            {
+                const string::size_type slash_pos = url_to_be_finded.find("/");
+                if (slash_pos == 0)
+                    url_to_be_finded = _root + url_to_be_finded;
+                else if (slash_pos == string::npos)
+                    url_to_be_finded = _root + "/" + url_to_be_finded;
+                else
+                {
+                    // NOTE(review): only the first path segment is kept here,
+                    // as in the existing behaviour - confirm this is intended.
+                    url_to_be_finded = _root + "/" + url_to_be_finded.substr(0, slash_pos);
+                }
+                if (url_already_be_crawled.find(url_to_be_finded) == url_already_be_crawled.end())
+                {
+                    fout << url_to_be_finded << endl;
+                    url_to_be_crawled.push(url_to_be_finded);
+                }
+            }
+        }
+        href_pos = file.find(href_marker, href_pos + 1);
+    }
+    return;
+}
+
diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h"
new file mode 100644
index 0000000..8fff5a2
--- /dev/null
+++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h"
@@ -0,0 +1,43 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include