# Builds the crawler executable `bighomework` from the five translation units
# in this directory. Every object file also depends on the shared header head.h.

CXX      = g++
# dealwith.cpp passes a std::string to ofstream::open, which only exists since
# C++11, so the standard flag has to be given when COMPILING each object and
# not just on the link line.
CXXFLAGS = -std=c++11
OBJS     = main.o getcontent.o dfs.o datechange.o dealwith.o

# Link step. Only the objects are prerequisites: the per-object rules below
# already track the .cpp files, so listing them here again was redundant.
bighomework : $(OBJS)
	$(CXX) $(CXXFLAGS) -o bighomework $(OBJS)

main.o : main.cpp head.h
	$(CXX) $(CXXFLAGS) -c main.cpp

dfs.o : dfs.cpp head.h
	$(CXX) $(CXXFLAGS) -c dfs.cpp

datechange.o : datechange.cpp head.h
	$(CXX) $(CXXFLAGS) -c datechange.cpp

getcontent.o : getcontent.cpp head.h
	$(CXX) $(CXXFLAGS) -c getcontent.cpp

dealwith.o : dealwith.cpp head.h
	$(CXX) $(CXXFLAGS) -c dealwith.cpp
#include <cstddef>
#include <string>

// Reads text[begin, end) as a decimal number made of 1..max_digits digits.
// Returns -1 when the range is empty, too long, out of bounds (this includes
// end == std::string::npos from a failed find) or contains a non-digit.
static int datechange_field(const std::string& text, std::size_t begin,
                            std::size_t end, std::size_t max_digits)
{
    if (begin >= end || end > text.size() || end - begin > max_digits)
        return -1;
    int value = 0;
    for (std::size_t i = begin; i < end; ++i)
    {
        if (text[i] < '0' || text[i] > '9')
            return -1;
        value = value * 10 + (text[i] - '0');
    }
    return value;
}

// Converts a date written as "YYYY<sep>M月D日" (month and day may have one or
// two digits) into the integer YYYYMMDD, e.g. "2010年5月3日" -> 20100503.
//
// date: the textual date; the first four bytes must be the year digits.
// Returns the packed date, or 0 when the text does not have that shape.
//
// The previous implementation hard-coded byte offsets 6/7/8, which only line
// up when the year is followed by a two-byte separator; for any other layout
// it left part of its scratch buffer unwritten and then read it. This version
// locates the fields relative to the "月" and "日" markers instead, using the
// markers' own byte length, so it works for whatever encoding this source
// file (and therefore the literals below) is saved in.
int datechange(std::string date)
{
    const std::string month_mark = "月";
    const std::string day_mark = "日";

    const int year = datechange_field(date, 0, 4, 4);
    const std::size_t month_pos = date.find(month_mark);
    if (year < 0 || month_pos == std::string::npos)
        return 0;

    // The month is the run of digits that ends right before the month marker.
    // Never walk back into the four year digits.
    std::size_t month_begin = month_pos;
    while (month_begin > 4 &&
           date[month_begin - 1] >= '0' && date[month_begin - 1] <= '9')
        --month_begin;
    const int month = datechange_field(date, month_begin, month_pos, 2);

    // The day sits between the end of the month marker and the day marker.
    const std::size_t day_begin = month_pos + month_mark.size();
    const std::size_t day_end = date.find(day_mark, day_begin);
    const int day = datechange_field(date, day_begin, day_end, 2);

    if (month < 0 || day < 0)
        return 0;
    return year * 10000 + month * 100 + day;
}
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dealwith.cpp" @@ -0,0 +1,157 @@ +#include "head.h" +void dealwith12(string command,string file,int Len,int loc) +{ + string name = "../data/"; + string stringdata; + int intdata; + int Loc,Loc1,Loc2; + int i = 0; + while(command[loc+Len+i] <= '9'&&command[loc+Len+i] >= '0') + { + name += command[loc+Len+i]; + i++; + } + name += ".html"; + ofstream foutdata; + foutdata.open(name); + foutdata << name << endl; + Loc = file.find("失踪人姓名"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + foutdata << stringdata << endl; + Loc = file.find("性别"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + if(stringdata == "男") + foutdata << 0 << endl; + else + foutdata << 1 << endl; + Loc = file.find("失踪人籍贯"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + foutdata << stringdata << endl; + Loc = file.find("出生日期"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + int birthday; + birthday = datechange(stringdata); + foutdata << birthday << endl; + Loc = file.find("失踪日期"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + int lostday; + lostday = datechange(stringdata); + foutdata << lostday << endl; + Loc = file.find("失踪时身高"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + int height = 0; + for(int j = 0;j < Loc2-Loc1-4;j++) + { + height = height*10 + int(stringdata[j]-48); + } + foutdata << height << endl; + Loc = file.find("失踪地点"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = 
file.substr(Loc1+4,Loc2-Loc1-4); + foutdata << stringdata << endl; + Loc = file.find("可能去向"); + Loc1 = file.find("",Loc+1); + Loc2 = file.find("",Loc1+1); + stringdata = file.substr(Loc1+4,Loc2-Loc1-4); + foutdata << stringdata << endl; + Loc = file.find("详细信息"); + Loc1 = file.find("

",Loc1+1); + stringdata = file.substr(Loc1+3,Loc2-Loc1-3); + string prestringdata; + for(int k = 0 ; k < Loc2-Loc1-3;k++) + { + if(!(stringdata[k] < 48&&stringdata[k] >= 0&&stringdata[k] > 57&&stringdata[k] < 128)) + { + prestringdata += stringdata[k]; + } + } + ofstream foutinput; + ifstream finca; + string _part,all; + foutinput.open("input.txt"); + foutinput << prestringdata << endl; + system("python take_apart.py"); + finca.open("cache.txt"); + while(getline(finca,_part)) + { + all += _part; + } + foutdata << all << endl; +} +void dealwith3(string command, string file, int Len, int loc) +{ + string name = "../data/"; + string article = "
"; + string stringdata; + string information; + int intdata,len1; + int Loc,Loc1,Loc2; + int i = 0; + while(command[loc+Len+i] <= '9'&&command[loc+Len+i] >= '0') + { + name += command[loc+Len+i]; + i++; + } + name += ".html"; + ofstream foutdata; + foutdata.open(name); + foutdata << name << endl; + Loc = file.find(article); + information = "姓名: "; + Loc1 = file.find(information); + len1 = information.length(); + Loc2 = file.find(",",Loc1+1); + stringdata = file.substr(Loc1+len1,Loc2-Loc1-len1); + foutdata << stringdata << endl; + Loc1 = file.find(" ",Loc2+1); + stringdata = file.substr(Loc1+1,1); + if(stringdata == "男") + foutdata << 0 << endl; + else + foutdata << 1 << endl; + //籍贯 + information = "出生于"; + Loc1 = file.find("出生于",Loc1+1); + len1 = information.length(); + Loc2 = file.find("日",Loc1+1); + stringdata = file.substr(Loc1+len1,Loc2-Loc1-len1+1); + + Loc1 = file.find("
",Loc+1); + stringdata = file.substr(Loc1+3,Loc2-Loc1-3); + string prestringdata; + for(int k = 0 ; k < Loc2-Loc1-3;k++) + { + if(!(stringdata[k] < 48&&stringdata[k] >= 0&&stringdata[k] > 57&&stringdata[k] < 128)) + { + prestringdata += stringdata[k]; + } + } + ofstream foutinput; + ifstream finca; + string _part,all; + foutinput.open("input.txt"); + foutinput << prestringdata << endl; + system("python take_apart.py"); + finca.open("cache.txt"); + while(getline(finca,_part)) + { + all += _part; + } + foutdata << all << endl; + + +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp" new file mode 100644 index 0000000..3d60183 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/dfs.cpp" @@ -0,0 +1,18 @@ +#include "head.h" + +void crawled::deep_search(void) +{ + url_to_be_crawled.push(_root); + while(!url_to_be_crawled.empty()) + { + current_url = url_to_be_crawled.front(); + url_to_be_crawled.pop(); + if(url_already_be_crawled.find(current_url) == url_already_be_crawled.end()) + { + number++; + getContent(current_url); + } + url_already_be_crawled.insert(current_url); + } + cout << number << endl; +} diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp" 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp" new file mode 100644 index 0000000..b612327 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/getcontent.cpp" @@ -0,0 +1,88 @@ +#include "head.h" +void crawled::getContent(string url) +{ + string command; + string infloc = "https://www.zgszrkdak.cn/home/person/show/id/"; + string infloc2 = "https://www.zgszrkdak.cn/home/family/show/id/"; + string infloc3 = "https://www.zgszrkdak.cn/home/news/index/classid/"; + command = "wget -O url_has_been_crawled.txt --tries=2 --timeout=3 \""; + command = command + url + "\""; + sleep(0.4); + system(command.data()); + ifstream fin; + ofstream fout; + string file,file_part; + int length = 0,len = 0,len1 = 0,location_first = 0,location_end = 0; + string url_format1 = "href="; + string url_format2 ; + fin.open("url_has_been_crawled.txt"); + fout.open("all_different_urls.txt"); + if(fin.is_open() == false) + { + cerr << "Can't open file!\n"; + exit(EXIT_FAILURE); + } + while(getline(fin,file_part)) + { + file += file_part; + } + if(command.find(infloc) != string::npos||command.find(infloc2) != string::npos) + { + int loc ,Len; + loc = command.find(infloc); + Len = infloc.length(); + dealwith12(command,file,Len,loc); + } + if(command.find(infloc3) != string::npos) + { + int loc ,Len; + loc = command.find(infloc); + Len = infloc3.length(); + //dealwith3(command,file,Len,loc); + } + location_first = file.find(url_format1); + if(location_first == string::npos) + return; + else + { + while(location_first != string::npos) + { + string url_to_be_finded; + url_format2 = file[location_first+5]; + location_end = file.find(url_format2,location_first+6); + len = 
location_end-location_first-6; + url_to_be_finded = file.substr(location_first+6,len); + if(url_to_be_finded.find(_root) != string::npos) + { + if(url_already_be_crawled.find(url_to_be_finded) == url_already_be_crawled.end()) + { + fout << url_to_be_finded << endl; + url_to_be_crawled.push(url_to_be_finded); + } + } + if(url_to_be_finded.find("http://") == string::npos) + { + int loc = url_to_be_finded.find("/"); + string url_to_be_finded_part; + if(loc == 0) + url_to_be_finded = _root+url_to_be_finded; + else if(loc == string::npos) + url_to_be_finded = _root + "/" + url_to_be_finded; + else + { + url_to_be_finded_part = url_to_be_finded.substr(0,loc); + url_to_be_finded = _root + "/" + url_to_be_finded_part; + } + if(url_already_be_crawled.find(url_to_be_finded) == url_already_be_crawled.end()) + { + fout << url_to_be_finded << endl; + url_to_be_crawled.push(url_to_be_finded); + } + } + location_first = file.find(url_format1,location_first+1); + url_to_be_finded.erase(0,200); + } + } + return; +} + diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h" new file mode 100644 index 0000000..8fff5a2 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/head.h" @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +int datechange(string date); +void dealwith12(string command,string file,int Loc ,int loc); +void 
dealwith3(string command,string file,int Loc ,int loc); +class crawled +{ + private: + int number ; + string _root ; + string current_url ; + queue url_to_be_crawled; + set url_already_be_crawled; + void getContent(string); + void deep_search(void); + public: + crawled() + { + number = 0; + } + crawled(string root) + { + number = 0; + _root = root; + deep_search(); + } +}; + + + diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/main.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/main.cpp" new file mode 100644 index 0000000..d7434b0 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/main.cpp" @@ -0,0 +1,15 @@ +#include "head.h" +int main() +{ + int time,time_beg,time_end; + string root; + time_beg = clock(); + cout << "Please input the name of url that you are going to crawl" << endl; + cin >> root; + crawled website = crawled(root); + time_end = clock(); + time = time_end-time_beg; + cout << time << endl; + return 0; +} + diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/take_apart.py" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/take_apart.py" new file mode 100644 index 0000000..e9fa299 --- /dev/null +++ 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/crawled/take_apart.py" @@ -0,0 +1,13 @@ +#encoding=utf-8 +import jieba +import jieba.posseg as pseg +filename = "input.txt" +database = "cache.txt" +f = open(filename,"r") +fn = open(database,"w+") +line = f.read() +words = pseg.cut(line) +for word in words: + print >>fn,str(word) +f.close() +fn.close() \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Makefile" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Makefile" new file mode 100644 index 0000000..c90dfa7 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Makefile" @@ -0,0 +1,22 @@ +bighomework.cgi : bighomework main.o Query.o itoa.o store.o Similarity.o TopResult.o utf8change.o PersonData.o Scorer.o main.cpp Query.cpp itoa.cpp store.cpp Similarity.cpp TopResult.cpp utf8change.cpp PersonData.cpp Scorer.cpp + g++ -o bighomework.cgi main.o Query.o itoa.o store.o Similarity.o TopResult.o utf8change.o PersonData.o Scorer.o -std=c++11 +bighomework : main.o Query.o itoa.o store.o Similarity.o TopResult.o utf8change.o PersonData.o Scorer.o main.cpp Query.cpp itoa.cpp store.cpp Similarity.cpp TopResult.cpp utf8change.cpp PersonData.cpp Scorer.cpp + g++ -o bighomework main.o Query.o itoa.o store.o Similarity.o TopResult.o utf8change.o PersonData.o Scorer.o -std=c++11 +main.o : main.cpp PersonData.h + g++ -c main.cpp +Query.o 
: Query.cpp Query.h utf8change.h + g++ -c Query.cpp +itoa.o : itoa.cpp PersonData.h + g++ -c itoa.cpp +store.o : store.cpp Similarity.h PersonData.h + g++ -c store.cpp +Similarity.o : Similarity.cpp PersonData.h + g++ -c Similarity.cpp +TopResult.o : TopResult.cpp PersonData.h + g++ -c TopResult.cpp +utf8change.o : utf8change.cpp + g++ -c utf8change.cpp +PersonData.o : PersonData.cpp PersonData.h + g++ -c Persondata.cpp +Scorer.o : Scorer.cpp Similarity.h + g++ -c Scorer.cpp \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.cpp" new file mode 100644 index 0000000..114adaf --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.cpp" @@ -0,0 +1,28 @@ +#include "PersonData.h" + +void PersonData::ComputeDataValue() +{ + value = 0; + if (!name.empty()) + value += 100; + if (!sex.empty()) + value += 20; + if (birthday) + value += 20; + if (lostday) + value += 5; + if (height) + value += 20; + if (!birthplace.first.empty()) + value += 22.5; + if (!birthplace.second.empty()) + value += 7.5; + if (!lostplace.first.empty()) + value += 7.5; + if (!lostplace.second.empty()) + value += 2.5; + if (!MayToGo.empty()) + value += 5; + if (!information.empty()) + value += 40; +} \ No newline at end of file diff --git 
"a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.h" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.h" new file mode 100644 index 0000000..ec85b9d --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/PersonData.h" @@ -0,0 +1,45 @@ +#pragma once +#include +#include +#include +#include +#include "Query.h" + +string itostring(int i); + +class PersonData +{ +private: + string name; + string sex; + int birthday;//YYYYMMDD + int lostday;//YYYYMMDD + int height; + pair birthplace; + pair lostplace; + string MayToGo; + string information; + string DataName; + double score; + double value; +public: + friend void store(vector& alldata); + friend double NameSimilarity(const string p, const string q); + friend double SexSimilarity(const string p, const string q); + friend double DaySimilarity(const int p, const int q); + friend double HeightSimilarity(const int p, const int q); + friend double PairStringSimilarity(const string p, const pair q); + friend double PairStringSimilarity(const pair p, const pair q); + friend double InformationSimilarity(const string p, const string q); + friend bool CompByScore(PersonData a, PersonData b); + friend void SortByScore(vector &data); + friend void print(vector &data, int shows, int page); + friend void scorer(PersonData &p, const Query &q); + friend void scorer(PersonData &p, const ComplexQuery &q); + //PersonData() = default; + PersonData(string dataname, string m_name, string m_sex, pair m_birthplace, int m_birthday, int m_lostday, int m_height, pair 
m_lostplace + , string maytogo, string m_information): + DataName(dataname), name(m_name), sex(m_sex), birthplace(m_birthplace), birthday(m_birthday), lostday(m_lostday), height(m_height), lostplace(m_lostplace), + MayToGo(maytogo), information(m_information), score(0), value(0) {} + void ComputeDataValue(); +}; diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.cpp" new file mode 100644 index 0000000..15980e0 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.cpp" @@ -0,0 +1,117 @@ +#include +#include +#include +#include "Query.h" +#include +#include "utf8change.h" + +ComplexQuery::ComplexQuery() +{ + name = sex = birthplace.first = birthplace.second = lostplace.first = lostplace.second = MayToGo.first = MayToGo.second = ""; + birthday = lostday = height = 0; +} + +/* name + sex + birthday + lostday + height + birthplacefirst + birthplacesecond + lostplacefirst + lostplacesecond + maytogofirst + maytogosecond + information +*/ + +ComplexQuery::ComplexQuery(const string &data) +{ + ComplexQuery(); + name = sex = birthplace.first = birthplace.second = lostplace.first = lostplace.second = MayToGo.first = MayToGo.second = ""; + birthday = lostday = height = 0; + int pos = 0, npos; + do + { + npos = data.find('&', pos); + string s = data.substr(pos, npos - pos); + int epos = s.find('='); + if (s[0] == 'n') + { + name = s.substr(epos + 1); + name = UrlDecode(name); + } + else if (s[0] == 's') + { + sex = s.substr(epos + 1); + 
sex =UrlDecode(sex); + } + else if (s[0] == 'b') + { + if (s[5] == 'd') + { + if (s.substr(epos + 1).empty()) + birthday = 0; + else birthday = stoi(s.substr(epos + 1)); + } + else if (s[10] == 'f') + { + birthplace.first = s.substr(epos + 1); + birthplace.first = UrlDecode(birthplace.first); + } + else + { + birthplace.second = s.substr(epos + 1); + birthplace.second = UrlDecode(birthplace.second); + } + } + else if (s[0] == 'l') + { + if (s[4] == 'd') + { + if (s.substr(epos + 1).empty()) + lostday = 0; + else lostday = stoi(s.substr(epos + 1)); + } + else if (s[9] == 'f') + { + lostplace.first = s.substr(epos + 1); + lostplace.first = UrlDecode(lostplace.first); + } + else + { + lostplace.second = s.substr(epos + 1); + lostplace.second = UrlDecode(lostplace.second); + } + } + else if (s[0] == 'h') + { + if (s.substr(epos + 1).empty()) + height = 0; + else height = stoi(s.substr(epos + 1)); + } + else if (s[0] == 'i') + { + information = s.substr(epos + 1); + information = UrlDecode(information); + cout << "infor" << information << endl; + ifstream fin; + ofstream fout; + string part; + fout.open("input.txt"); + fout << information << endl; + system("py take_apart.py"); + fin.open("cache.txt"); + information = ""; + while(getline(fin,part)) + { + information += part; + } + } + pos = npos + 1; + } while (npos != string::npos); +} + +void ComplexQuery::print() +{ +} diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.h" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.h" new file mode 100644 index 0000000..a298633 --- /dev/null +++ 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Query.h" @@ -0,0 +1,32 @@ +#pragma once +#include +using namespace std; + +class PersonData; + +class Query +{ +private: + string name; +public: + friend void scorer(PersonData &p, const Query &q); +}; + +class ComplexQuery +{ +private: + string name; + string sex; + int birthday; + int lostday; + int height; + pair birthplace; + pair lostplace; + pair MayToGo; + string information; +public: + ComplexQuery(); + ComplexQuery(const string &data); + friend void scorer(PersonData &p, const ComplexQuery &q); + void print(); +}; \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Scorer.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Scorer.cpp" new file mode 100644 index 0000000..f71ae00 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Scorer.cpp" @@ -0,0 +1,24 @@ +#include "Similarity.h" + +void scorer(PersonData &p, const Query &q) +{ + p.score = NameSimilarity(p.name, q.name) * 10000; +} + +void scorer(PersonData &p, const ComplexQuery &q) +{ + p.score = 0; + if (!q.name.empty()) + p.score += NameSimilarity(p.name, q.name) * 10000; + if (!q.sex.empty()) + p.score += SexSimilarity(p.sex, q.sex) * 2000; + if (q.birthday != 0) + p.score += DaySimilarity(p.birthday, q.birthday) * 2000; + if (q.lostday != 0) + p.score += DaySimilarity(p.lostday, q.lostday) * 500; + if 
(q.height != 0) + p.score += HeightSimilarity(p.height, q.height) * 2000; + p.score += PairStringSimilarity(p.birthplace, q.birthplace) * 3000 + + PairStringSimilarity(p.lostplace, q.lostplace) * 1000; + + InformationSimilarity(p.information, q.information) * 4000; +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.cpp" new file mode 100644 index 0000000..3d069f3 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.cpp" @@ -0,0 +1,113 @@ +#include +#include +#include +#include "PersonData.h" +#include "Query.h" +#include +using namespace std; + +double NameSimilarity(const string p, const string q) +{ + if (p.length() >= 12) + return 0.0; + if (p == q) + return 1.0; + int lp = p.length(), lq = q.length(); + if (lp == 0 || lq == 0) + return 0; + int dp[20][20]; + for (int i = 0; i <= lp; i++) + dp[i][0] = 0; + for (int i = 1; i <= lq; i++) + dp[0][i] = 0; + for (int i = 3; i <= lp; i += 3) + { + for (int j = 3; j <= lq; j += 3) + { + dp[i][j] = max(dp[i - 3][j], dp[i][j - 3]); + dp[i][j] = max(dp[i][j], dp[i - 3][j - 3] + 3 * (p[i] == q[j] && p[i - 1] == q[j - 1] && p[i - 2] == q[i - 2])); + } + } + return (double)(dp[lp][lq]) * dp[lp][lq] / lp / lq; +} + +double SexSimilarity(const string p, const string q) +{ + if (p == "不详") + return 1.0; + return (double)(p == q); +} + +double DaySimilarity(const int p, const int q) +{ + if (p == 0) + return 0.0; + int diff = abs(p - q); + int 
y = abs(p / 10000 - q / 10000), + m = abs(p % 10000 / 100 - q % 10000 / 100), + d = abs(p % 100 - q % 100); + if (y < 0 || y > 100 || m < 0 || m > 12 || d < 0 || d > 31) + return 0.0; + return 1.0 - y / 150.0 - m / 36.0 - d / 90.0; +} + +double HeightSimilarity(const int p, const int q) +{ + if (p == 0 || q <= 0 || q > 300) + return 0.0; + return 1.0 - double(abs(p - q)) * abs(p - q) / p / q; +} + +double PairStringSimilarity(const string p, const pair q) +{ + if (q.first == "不详" || q.first.empty() || p.find(q.first) == string::npos) + return 0.0; + else if (q.second == "不详" || q.second.empty() || p.find(q.second) == string::npos) + return 0.75; + else return 1.0; +} + +double PairStringSimilarity(const pair p, const pair q) +{ + if (q.first == "不详" || q.first.empty() || p.first != q.first) + return 0.0; + else if (q.second == "不详" || q.second.empty() || p.second != q.second) + return 0.75; + else return 1.0; +} + +double InformationSimilarity(const string p, const string q) +{ + if(p.length() == 0) + { + return 0.0; + } + if(p.find(q) != string::npos) + return 1.0; + else + { + int total = 0; + int k = 0; + int sum = 0; + int loc1 = 0,loc2; + string preq; + cout << preq << endl; + cout << loc1 << loc2 << endl; + while(!(q[k] <= 'z' && q[k] >= 'A' )) + { + preq += q[k]; + k++; + } + while(preq.find("/",loc1) != string::npos) + { + string part; + total++; + loc2 = preq.find("/",loc1+1); + part = preq.substr(loc1,loc2-loc1); + if(p.find(part) != string::npos) + sum++; + loc1 = loc2; + } + return log(1.0+(sum+1.0)/(total+1.0)); + } +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.h" 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.h" new file mode 100644 index 0000000..d63aedb --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/Similarity.h" @@ -0,0 +1,19 @@ +#include +#include +#include +#include "PersonData.h" +using namespace std; + +double NameSimilarity(const string p, const string q); + +double SexSimilarity(const string p, const string q); + +double DaySimilarity(const int p, const int q); + +double HeightSimilarity(const int p, const int q); + +double PairStringSimilarity(const string p, const pair q); + +double PairStringSimilarity(const pair p, const pair q); + +double InformationSimilarity(const string p, const string q); \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/TopResult.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/TopResult.cpp" new file mode 100644 index 0000000..ad18b33 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/TopResult.cpp" @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "PersonData.h" + +const double eps = 1e-2; + +bool CompByScore(PersonData a, PersonData b) +{ + if (abs(a.score - b.score) > eps) + return a.score > b.score; + else return a.value > b.value; 
+} + +void SortByScore(vector &data) +{ + sort(data.begin(), data.end(), CompByScore); +} + +void print(vector &data, int shows, int page) +{ + int t = min(page * shows, (int)data.size()); + cout<<"

\n"; + for (int i = (page - 1) * shows; i < t; i++) + { + + cout << ("姓名:") << data[i].name <<"
"<< ("得分:") << data[i].score <<"
""详细信息 "<
\n"; + } + cout<<"

\n"; +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/add" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/add" new file mode 100644 index 0000000..76d4bb8 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/add" @@ -0,0 +1 @@ +add diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/bighomework.cgi" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/bighomework.cgi" new file mode 100644 index 0000000..8bf5fb1 Binary files /dev/null and "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/bighomework.cgi" differ diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/itoa.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/itoa.cpp" new file mode 100644 index 0000000..d0fd031 --- /dev/null +++ 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/itoa.cpp" @@ -0,0 +1,25 @@ +#include "PersonData.h" +string itostring(int i) +{ + string s; + int n,j = 0; + char x[20] = {0}, xx[20] = {0}; + n = i; + while(n != 0) + { + x[j] = char(n%10+48); + n = n/10; + j++; + } + for(int k = 0;k < j;k++) + { + xx[k] = x[j-k-1]; + } + xx[j] = '.'; + xx[j+1] = 'h'; + xx[j+2] = 't'; + xx[j+3] = 'm'; + xx[j+4] = 'l'; + s = xx; + return s; +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/main.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/main.cpp" new file mode 100644 index 0000000..12bcf94 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/main.cpp" @@ -0,0 +1,70 @@ +#include +#include "PersonData.h" +using namespace std; + +int main() +{ + cout<<"Content-type:text/html\n\n"<"<\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"失踪人口查询系统\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<" \n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"

\n"; + cout<<"Missing\n"; + cout<<"Population\n"; + cout<<"Inquiry\n"; + cout<<"System

\n"; + cout<< "

\n"; + cout<<"查询结果

\n"; + + cout<<"\n"; + cout<<"\n"; + cout<<"
\n"; + + //string s("name=&sex=男&birthday=&lostday=&height=&birthplacefirst=&birthplacesecond=&lostplacefirst=福建省&lostplacesecond=&information=") ; + //cout<> s; + vector data; + store(data); + ComplexQuery q(s); + //cout << data.size() << endl; + for (size_t i = 0; i < data.size(); i++) + scorer(data[i], q); + SortByScore(data); + print(data, 5, 1); + q.print(); + + cout<<"
\n"; + + + cout<<"\n"; + cout<<"\n"; + cout<<"
\n"; + cout<<"

Copyright © 2019.林润博 付嘉庆 吴翔宇.

\n"; + cout<<"
\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + cout<<"\n"; + + return 0; +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/store.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/store.cpp" new file mode 100644 index 0000000..4e68004 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/store.cpp" @@ -0,0 +1,148 @@ +#include "PersonData.h" + +void store(vector &alldata) +{ + int max = 10860; + ifstream fin; + ofstream fout; + for(int i = 0; i <= max;i++) + { + string name = "C:/Users/12772/Desktop/CMakeProject/data/" + itostring(i); + fin.open(name); + if(fin.is_open() == true) + { + int loc1,loc2; + string place; + string dataname; + string m_name; + bool m_sex;//0 : male + string mm_sex; + int m_birthday;//YYYYMMDD + pair m_birthplace; + int m_lostday;//YYYYMMDD + int m_height; + pair m_lostplace; + string maytogo; + string m_information; + fin >> dataname >> m_name >> m_sex >> place >> m_birthday; + if(place.find("省") != string::npos) + { + loc1 = place.find("省"); + m_birthplace.first = place.substr(0,loc1+3); + loc2 = place.find("市"); + if(loc2 != string::npos) + m_birthplace.second = place.substr(loc1+3,loc2-loc1+3); + else + { + m_birthplace.second = "不详"; + } + } + else if(place.find("自治区") != string::npos) + { + loc1 = place.find("自治区"); + m_birthplace.first = place.substr(0,loc1+9); + loc2 = place.find("市"); + if(loc2 != 
string::npos) + m_birthplace.second = place.substr(loc1+9,loc2-loc1+3); + else + { + m_birthplace.second = "不详"; + } + } + else if(place.find("特别行政区") != string::npos) + { + loc1 = place.find("特别行政区"); + m_birthplace.first = place.substr(0,loc1+15); + loc2 = place.find("市"); + if(loc2 != string::npos) + m_birthplace.second = place.substr(loc1+15,loc2-loc1+3); + else + { + m_birthplace.second = "不详"; + } + } + else if(place.find("市") != string::npos) + { + loc1 = place.find("市"); + m_birthplace.first = place.substr(0,loc1+3); + loc2 = place.find("区"); + if(loc2 != string::npos) + m_birthplace.second = place.substr(loc1+3,loc2-loc1+3); + else + { + m_birthplace.second = "不详"; + } + } + else + { + m_birthplace.first = "不详"; + m_birthplace.second = "不详"; + } + fin >> m_lostday >> m_height >> place; + if(place.find("省") != string::npos) + { + loc1 = place.find("省"); + m_lostplace.first = place.substr(0,loc1+3); + loc2 = place.find("市"); + if(loc2 != string::npos) + m_lostplace.second = place.substr(loc1+3,loc2-loc1+3); + else + { + m_lostplace.second = "不详"; + } + } + else if(place.find("自治区") != string::npos) + { + loc1 = place.find("自治区"); + m_lostplace.first = place.substr(0,loc1+9); + loc2 = place.find("市"); + if(loc2 != string::npos) + m_lostplace.second = place.substr(loc1+9,loc2-loc1+3); + else + { + m_lostplace.second = "不详"; + } + } + else if(place.find("特别行政区") != string::npos) + { + loc1 = place.find("特别行政区"); + m_lostplace.first = place.substr(0,loc1+15); + loc2 = place.find("市"); + if(loc2 != string::npos) + m_lostplace.second = place.substr(loc1+15,loc2-loc1+3); + else + { + m_lostplace.second = "不详"; + } + } + else if(place.find("市") != string::npos) + { + loc1 = place.find("市"); + m_lostplace.first = place.substr(0,loc1+3); + loc2 = place.find("区"); + if(loc2 != string::npos) + m_lostplace.second = place.substr(loc1+3,loc2-loc1+3); + else + { + m_lostplace.second = "不详"; + } + } + else + { + m_lostplace.first = "不详"; + m_lostplace.second = "不详"; + } + 
fin >> maytogo >> m_information; + if(m_sex == 1) + mm_sex = "女"; + else + { + mm_sex = "男"; + } + + PersonData temp( dataname ,m_name ,mm_sex , m_birthplace, m_birthday, m_lostday, m_height, m_lostplace,maytogo, m_information); + alldata.push_back(temp); + } + fin.close(); + } +} \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/take_apart.py" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/take_apart.py" new file mode 100644 index 0000000..e9fa299 --- /dev/null +++ "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/take_apart.py" @@ -0,0 +1,13 @@ +#encoding=utf-8 +import jieba +import jieba.posseg as pseg +filename = "input.txt" +database = "cache.txt" +f = open(filename,"r") +fn = open(database,"w+") +line = f.read() +words = pseg.cut(line) +for word in words: + print >>fn,str(word) +f.close() +fn.close() \ No newline at end of file diff --git "a/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/utf8change.cpp" "b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/utf8change.cpp" new file mode 100644 index 0000000..2c3ca8f --- /dev/null +++ 
"b/summer/\345\220\264\347\277\224\345\256\207_\346\236\227\346\266\246\345\215\232_\344\273\230\345\230\211\345\272\206_\345\244\261\350\270\252\344\272\272\345\217\243\346\220\234\347\264\242/search/utf8change.cpp" @@ -0,0 +1,71 @@ + +#include +#include +#include +#include + +unsigned char ToHex(unsigned char x) +{ + return x > 9 ? x + 55 : x + 48; +} + +unsigned char FromHex(unsigned char x) +{ + unsigned char y; + if (x >= 'A' && x <= 'Z') y = x - 'A' + 10; + else if (x >= 'a' && x <= 'z') y = x - 'a' + 10; + else if (x >= '0' && x <= '9') y = x - '0'; + else assert(0); + return y; +} + +std::string UrlEncode(const std::string& str) +{ + std::string strTemp = ""; + size_t length = str.length(); + for (size_t i = 0; i < length; i++) + { + if (isalnum((unsigned char)str[i]) || + (str[i] == '-') || + (str[i] == '_') || + (str[i] == '.') || + (str[i] == '~')) + strTemp += str[i]; + else if (str[i] == ' ') + strTemp += "+"; + else + { + strTemp += '%'; + strTemp += ToHex((unsigned char)str[i] >> 4); + strTemp += ToHex((unsigned char)str[i] % 16); + } + } + return strTemp; +} + +std::string UrlDecode(const std::string& str) +{ + std::string strTemp = ""; + size_t length = str.length(); + for (size_t i = 0; i < length; i++) + { + if (str[i] == '+') strTemp += ' '; + else if (str[i] == '%') + { + assert(i + 2 < length); + unsigned char high = FromHex((unsigned char)str[++i]); + unsigned char low = FromHex((unsigned char)str[++i]); + strTemp += high*16 + low; + } + else strTemp += str[i]; + } + return strTemp; +} +/* int main(){ + std::string url="%E5%BC%A0%E4%B8%89"; + //std::cin>>url; + //url = UrlEncode(url); + url = UrlDecode(url); + std::cout<