diff --git a/emlog2duoshuo.php b/emlog2duoshuo.php new file mode 100644 index 0000000..415017e --- /dev/null +++ b/emlog2duoshuo.php @@ -0,0 +1,240 @@ +'duoshuo','version'=>'0.1'); + } + $ds_split[$key][] = $v; + if($ds_count==SPLIT_SIZE || $arr_count==$arr_len){ + $ds_split = json_encode($ds_split); + file_put_contents('content/backup/emlog_comment_'.$file_count.'.json',$ds_split); + ++$file_count; + unset($ds_split); + $ds_count = 0; + } + ++$ds_count; + ++$arr_count; + } + //返回最后一个当前已保存的文件序号 + return $file_count-1; + } + +/* 程序界面 */ + + //指定编码 + header('content-type:text/html; charset=utf-8'); + + //默认页面 + if(empty($_GET)){ +?> + +

emlog评论导出工具

+
+

☆ 导出选项:

+ +

☆ 性能测试:

+ +

☆ 开始导出:

+ + + +
+

到作者博客

+

提示:为了您的数据安全,建议您操作完成后删除本程序。

+

提示:对于评论量较大的用户,建议您选择分卷保存到服务器。
+ 当前程序以条评论进行分割,生成的每个分卷应该均不超过200K。
+ 如果实际生成的分卷过小或过大,您可以手动修改本程序的分割参数。
+ 导出文件保存地址:emlog目录/content/backup/emlog_comment_序号.json

+'duoshuo','version'=>'0.1');
+
+    // Start from empty collections so JSON encoding and split saving never
+    // read an undefined key when nothing is fetched.
+    $ds['threads'] = array();
+    $ds['posts'] = array();
+
+/* Collect the commented articles */
+
+    // IDs of the articles that have comments
+    $sql = 'select distinct(gid) from '.DB_PREFIX.'comment'.$allow_hide_sql;
+    $gid_list = db_get_gids($sql);
+
+    // Article rows for those IDs. An empty ID list would produce "in()",
+    // which is not valid SQL, so the query is skipped in that case.
+    $th_list = array();
+    if(!empty($gid_list)){
+        $sql = 'select gid,title,alias from '.DB_PREFIX.'blog where gid in('.implode(',',$gid_list).')';
+        $th_list = db_get_array($sql);
+    }
+
+    // Append each article to the "threads" list of the export
+    foreach($th_list as $v){
+        $ds['threads'][] = array('thread_key'=>$v['gid'],'title'=>$v['title'],'url'=>$url.'/'.$v['alias']);
+    }
+
+    // Record the test figures, then release the intermediate lists
+    if($test['test']){
+        $test['gid'] = count($gid_list);
+        $test['th']['count'] = count($th_list);
+        $test['th']['mem'] = memory_get_usage();
+    }
+    unset($gid_list);
+    unset($th_list);
+
+
+/* Collect the comments */
+
+    // Fetch the comment rows
+    $sql = 'select cid,gid,pid,comment,date,ip,mail,poster,url from '.DB_PREFIX.'comment'.$allow_hide_sql;
+    $db_list = db_get_array($sql);
+
+    // Append each comment to the "posts" list of the export
+    foreach($db_list as $v){
+        $ds['posts'][] = array(
+            'post_key'=>$v['cid'],
+            'thread_key'=>$v['gid'],
+            'parent_key'=>$v['pid'],
+            'message'=>$v['comment'],
+            // "i" is the minutes specifier; "m" would print the month in the minutes slot
+            'created_at'=>date('Y-m-d H:i:s',$v['date']),
+            'ip'=>$v['ip'],
+            'author_email'=>$v['mail'],
+            'author_name'=>$v['poster'],
+            'author_url'=>$v['url'],
+        );
+    }
+
+    // Record the test figures, then release the raw rows
+    if($test['test']){
+        $test['db']['count'] = count($db_list);
+        $test['db']['mem'] = memory_get_usage();
+    }
+    unset($db_list);
+
+
+/* Split saving */
+    if($split_save){
+
+        /* Article data goes first */
+        $file_count = file_split_save($ds['threads'],'threads');
+        $tmp_count = $file_count;
+        /* Comment data continues with the next file number */
+        $file_count = file_split_save($ds['posts'],'posts',$file_count+1);
+        echo '保存成功,一共',$file_count,'个分卷。其中,前'.$tmp_count.'个分卷是文章数据,不包含评论。';
+        exit;
+    }
+
+/* Print the export or send it as a download */
+
+    $ds = json_encode($ds);
+
+    // Print in the browser
+    if(isset($_GET['show'])){
+        echo $ds;
+    }
+
+    // Send as a file download
+    else if(isset($_GET['download'])){
+        header("Content-Type: application/json");
+        header("Accept-Ranges: bytes");
+        // Content-Length is the header that carries the body size in bytes
+        header("Content-Length: ".strlen($ds));
+        // "His" = hours, minutes, seconds ("m" would repeat the month)
+        header("Content-Disposition: attachment; filename=" .'em_' .date('YmdHis') . '.json');
+        echo $ds;
+    }
+
+/* Performance test */
+
+    if($test['test']){
+
+        // Number of comments according to a count query
+        $sql = 'select count(gid) from '.DB_PREFIX.'comment'.$allow_hide_sql;
+        $test['count'] = db_get_count($sql);
+
+        // The encoded JSON is checked for the expected closing characters
+        $json_check = ( substr($ds,-4) =='"}]}' ) ? '正常结束' : '异常结束';
+
+        // Largest memory reading among the two recorded stages and now, in KB
+        $mem_curr = memory_get_usage();
+        $mem = max($test['th']['mem'], $test['db']['mem'], $mem_curr);
+        $mem_curr = round($mem_curr / 1024);
+        $mem = round($mem / 1024);
+
+        $str = '

性能测试结果:

'; + $str .= '

您网站中的评论数量为:'.$test['count'].'
实际查询数量为:'.$test['db']['count'].'

'; + $str .= '

有评论的文章数量为:'.$test['gid'].'
实际查询数量为:'.$test['th']['count'].'

'; + $str .= '

编码后的json结束符检查:'.$json_check.'

'; + $str .= '

内存使用:最大值('. $mem.'K) 当前('.$mem_curr.'K)

'; + $str .= '

返回

';
+        echo $str;
+
+        exit;
+    }
diff --git a/emlog2typecho3.py b/emlog2typecho3.py
new file mode 100644
index 0000000..043e8fc
--- /dev/null
+++ b/emlog2typecho3.py
@@ -0,0 +1,166 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Description: emlog2typecho is a Python script that migrates an Emlog database to Typecho.
+#
+
+
+# Settings
+
+# Emlog database name
+emlog_database_name = 'emlog'
+# Typecho database name
+typecho_database_name = 'typecho'
+# Database host
+database_host = 'localhost'
+# Database port
+database_port = 3306
+# Database user name
+database_user_name = 'root'
+# Database password
+database_user_password = 'root'
+# Connection character set
+database_charset = 'utf8'
+
+#################################################################################
+import mysql.connector  # mysql.connector replaces MySQLdb
+
+# Connect to the Emlog database
+conn = mysql.connector.connect(host=database_host,
+                               user=database_user_name,
+                               passwd=database_user_password,
+                               port=database_port,
+                               charset=database_charset,
+                               database=emlog_database_name)
+cur = conn.cursor()
+
+# Read every Emlog category together with its article count
+cur.execute('select sid, sortname, alias, ( select count( * ) from emlog_blog where sortid = sid ) AS count FROM emlog_sort')
+emlog_sort_list = cur.fetchall()
+
+# Read every Emlog tag
+cur.execute('select tid,tagname,gid from emlog_tag')
+emlog_tag_list = []
+for row in cur.fetchall():
+    tagname = row[1]
+    # The gid column is split on commas and empty pieces are dropped. Building
+    # a new list avoids removing items from the list being iterated, which
+    # skips the element that follows each removal.
+    gid_list = [gid for gid in (row[2] or '').split(',') if gid != '']
+    emlog_tag_list.append({'tagname': tagname, 'gidlist': gid_list})
+
+# Read the Emlog blog table
+cur.execute('select gid,title,date,content,excerpt,alias,sortid,type,allow_remark from emlog_blog')
+emlog_blog_list = cur.fetchall()
+
+# Read the Emlog comment table
+cur.execute("SELECT * FROM `emlog_comment`")
+emlog_comment_list = cur.fetchall()
+
+# Everything has been read; close the Emlog connection before reconnecting
+cur.close()
+conn.close()
+
+# ---------------------------------------------------------------
+# --- Emlog tables are loaded; switch to Typecho for writing ---
+# ---------------------------------------------------------------
+
+# Connect to the Typecho database
+conn = mysql.connector.connect(host=database_host,
+                               user=database_user_name,
+                               passwd=database_user_password,
+                               port=database_port,
+                               charset=database_charset,
+                               database=typecho_database_name)
+cur = conn.cursor()
+
+# Delete all Typecho categories and tags
+cur.execute('delete from typecho_metas')
+
+# Insert every Emlog category
+for sort in emlog_sort_list:
+    sort_id = sort[0]
+    sort_name = sort[1]
+    sort_slug = sort[2]
+    sort_count = sort[3]
+    cur.execute("insert into typecho_metas (mid, name, slug, type, description, `count`, `order`) VALUES (%s, %s, %s, 'category', NULL, %s, 0)" , (sort_id, sort_name, sort_slug, sort_count))
+
+# Delete all Typecho articles
+cur.execute('delete from typecho_contents')
+# Delete all article relationships
+cur.execute('delete from typecho_relationships')
+# Delete all comments
+cur.execute('delete from typecho_comments')
+
+# Migrate every article
+# NOTE(review): commentsNum is written as '0' for every article and is never
+# recomputed after the comments are inserted - confirm whether that is intended.
+for blog in emlog_blog_list:
+    print(blog[0],"Done!")
+    blog_id = blog[0]
+    blog_title = blog[1]
+    blog_create_date = blog[2]
+    blog_content = blog[3]
+
+    # The slug must not be an empty string
+    blog_alias = blog[5]
+    if blog_alias == '':
+        blog_alias = None
+
+    # Emlog type 'blog' becomes Typecho type 'post'; anything else becomes 'page'
+    if blog[7] == 'blog':
+        blog_type = 'post'
+    else:
+        blog_type = 'page'
+
+    # Whether comments are allowed
+    if blog[8] == 'y':
+        blog_allow_comment = '1'
+    else:
+        blog_allow_comment = '0'
+
+    params = (blog_id,blog_title,blog_alias,blog_create_date,blog_content,blog_type,blog_allow_comment)
+    cur.execute("insert into `typecho_contents` (`cid`, `title`, `slug`, `created`, `modified`, `text`, `order`, `authorId`, `template`, `type`, `status`, `password`, `commentsNum`, `allowComment`, `allowPing`, `allowFeed`, `parent`) VALUES (%s, %s, %s, %s, NULL, %s, '0', '1', NULL, %s, 'publish', NULL, '0', %s, '0', '0', '0')",params)
+
+    # Link the article to its category
+    blog_sortid = blog[6]
+
+    # In Emlog a category id of -1 marks a page
+    if blog_sortid == -1:
+        continue
+    cur.execute('insert into `typecho_relationships` (`cid`, `mid`) VALUES (%s, %s)',(blog_id,blog_sortid))
+
+# Insert every tag (and its relationships)
+cur.execute("select MAX( mid ) FROM `typecho_metas`")
+# MAX() is NULL (None) when no category row exists; tag ids then start at 1
+sort_max_id = (cur.fetchall()[0][0] or 0) + 1
+
+# Tag ids continue after the highest category id
+for tag in emlog_tag_list:
+    cur.execute("insert into `typecho_metas` (`mid`, `name`, `slug`, `type`, `description`, `count`, `order`) VALUES (%s, %s, %s, 'tag', NULL, %s, '0');",(sort_max_id,tag['tagname'],tag['tagname'],len(tag['gidlist'])))
+    for gid in tag['gidlist']:
+        params = (int(gid),sort_max_id)
+        # Duplicate pairs are sometimes rejected; report the pair and carry on
+        try:
+            cur.execute('insert into `typecho_relationships` (`cid`, `mid`) VALUES (%s, %s)',params)
+        except mysql.connector.Error:
+            print('失败一条Tag:%s,%s' % (params))
+    sort_max_id = sort_max_id + 1
+
+# Insert the comments
+# NOTE(review): the indexes below depend on the column order returned by
+# SELECT * - presumably cid, gid, pid, date, poster, comment, mail, url, ip;
+# verify against the emlog_comment schema.
+for comment in emlog_comment_list:
+    params = (comment[0],comment[1],comment[3],comment[4],comment[6],comment[7],comment[8],comment[5],comment[2])
+    cur.execute("INSERT INTO `typecho_comments` (`coid`, `cid`, `created`, `author`, `authorId`, `ownerId`, `mail`, `url`, `ip`, `agent`, `text`, `type`, `status`, `parent`) VALUES (%s, %s, %s, %s, '0', '1', %s, %s, %s, NULL, %s, 'comment' , 'approved', %s)",params)
+
+# mysql.connector does not autocommit by default, so without an explicit
+# commit the writes above are not committed on transactional tables.
+conn.commit()
+
+# Close the database connection
+cur.close()
+conn.close()
+
+print('转移完成...')