毕业设计——基于SpringBoot+Neo4j+Spark实现的论文智能分析问答系统(采用朴素贝叶斯分类器)

完整项目地址:https://download.csdn.net/download/lijunhcn/88430302

写在前面

分析了下这个电影知识问答系统,底层功能实现是操作cypher语句,前台的业务:
1.汉语分词器HanLP将原始语句分词
2.语句抽象化(提高匹配问题模板标签准确率)
3.获取模板标签,使用模板将句子转化成系统可以识别的结果
4.cypher语句获取结果返回前台
既然涉及问答系统,中途也看了微软小冰和其他语料库的资料,感觉自己做出一个偏向应用的石油相关智能问答系统的可能性不大:首先,自己不做
爬虫的话,语料库这个问题就解决不了;而要是真有现成的语料库,那也就没有我做的必要了。

区别

对比自己想做的石油论文智能分析系统,我的数据来源都是国外网站,用户的原始语句是英文就用不到分词,但词汇库就复杂了,需要自己去找英
文人名词汇表,提取论文信息生成全文搜索词汇表。所以对这个项目我抱的期望不是很大,先罗列几个比较困难的点,做出来更新:
1.项目中通过稠密向量来生成训练集,而每个局部向量是由词汇表来确定的。电影知识问答系统中用的是一个190词的电影相关汉语词汇表,但石油相关
词汇都来自英语网站的数据,所以词汇表内容都是英语词汇。解决办法是在有了一些数据之后再生成这个表,但是搞爬虫的同学还在准备中期考试。
2.问题归类,英语比较吃力了,同样的一个问题怎么来问,同一个问题预设问法越多,模型在学习后识别同类问题的准确率才会更高。
3.……(待补充)

/* Import author.csv into Neo4j, creating one Author node per row.
   The node is merged on `id` alone and the remaining columns are applied
   with SET. Merging on the full property map would (a) abort the import as
   soon as any field is null, because MERGE rejects null property values, and
   (b) create a second Author with the same id whenever a re-import carries a
   changed name/email/birth. A row whose id is missing or non-numeric still
   fails loudly, since toInteger() then yields null for the merge key.
   Expected CSV headers: id, name, email, birth. */
LOAD CSV WITH HEADERS FROM "file:///author.csv" AS line
MERGE (a:Author {id: toInteger(line.id)})
SET a.name  = line.name,
    a.email = line.email,
    a.birth = line.birth;


/* Import paper.csv into Neo4j, creating one Paper node per row.
   The node is merged on `id` alone and the descriptive columns are applied
   with SET. Merging on the full property map would abort the import when any
   field (e.g. doi or introduction) is null, and would create a duplicate
   Paper with the same id if any attribute differs on a later import.
   Expected CSV headers: id, name, doi, document_id, publisher,
   publication_date, summary, introduction. */
LOAD CSV WITH HEADERS FROM "file:///paper.csv" AS line
MERGE (p:Paper {id: toInteger(line.id)})
SET p.name             = line.name,
    p.doi              = line.doi,
    p.document_id      = line.document_id,
    p.publisher        = line.publisher,
    p.publication_date = line.publication_date,
    p.summary          = line.summary,
    p.introduction     = line.introduction;


/* Import keyword.csv into Neo4j, creating one Keyword node per row.
   The node is merged on `id` alone and `name` is applied with SET, so a null
   name does not abort the import and a renamed keyword updates the existing
   node instead of creating a second Keyword with the same id.
   Expected CSV headers: id, name. */
LOAD CSV WITH HEADERS FROM "file:///keyword.csv" AS line
MERGE (k:Keyword {id: toInteger(line.id)})
SET k.name = line.name;


/* Import author_paper.csv into Neo4j: for every row whose author_id and
   paper_id both resolve to existing nodes, ensure a `create` relationship
   from the Author to the Paper. The two ids are also stored as properties
   on the relationship. Rows that match no Author or no Paper produce
   nothing. */
LOAD CSV WITH HEADERS FROM "file:///author_paper.csv" AS row
WITH toInteger(row.author_id) AS authorId,
     toInteger(row.paper_id)  AS paperId
MATCH (author:Author {id: authorId})
MATCH (paper:Paper {id: paperId})
MERGE (author)-[:create {author_id: authorId, paper_id: paperId}]->(paper);


/* Import paper_keyword.csv into Neo4j: for every row whose paper_id and
   keyword_id both resolve to existing nodes, ensure an `attribute`
   relationship from the Paper to the Keyword. The two ids are also stored
   as properties on the relationship. Rows that match no Paper or no Keyword
   produce nothing. */
LOAD CSV WITH HEADERS FROM "file:///paper_keyword.csv" AS row
WITH toInteger(row.paper_id)   AS paperId,
     toInteger(row.keyword_id) AS keywordId
MATCH (paper:Paper {id: paperId})
MATCH (keyword:Keyword {id: keywordId})
MERGE (paper)-[:attribute {paper_id: paperId, keyword_id: keywordId}]->(keyword);







/* mysql数据库的SQL */

-- Entity tables
-- Paper authors, one row per author.
-- Differences from the generated dump:
--   * utf8mb4 replaces latin1, so names containing characters outside
--     Latin-1 are stored intact instead of being rejected or turned into '?'.
--   * The dump-time AUTO_INCREMENT seed is dropped so a fresh table starts
--     numbering at 1; loading rows with explicit ids still advances the counter.
CREATE TABLE `author` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    `email` VARCHAR(50) NULL DEFAULT NULL,
    -- NOTE(review): stored as an integer, presumably a birth year; confirm.
    `birth` INT(11) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文作者'
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
;

-- Paper details, one row per paper.
-- Differences from the generated dump:
--   * utf8mb4 replaces latin1 so non-Latin-1 text is stored intact.
--   * `abstract` is widened from VARCHAR(255) to TEXT; paper abstracts
--     routinely exceed 255 characters and would otherwise be rejected
--     (strict mode) or truncated.
--   * The dump-time AUTO_INCREMENT seed is dropped so a fresh table starts at 1.
-- NOTE(review): the Neo4j import of paper.csv reads `name`, `summary` and
-- `introduction` columns that this table does not define; confirm how
-- paper.csv is produced.
CREATE TABLE `paper` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `doi` VARCHAR(50) NULL DEFAULT NULL,
    `document_id` VARCHAR(50) NULL DEFAULT NULL,
    `publisher` VARCHAR(50) NULL DEFAULT NULL,
    -- NOTE(review): date kept as free text; format is not enforced here.
    `publication_date` VARCHAR(50) NULL DEFAULT NULL,
    `abstract` TEXT NULL DEFAULT NULL,
    `keywords` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文详细信息'
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
;

-- Paper categories, one row per genre.
-- Differences from the generated dump: utf8mb4 replaces latin1 so
-- non-Latin-1 genre labels are stored intact, and the dump-time
-- AUTO_INCREMENT seed is dropped so a fresh table starts at 1.
CREATE TABLE `genre` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `type` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文类别'
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
;

-- Meetings (conferences) that papers take part in, one row per meeting.
-- Differences from the generated dump: utf8mb4 replaces latin1 so
-- non-Latin-1 names and locations are stored intact, and the dump-time
-- AUTO_INCREMENT seed is dropped so a fresh table starts at 1.
CREATE TABLE `meeting` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `location` VARCHAR(100) NULL DEFAULT NULL,
    -- NOTE(review): date kept as free text; format is not enforced here.
    `date` VARCHAR(100) NULL DEFAULT NULL,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文参与的会议'
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
;

-- Organizations that authors belong to, one row per organization.
-- Differences from the generated dump: utf8mb4 replaces latin1 so
-- non-Latin-1 names and locations are stored intact, and the dump-time
-- AUTO_INCREMENT seed is dropped so a fresh table starts at 1.
CREATE TABLE `origination` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(255) NULL DEFAULT NULL,
    `location` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='作者属于的组织'
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
;


-- Link (junction) tables; when moved into Neo4j each one becomes the corresponding relationship
-- Link table between authors and papers (many-to-many).
-- The dump declared UNIQUE(author_id), which allowed each author to appear
-- only once and therefore to be linked to a single paper. The key is now the
-- (author_id, paper_id) pair: an author may have many papers and a paper many
-- authors, while an exact duplicate link is still rejected.
-- `paper_id` is NOT NULL because a link row without a paper carries no
-- information (and primary-key columns cannot be nullable).
-- The composite primary key also serves as the index the author_id foreign
-- key requires, since author_id is its leading column.
CREATE TABLE `author_paper` (
    `author_id` INT(11) NOT NULL,
    `paper_id` INT(11) NOT NULL,
    PRIMARY KEY (`author_id`, `paper_id`),
    INDEX `paper_id` (`paper_id`),
    CONSTRAINT `FK__author_paper_author` FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_paper_paper` FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
COLLATE='latin1_swedish_ci'
ENGINE=InnoDB
;

-- Link table assigning a genre to a paper.
-- The unique key on paper_id means a paper can appear at most once, i.e. it
-- can be linked to at most one genre; a genre may be shared by many papers.
-- NOTE(review): genre_id is nullable, so a row may name a paper without any
-- genre; confirm whether that is intended.
CREATE TABLE `paper_genre` (
    `paper_id` INT(11) NOT NULL,
    `genre_id` INT(11) DEFAULT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `genre_id` (`genre_id`),
    CONSTRAINT `FK__paper_genre_genre`
        FOREIGN KEY (`genre_id`) REFERENCES `genre` (`id`),
    CONSTRAINT `FK__paper_genre_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
ENGINE=InnoDB
COLLATE='latin1_swedish_ci'
;

-- Link table assigning a paper to the meeting it takes part in.
-- The unique key on paper_id means a paper can appear at most once, i.e. it
-- can be linked to at most one meeting; a meeting may have many papers.
CREATE TABLE `paper_meeting` (
    `paper_id` INT(11) NOT NULL,
    `meeting_id` INT(11) NOT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `meeting_id` (`meeting_id`),
    CONSTRAINT `FK__paper_meeting_meeting`
        FOREIGN KEY (`meeting_id`) REFERENCES `meeting` (`id`),
    CONSTRAINT `FK__paper_meeting_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
ENGINE=InnoDB
COLLATE='latin1_swedish_ci'
;

-- Link table assigning an author to an organization.
-- The unique key on author_id means an author can appear at most once, i.e.
-- can be linked to at most one organization; an organization may have many
-- authors.
CREATE TABLE `author_origination` (
    `author_id` INT(11) NOT NULL,
    `origination_id` INT(11) NOT NULL,
    UNIQUE KEY `author_id` (`author_id`),
    KEY `origination_id` (`origination_id`),
    CONSTRAINT `FK__author_origination_author`
        FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_origination_origination`
        FOREIGN KEY (`origination_id`) REFERENCES `origination` (`id`)
)
ENGINE=InnoDB
COLLATE='latin1_swedish_ci'
;

猜你喜欢

转载自blog.csdn.net/lijunhcn/article/details/135177241