C++学习之第十天-实现文本查询

作业第5题:文本查询。该程序将读取用户指定的任意文本文件【当前目录下的china_daily.txt】,然后允许用户从该文件中查找单词。查询的结果是该单词出现的次数,并列出每次出现所在的行。如果某单词在同一行中多次出现,程序将只显示该行一次。行号按升序显示。

要求:a、它必须允许用户指明要处理的文件名字。b、程序将存储该文件的内容,以便输出每个单词所在的原始行。

  vector<string> _lines; 

c、它必须将每一行分解为各个单词,并记录每个单词所在的所有行。在输出行号时,应保证以升序输出,并且不重复。

map<string, set<int> > _word2Numbers;
map<string, int> _dict;

d、对特定单词的查询将返回出现该单词的所有行的行号。e、输出某单词所在的行文本时,程序必须能根据给定的行号从输入文件中获取相应的行。示例:

使用提供的文件内容,然后查找单词 "element"。输出的前几行为:

element occurs 125 times.
	(line 62) element with a given key.
	(line 64) second element with the same key.
	(line 153) element |==| operator.
	(line 250) the element type.

(line 398) corresponding element.

程序接口[可选]:
// Suggested (optional) interface from the assignment statement; the `//......`
// markers are placeholders for members left to the implementer.
class TextQuery
{
public:
    //......
    void readFile(const string filename);
    void query(const string &word);// lookup and printing are coupled together in this one call
	
private:
	//......
	vector<string> _lines; // stores each line of the file? (posed as a question in the assignment)
	map<string, set<int> > _word2Numbers;// the lines on which each word appears
	map<string, int> _dict;// how many times each word occurs
};

void print(ostream & os, const QueryResult &);

// Sample test driver from the assignment: load "test.dat" and query one word.
int main(int argc, char *argv[])
{
   string  queryWord("hello");

   TextQuery tq;
   tq.readFile("test.dat");
   tq.query(queryWord);			   

   return 0;
} 

文本查询要点:

        1.容器string的学习

        2.容器vector的学习

        3.容器map的学习

query.h

#include <iostream>
using namespace std;
#include <string>
#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <regex>
// Builds a word index over a text file and answers word lookups on it.
class TextQuery
{
public:
    // Read `filename` line by line, keep every raw line, and index the words of each line.
    void readFile(const string &filename);
    

    //1. Print every indexed word together with its occurrence count.
    void print_Dict();
    //2. Match a single token against a letters-only regex; on a match, bump its count in
    //   _dict and record its line number in _word2Numbers. line_num is the zero-based
    //   line index (the implementation stores line_num + 1).
    void regex_Search(string &tempStr, int line_num);
     //3. Print every indexed word followed by the line numbers it appears on.
    void print_word2Numbers();
    //4. Look up a word and print its occurrence count and each line it appears on.
    void query(const string &word);

private:
    vector<string> _lines;              // raw text of each line, in file order
    map<string,set<int>>_word2Numbers;  // word -> set of one-based line numbers
    map<string,int>_dict;               // word -> total occurrence count
};

TextQuery.cc

#include "query.h"
// 1. Strip leading and trailing space characters (' ' only) from s, in place.
//    A string made up solely of spaces ends up empty.
void trim(string &s)
{
    if (s.empty())
        return;

    // npos here (no non-space character) makes erase() drop the whole string.
    const string::size_type firstKept = s.find_first_not_of(' ');
    s.erase(0, firstKept);

    // npos + 1 wraps to 0, so an all-space (now empty) string stays empty.
    const string::size_type lastKept = s.find_last_not_of(' ');
    s.erase(lastKept + 1);
}
// 2. Normalise a line for word splitting: every character that is not an ASCII
//    letter (A-Z, a-z) is overwritten with a space, then leading and trailing
//    spaces are removed with trim(). Modifies str in place.
void delete_punctuation(string &str)
{
    // Range-for avoids comparing a signed int index against the unsigned size().
    for (char &ch : str)
    {
        const bool isUpper = (ch >= 'A' && ch <= 'Z');
        const bool isLower = (ch >= 'a' && ch <= 'z');
        if (!isUpper && !isLower)
            ch = ' '; // not one of the 26 letters: turn it into a separator
    }
    trim(str); // drop the spaces left at both ends of the line
}
 //3.通过正则表达式提取每个单词,map<string,int>_dict存储单词以及该单词在文中出现的次数
    //map<string,set<int>>_word2Numbers,键存储单词,值记录单词所在的每一行的集合
// Record one occurrence of a token in the index.
//
// tempStr  : candidate token. It is indexed only when the anchored pattern
//            matches, i.e. when the whole token consists of ASCII letters.
// line_num : zero-based index of the line the token came from; it is stored
//            as a one-based line number (line_num + 1).
//
// On a match, the word's count in _dict is incremented and the line number is
// added to the word's set in _word2Numbers (a set, so a line is kept once).
void TextQuery::regex_Search(string &tempStr, int line_num)
{
    // This function runs once per token, so the pattern is compiled a single
    // time instead of on every call.
    static const regex pattern("^[A-Za-z]+$");
    smatch result;

    if (!regex_search(tempStr, result, pattern))
        return; // not a pure-letter token: nothing to index

    const string word = result.str(0);

    // operator[] value-initialises a missing entry (count 0 / empty set),
    // which covers both the "new word" and "known word" cases in one step.
    ++_dict[word];
    _word2Numbers[word].insert(line_num + 1);
}
//把句子从文件中读出来,然后一句一句处理
void TextQuery::readFile(const string &filename)
{
    //1.先把每行句子读取出来,存入vector
    ifstream ifs(filename);
    if(!ifs.good())
    {
        cerr<<"open"<<filename<<"fail"<<endl;
        return;
    }
    string line;
    while(getline(ifs,line))
    {
        _lines.push_back(line);//把读出来的数据存入lines容器
    }
	//2.把每句话单独拿出来处理,i用来是记录行号
    for(int i=0;i<_lines.size();i++)
    {
        string str = _lines[i];

        int pos = -1;
        int start = 0;

        delete_punctuation(str);//删除标点符号
        while(true)
        //这个循环做的事是:空格作为分隔符,提取句子中的每个单词,然后对提取处理的单词进行处理
        {

            pos = str.find(" ",start);//从句子中start位置查找,找到第一个空格的位置,没找到会返回-1
            if(pos==-1)//处理最后一个单词
            {
               //str.substr(start,n),从字符串start位置开始,拿到str中的n个字符
                string tempStr = str.substr(start,str.size()-start);//拿到最后一个单词
                if(!tempStr.empty())//处理单词并且进行存储
                    regex_Search(tempStr, i);
                break;//跳出循环
            }
			//
            string tempStr = str.substr(start,pos-start);
            if(!tempStr.empty())
                regex_Search(tempStr,i);//处理单词并且进行存储
            start = pos+1;//空格符的下一个位置

        }

    }
}
// Print every indexed word and its occurrence count, one "word count" pair
// per line, in the iteration order of the _dict map.
void TextQuery::print_Dict()
{
    for (const auto &entry : _dict)
        cout << entry.first << " " << entry.second << endl;
}
// Print every indexed word followed by all line numbers recorded for it,
// separated by spaces, one word per output line.
void TextQuery::print_word2Numbers()
{
    for (const auto &entry : _word2Numbers)
    {
        cout << entry.first << " ";
        for (const int lineNo : entry.second)
            cout << lineNo << " ";
        cout << endl;
    }
}

// Look up a word and print the result to cout.
//
// word : the exact (case-sensitive) word to look up.
//
// If the word is not indexed, a "There is no ..." message is printed.
// Otherwise the occurrence count is printed, followed by one line per
// recorded line number with that line's original text, then a blank line.
void TextQuery::query(const string &word)
{
    const map<string, int>::iterator countIt = _dict.find(word);
    const map<string, set<int>>::iterator linesIt = _word2Numbers.find(word);

    // Both maps are checked so that neither iterator is dereferenced at end().
    if (countIt == _dict.end() || linesIt == _word2Numbers.end())
    {
        // The space before "in" was missing, gluing the word to the message.
        cout << "There is no " << word << " in the passage" << endl;
        return;
    }

    cout << countIt->first << " occurs " << countIt->second << " times" << endl;
    for (const int lineNo : linesIt->second)
    {
        cout << "(lines " << lineNo << ") ";
        // Stored line numbers are one-based. regex_Search() is public and
        // accepts any line number, so make sure the line really exists
        // before indexing into _lines.
        if (lineNo >= 1 && static_cast<vector<string>::size_type>(lineNo) <= _lines.size())
            cout << _lines[lineNo - 1];
        cout << endl;
    }
    cout << endl;
}

main.cc

#include "query.h"
 
 // Smoke test: index "china_daily.txt" and run three sample lookups.
 void test01()
 {
     TextQuery t1;
     t1.readFile("china_daily.txt");

     // Enable these to dump the whole index while debugging:
     // t1.print_Dict();
     // t1.print_word2Numbers();

     const char *samples[] = {"with", "vehicles", "hehhehehehw"};
     for (const char *sample : samples)
         t1.query(sample);
 }
 // Program entry point: runs test01() and returns 0.
 int main() 
 {          
     test01();
     return 0;
 }  

运行结果

with occurs 13 times
(lines 15) private firms with high growth potential, Gao said.
(lines 24) Bloomberg reported earlier citing people familiar with the matter that the launch 
(lines 42) Chinese travelers like to take instant noodles with them while travelling abroad, 
(lines 44) And the favorite items travelers like to bring with them in their luggage differed 
(lines 60) Quanjude, China's iconic restaurant chain for original Peking roast duck with a 
(lines 61) history since 1864, has embraced the nation's "Internet Plus" strategy, with a new 
(lines 63) "Internet Plus" has sparked integration of the Internet with traditional industries, 
(lines 71) with Quanjude and chairman of a new joint-venture company that is pursuing the online takeout and e-commerce market.
(lines 74) the company, Yage Technology Inc, in October 2015 with Chongqing Kuangcao Technology 
(lines 77) "We believe with our time-honored brand image, experienced artisan cooking skills, 
(lines 87) duck rolls are made in Quanjude restaurants, with the same recipe and ingredients 
(lines 94) according to Yang Aixiang, general manager with Yage Technology.
(lines 104) Yang Xun, a publicist with Baidu Takeout, which handles delivery service of 

vehicles occurs 2 times
(lines 102) limits for delivery vehicles, including Beijing, the first and most important market for Quanjude Takeout and e-commerce.
(lines 107) avoid barred roads for delivery vehicles.

There is no hehhehehehwin the passage
 

猜你喜欢

转载自blog.csdn.net/weixin_49278191/article/details/121199045