/*************************************************************************
> File Name: FileSort.h
> Author: wangzhicheng
> Mail: [email protected]
> Created Time: Sat 31 Dec 2016 09:30:39 AM AWST
> Brief:sort strings in the file
************************************************************************/
#ifndef FILE_SORT_H
#define FILE_SORT_H
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <omp.h>
#include <algorithm>
#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <utility>
#include <vector>
namespace filesort
{
// sort direction selectors accepted by FileSort
#define ASC 0
#define DESC 1
using namespace std;
class FileSort
{
public:
    /*
     * @brief sort the lines of a text file and write them to another file
     * @input_path the input file full path
     * @output_path the output file full path
     * @sortype sort type, ASC or DESC
     * @threadnum requested number of sorting threads
     * @return true if sort is ok
     * */
    static bool sort(const char *input_path, const char *output_path, int sortype = ASC, int threadnum = 2);
private:
    /*
     * @brief order one batch of strings in place with the stl sort
     * @input the strings to reorder
     * @sortype ASC for ascending, DESC for descending; any other value
     *          leaves the batch untouched
     * */
    inline static void threadsort(vector<string>&input, int sortype)
    {
        if(sortype == ASC)
        {
            std::sort(input.begin(), input.end(), std::less<string>());
        }
        else if(sortype == DESC)
        {
            // descending order via the "greater than" predicate
            std::sort(input.begin(), input.end(), std::greater<string>());
        }
    }
};
}
#endif
/*************************************************************************
> File Name: FileSort.cpp
> Author: wangzhicheng
> Mail: [email protected]
> Created Time: Sat 31 Dec 2016 09:30:39 AM AWST
> Brief: sort strings in a file
************************************************************************/
#include "FileSort.h"
namespace filesort
{
bool FileSort::sort(const char *input_path, const char *output_path, int sortype, int threadnum)
{
// safe check entry arguments
int MaxThreadNum = omp_get_num_procs() << 1;
if(threadnum <= 0) threadnum = 1;
if(threadnum >= MaxThreadNum) threadnum = MaxThreadNum;
// open the input file
ifstream is(input_path);
if(!is)
{
cerr << input_path << " open failed...!" << endl;
return false;
}
// import the string in the input file to various segments
typedef vector<string> stringset;
vector<stringset>segments;
segments.resize(threadnum); // every thead is responsible for every segment
int index = 0;
string line;
while(getline(is, line))
{
segments[index].emplace_back(line);
index = (index + 1) % threadnum;
}
is.close();
// start theads to sort
vector<thread>sortthreads;
int i;
for(i = 0;i < threadnum;i++)
{
stringset &strings = segments[i];
sortthreads.push_back(thread(FileSort::threadsort, ref(strings), sortype));
}
for(auto &th:sortthreads)
{
th.join();
}
// open the output file
ofstream os(output_path, ios::trunc);
if(!os)
{
cerr << input_path << " open failed...!" << endl;
return false;
}
// merge the vector to sort
// init the heap
vector<pair<string, int> >outstrings; // first -- key second -- position
for(i = 0;i < threadnum;i++)
{
stringset &strings = segments[i];
if(!strings.empty())
{
outstrings.emplace_back(pair<string, int>(strings.front(), i));
strings.erase(strings.begin());
}
}
index = 0;
switch(sortype)
{
case ASC:
make_heap(outstrings.begin(), outstrings.end(), [](const pair<string, int>&p0, const pair<string, int>&p1)
{
return p0.first > p1.first;
});
break;
case DESC:
make_heap(outstrings.begin(), outstrings.end(), [](const pair<string, int>&p0, const pair<string, int>&p1)
{
return p0.first < p1.first;
});
break;
}
while(!outstrings.empty())
{
pop_heap(outstrings.begin(), outstrings.end());
string &key = outstrings.back().first;
int pos = outstrings.back().second;
os << key << endl;
outstrings.pop_back();
stringset &strings = segments[pos];
if(!strings.empty())
{
outstrings.emplace_back(pair<string, int>(strings.front(), pos));
strings.erase(strings.begin());
}
switch(sortype)
{
case ASC:
make_heap(outstrings.begin(), outstrings.end(), [](const pair<string, int>&p0, const pair<string, int>&p1)
{
return p0.first > p1.first;
});
break;
case DESC:
make_heap(outstrings.begin(), outstrings.end(), [](const pair<string, int>&p0, const pair<string, int>&p1)
{
return p0.first < p1.first;
});
break;
}
}
os.close();
}
}
/*************************************************************************
> File Name: main.cpp
> Author: wangzhicheng
> Mail: [email protected]
> Created Time: Sat 31 Dec 2016 09:44:59 AM AWST
************************************************************************/
#include "FileSort.h"
#include <iterator>
#include <time.h>
using namespace filesort;
/*
 * @brief write a file of random digit strings, then sort it in
 *        descending order into a second file
 * @return 1 if the test input file cannot be created, otherwise 0
 * */
int main()
{
    // shape of the generated testing data
    static const int N = 10;    // digits per line, also the digit range
    static const int MAX = 100; // number of lines
    ofstream test_input("./input_data", ios::trunc);
    if(!test_input)
    {
        cerr << "input file generate failed...!" << endl;
        return 1;
    }
    char digits[64];
    srand(time(0));
    for(int row = 0;row < MAX;++row)
    {
        for(int col = 0;col < N;++col)
        {
            // rand() % N is a single decimal digit, store it as a character
            digits[col] = static_cast<char>('0' + rand() % N);
        }
        digits[N] = '\0';
        test_input << digits << endl;
    }
    test_input.close();
    // sort
    FileSort::sort("./input_data", "./output_data", DESC);
    return 0;
}
# Builds the MergeSort demo from FileSort.cpp and main.cpp.
CC = g++
DBG =
# Compiler options; kept overridable from the environment or command line.
ifndef DEBUG_SET
DEBUG_SET= -std=c++11 -g -pthread
endif
IFLAGS =-I .
INDEX_ROOT=..
# A continued list must not end with a backslash, otherwise the next
# assignment or rule line is spliced into the variable's value.
LIBS = -L . \
	-lpthread \
	-lgomp
LINK =
TARGET=MergeSort
.PHONY: all clean
all:$(TARGET)
OBJS=FileSort.o \
	main.o
# Recipe lines must start with a tab character.
$(TARGET):$(OBJS)
	$(CC) -fPIC -o ./$(TARGET) $(OBJS) $(LIBS) $(LINK)
.cpp.o:
	$(CC) $(DBG) $(DEBUG_SET) $(IFLAGS) -fPIC -c $<
clean:
	rm -f *.o
	rm -f $(TARGET)
FileSort
猜你喜欢
转载自blog.csdn.net/wangzhicheng2013/article/details/53966697
今日推荐
周排行