字符串匹配
工作中需要用到字符串匹配。为了能够在不同的字符串匹配库之间快速切换,编写了这套统一的匹配接口。
接口封装
- 头文件
kwmatch.h
#ifndef __KWMATCH_H__
#define __KWMATCH_H__
// Debug-log switch: set KW_DEBUG to 0 to compile every __DEBUG() call out.
#define KW_DEBUG 1
#if KW_DEBUG
// Prints "[FILE:<file>,FUNC:<function>,Line:<line>]" followed by the
// printf-style message and a newline. p must be a string literal because it is
// pasted between two literals. Expands to printf(), so the including file
// needs <stdio.h>.
#define __DEBUG(p,...) printf("[FILE:%s,FUNC:%s,Line:%d]"p"\n",__FILE__,__FUNCTION__,__LINE__,##__VA_ARGS__)
#else
// No trailing semicolon here: the caller's own ';' completes the statement, so
// `if (x) __DEBUG("..."); else ...` still parses when logging is disabled.
#define __DEBUG(p,...) do{}while(0)
#endif
// Forward declaration: the callbacks below take a pointer to the context.
struct keywords_ctx;
// Table of callbacks that one keyword-matching backend provides.
struct keyword_operations{
int (*keyword_init)(struct keywords_ctx* data, int max); // initialise
int (*keyword_add)(struct keywords_ctx* data, char **keyword, int num); // add keywords
int (*keyword_compile)(struct keywords_ctx* data,void *keyword_data); // compile
int (*keyword_compare)(struct keywords_ctx* data, void *keyword_data, char* file_content,int file_content_len); // match
void (*keyword_clean)(struct keywords_ctx* data, void *keyword_data); // clean up the state of a single file
void (*keyword_free)(struct keywords_ctx* data); // clean up the keyword configuration
};
// Matching context: pairs a backend's configuration with its operations table.
struct keywords_ctx{
void *kw_cfg; // backend configuration data (opaque at this level)
struct keyword_operations *kw_ops; // pointer to the backend's operations table
};
#endif
hyperscan关键字匹配库封装
- 头文件
hyperscan.h
#ifndef __HYPERSCAN_H__
#define __HYPERSCAN_H__
#include "hs.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "kwmatch.h"
// Keyword configuration of the Hyperscan backend (what kw_cfg points to).
struct keyword_hy_config{
char **keywords; // keyword array
int *ids; // keyword ids
unsigned int flag; // matching rule (see the Hyperscan documentation for details)
int elements; // number of keywords
int max_elements; // maximum number of keywords
hs_database_t *database; // compiled keyword-matching database
};
// Per-scan state of the Hyperscan backend (passed around as keyword_data).
struct keyword_hy_data{
int result; // match result: -1 = no hit, >= 0 = hit (the value is the keyword index)
hs_compile_error_t *compile_err; // meant to hold Hyperscan's error reason
hs_stream_t *stream; // stream handle stored by the compile step
hs_scratch_t *scratch; // scratch space stored by the compile step
};
// Objects exported by the Hyperscan backend: its configuration instance and its operations table.
extern struct keyword_hy_config hy_kw_cfg;
extern struct keyword_operations hy_kw_ops;
#endif
- 对封装接口的实现:hyperscan.c,基于流(stream)模式的封装
#include "hyperscan.h"
// Forward declarations of the file-local Hyperscan callbacks, needed because
// the operations table below refers to them before they are defined.
static int keyword_hy_init(struct keywords_ctx* data, int max);
static int keyword_hy_add(struct keywords_ctx* data, char **keywords, int num);
static int keyword_hy_compile(struct keywords_ctx* data,void *keyword_data);
static int keyword_hy_compare(struct keywords_ctx* data, void *keyword_data, char* file_content,int file_content_len);
static void keyword_hy_clean(struct keywords_ctx* data, void *keyword_data);
static void keyword_hy_free(struct keywords_ctx* data);
// Operations table of the Hyperscan backend: binds every generic callback
// slot to the matching keyword_hy_* function of this file.
struct keyword_operations hy_kw_ops = {
.keyword_init = keyword_hy_init,
.keyword_add = keyword_hy_add,
.keyword_compile = keyword_hy_compile,
.keyword_compare = keyword_hy_compare,
.keyword_clean = keyword_hy_clean,
.keyword_free = keyword_hy_free
};
// Zero-initialised global configuration instance of the Hyperscan backend.
struct keyword_hy_config hy_kw_cfg = {
0};
/*
 * Match callback handed to hs_open_stream(): stores the id of the matching
 * pattern in the int that _ctx points to and logs the match offsets.
 *
 * id       - id of the pattern that matched
 * from, to - match offsets as reported by the library (only logged)
 * flags    - unused
 * _ctx     - pointer to the int result slot; ignored when NULL
 *
 * Returns 0. The original fell off the end of a non-void function, so the
 * library received an indeterminate value.
 * NOTE(review): 0 is assumed to mean "keep scanning" in this Hyperscan
 * version - confirm against the library documentation.
 */
static int keyword_hy_hit_handler(unsigned int id, unsigned long long from,
                                  unsigned long long to, unsigned int flags, void *_ctx)
{
    int *result = _ctx;

    (void)flags;
    if (result != NULL)
        *result = (int)id;
    /* %llu: both offsets are unsigned long long. */
    printf("keyword hit!from:%llu, to:%llu\n", from, to);
    return 0;
}
/*
 * Initialise the Hyperscan keyword configuration stored in data->kw_cfg.
 *
 * data - matching context; data->kw_cfg must point to a struct keyword_hy_config
 * max  - capacity of the keyword and id arrays; must be greater than 0
 *
 * Returns 0 on success, -1 on error. On error the configuration is left in a
 * known-empty state (NULL arrays, zero counts), so keyword_hy_free() can still
 * be called on it safely.
 */
static int keyword_hy_init(struct keywords_ctx* data, int max)
{
    if (!data || max <= 0)
        return -1;
    struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
    if (!key_config)
        return -1;

    /* Reset every field first: the caller's struct may be uninitialised. */
    key_config->keywords = NULL;
    key_config->ids = NULL;
    key_config->elements = 0;
    key_config->max_elements = 0;
    key_config->flag = HS_FLAG_SINGLEMATCH;
    key_config->database = NULL;

    /* calloc zero-fills both arrays and checks the size multiplication. */
    char **keywords = calloc((size_t)max, sizeof *keywords);
    int *ids = calloc((size_t)max, sizeof *ids);
    if (!keywords || !ids) {
        /* Release whichever allocation succeeded; free(NULL) is a no-op. */
        free(keywords);
        free(ids);
        return -1;
    }

    key_config->keywords = keywords;
    key_config->ids = ids;
    key_config->max_elements = max;
    return 0;
}
// 返回-1是错误,返回0是成功
static int keyword_hy_add(struct keywords_ctx* data, char **keywords, int num)
{
if(!data || !keywords)
return -1;
struct keyword_hy_config* key_config = (struct keyword_hy_config*)data->kw_cfg;
if(!key_config || key_config->max_elements <= num)
return -1;
int i = 0,len = 0;
for(i = 0; i < num; i++){
if(!keywords[i]){
goto add_error;
}
len = strlen(keywords[i]);
key_config->ids[i] = i;
key_config->keywords[i] = (char *)malloc(len+1);
if(!key_config->keywords[i])
goto add_error;
memset(key_config->keywords[i], 0, len+1);
memcpy(key_config->keywords[i], keywords[i], len);
}
key_config->elements = num;
hs_database_t *database = key_config->database;
hs_compile_error_t *compile_err = NULL;;
if(!database){
if (hs_compile_multi(key_config->keywords, &key_config->flag, key_config->ids, key_config->elements,
HS_MODE_STREAM|HS_MODE_SOM_HORIZON_SMALL, &database,&compile_err) != HS_SUCCESS) {
hs_free_compile_error(compile_err);
fprintf(stderr, "ERROR: Unable to compile error\n");
}
key_config->database = database;
}
return 0;
add_error:
for(i = 0; i < num; i++){
if(key_config->keywords[i])
free(key_config->keywords[i]);
}
return -1;
}
/*
 * Prepare the per-scan state: allocate scratch space and open a stream on the
 * compiled database, then store both in keyword_data.
 *
 * data         - matching context; data->kw_cfg must be a struct keyword_hy_config
 * keyword_data - struct keyword_hy_data that receives the stream and scratch;
 *                its result field is registered as the match-callback context
 *
 * Returns 0 on success, -1 on error. On error keyword_hy_clean() is run on
 * keyword_data, as before.
 */
static int keyword_hy_compile(struct keywords_ctx* data, void *keyword_data)
{
    if (!data)
        return -1;
    struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
    struct keyword_hy_data *key_data = (struct keyword_hy_data *)keyword_data;
    if (!key_config || !key_data)
        return -1;

    hs_database_t *database = key_config->database;
    hs_scratch_t *scratch = NULL;
    hs_stream_t *stream = NULL;

    /* Nothing to open a stream on if the keywords were never compiled. */
    if (!database) {
        printf("ERROR: No compiled database. Exiting.\n");
        goto error;
    }
    /* Scratch is acquired first because this file knows how to release it;
       the original opened the stream first and lost it when the scratch
       allocation failed. */
    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
        printf("ERROR: Unable to allocate scratch space. Exiting.\n");
        goto error;
    }
    if (hs_open_stream(database, keyword_hy_hit_handler, 0, &key_data->result, &stream) != HS_SUCCESS) {
        printf("ERROR: Unable to open stream. Exiting.\n");
        hs_free_scratch(scratch);
        goto error;
    }
    key_data->stream = stream;
    key_data->scratch = scratch;
    return 0;

error:
    keyword_hy_clean(data, keyword_data);
    return -1;
}
// 返回-1是错误,返回0是命中,返回1为不命中
static int keyword_hy_compare(struct keywords_ctx* data, void *keyword_data, char* file_content,int file_content_len)
{
struct keyword_hy_data * key_data = (struct keyword_hy_data*)keyword_data;
if(!key_data)
return -1;
hs_scan_stream(key_data->stream, (const char *)file_content, file_content_len, 0, key_data->scratch);
if(0 <= key_data->result)
return 0;
return 1;
}
/*
 * Reset the per-scan state held in keyword_data: frees the scratch space if
 * one is set, clears both handles and puts result back to -1 (no hit).
 * Does nothing when keyword_data is NULL.
 *
 * NOTE(review): the stream handle is only set to NULL here, it is not closed
 * through the library - check whether that leaks the stream.
 */
static void keyword_hy_clean(struct keywords_ctx* data, void *keyword_data)
{
    struct keyword_hy_data *scan_state = keyword_data;

    if (scan_state != NULL) {
        if (scan_state->scratch != NULL) {
            hs_free_scratch(scan_state->scratch);
        }
        scan_state->scratch = NULL;
        scan_state->stream = NULL;
        scan_state->result = -1;
    }
}
/*
 * Release everything owned by the keyword configuration in data->kw_cfg: the
 * copied keyword strings, the keyword and id arrays, and the compiled
 * database. Pointers are cleared and the counts reset afterwards, so calling
 * this twice is harmless.
 */
static void keyword_hy_free(struct keywords_ctx* data)
{
    if (!data)
        return;
    struct keyword_hy_config *key_config = (struct keyword_hy_config *)data->kw_cfg;
    if (!key_config)
        return;

    /* The array itself may be NULL (failed init, or already freed), so it is
       checked before any slot is touched. */
    if (key_config->keywords) {
        for (int i = 0; i < key_config->elements; i++) {
            free(key_config->keywords[i]);
            key_config->keywords[i] = NULL;
        }
        free(key_config->keywords);
    }
    free(key_config->ids);               /* free(NULL) is a no-op */
    if (key_config->database)
        hs_free_database(key_config->database);

    key_config->ids = NULL;
    key_config->keywords = NULL;
    key_config->database = NULL;
    /* Keep the counts consistent with the now-empty arrays. */
    key_config->elements = 0;
    key_config->max_elements = 0;
}
调用案例
test.c
#include "hs.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "hyperscan.h"
#define READ_SIZE 10
// Debug-log switch: set HY_DEBUG to 0 to compile every __DEBUG() call out.
#define HY_DEBUG 1
#if HY_DEBUG
// Prints "[FILE:<file>,FUNC:<function>,Line:<line>]" followed by the
// printf-style message and a newline. p must be a string literal because it is
// pasted between two literals.
#define __DEBUG(p,...) printf("[FILE:%s,FUNC:%s,Line:%d]"p"\n",__FILE__,__FUNCTION__,__LINE__,##__VA_ARGS__)
#else
// No trailing semicolon here: the caller's own ';' completes the statement, so
// `if (x) __DEBUG("..."); else ...` still parses when logging is disabled.
#define __DEBUG(p,...) do{}while(0)
#endif
/*
 * Match callback used by the unwrapped example in main(): stores the id of
 * the matching pattern in the int that _ctx points to and logs the match
 * offsets.
 *
 * id       - id of the pattern that matched
 * from, to - match offsets as reported by the library (only logged)
 * flags    - unused
 * _ctx     - pointer to the int result slot; ignored when NULL
 *
 * Returns 0. The original fell off the end of a non-void function, so the
 * library received an indeterminate value.
 * NOTE(review): 0 is assumed to mean "keep scanning" in this Hyperscan
 * version - confirm against the library documentation.
 */
static int keyword_hy_hit_handler(unsigned int id, unsigned long long from,
                                  unsigned long long to, unsigned int flags, void *_ctx)
{
    int *result = _ctx;

    (void)flags;
    if (result != NULL)
        *result = (int)id;
    /* %llu: both offsets are unsigned long long. */
    printf("keyword hit!from:%llu, to:%llu\n", from, to);
    return 0;
}
/*
 * Demo driver: scans a file for keywords with Hyperscan in stream mode,
 * reading the file in small chunks.
 *
 * The "#if 0" branch is the unwrapped example that calls the Hyperscan API
 * directly (usage: prog filepath pattern). The active "#else" branch goes
 * through the keyword_operations wrapper
 * (usage: prog filepath num pattern1 ... patternN).
 *
 * Returns EXIT_SUCCESS when the file was scanned without error (whether or
 * not a keyword was hit), EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv)
{
    /* Everything the cleanup labels look at is declared and initialised up
       front, so no goto can jump over an initialisation. */
    int exit_code = EXIT_FAILURE;
    FILE *fp = NULL;
    char *one_line = NULL;
// Example without the wrapper
#if 0
    char *keyword = NULL;
    int id = 0;
    int elements = 1;
    int result = -1;
    int flag = HS_FLAG_SINGLEMATCH;
    hs_database_t *database = NULL;
    hs_compile_error_t *compile_err = NULL;
    hs_stream_t *stream = NULL;
    hs_scratch_t *scratch = NULL;

    if (argc < 3) {
        __DEBUG("please input %s [filepath] [pattern] \n", argv[0]);
        return exit_code;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        __DEBUG("file open fail\n");
        return exit_code;
    }
    __DEBUG("file open success\n");

    keyword = argv[2];
    one_line = malloc(READ_SIZE * sizeof(char) + 1);
    if (!one_line) {
        __DEBUG("one_line malloc fail\n");
        goto error;
    }
    if (hs_compile_multi(&keyword, &flag, &id, elements,
                         HS_MODE_STREAM|HS_MODE_SOM_HORIZON_SMALL, &database, &compile_err) != HS_SUCCESS) {
        fprintf(stderr, "ERROR: Unable to compile error: %s\n", compile_err->message);
        hs_free_compile_error(compile_err);
        goto error;
    }
    if (hs_open_stream(database, keyword_hy_hit_handler, 0, &result, &stream) != HS_SUCCESS) {
        __DEBUG("ERROR: Unable to open stream. Exiting.\n");
        goto error;
    }
    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
        __DEBUG("ERROR: Unable to allocate scratch space. Exiting.\n");
        goto error;
    }
    /* fgets() reads at most READ_SIZE-1 characters per call, so a long line
       is fed to the stream in several chunks. */
    while (fgets(one_line, READ_SIZE, fp) != NULL) {
        hs_scan_stream(stream, (const char *)one_line, strlen(one_line), 0, scratch);
        if (result >= 0) {
            __DEBUG("new line data:%s\n", one_line);
            __DEBUG("keyword result is hit!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
            break;
        }
    }
    exit_code = EXIT_SUCCESS;
error:
    /* NOTE(review): the stream is never closed in this example. */
    if (fp) fclose(fp);
    free(one_line);
    if (scratch) hs_free_scratch(scratch);
    if (database) hs_free_database(database);
    return exit_code;
// Demo using the wrapper
#else
    enum { MAX_KEYS = 10 };              /* capacity of key[] below */
    struct keywords_ctx content;
    struct keyword_hy_data keyword_data;
    struct keyword_hy_config keyword_config;
    char *key[MAX_KEYS];
    char *end = NULL;
    long num = 0;
    int i = 0;

    /* Needs at least: program, file path, pattern count and one pattern. */
    if (argc < 4) {
        __DEBUG("please input %s [filepath] [num] [pattern1] ... [patternN]\n", argv[0]);
        return exit_code;
    }
    /* The count must be a plain number, fit into key[] and be backed by that
       many arguments; the original trusted atoi() and could overrun both
       key[] and argv[]. */
    num = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || num < 1 || num > MAX_KEYS || num > argc - 3) {
        __DEBUG("invalid pattern count: %s\n", argv[2]);
        return exit_code;
    }

    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        __DEBUG("file open fail\n");
        return exit_code;
    }
    __DEBUG("file open success\n");

    /* Zero both structs so the cleanup calls below are safe even when an
       early step fails. */
    memset(&keyword_data, 0, sizeof keyword_data);
    memset(&keyword_config, 0, sizeof keyword_config);
    keyword_data.result = -1;
    content.kw_ops = &hy_kw_ops;
    content.kw_cfg = &keyword_config;

    if (content.kw_ops->keyword_init(&content, 32) < 0) {
        printf("init fail\n");
        goto error2;
    }
    for (i = 0; i < num; i++) {
        key[i] = argv[i + 3];
    }
    if (content.kw_ops->keyword_add(&content, key, (int)num) < 0) {
        printf("add fail\n");
        goto error2;
    }
    if (content.kw_ops->keyword_compile(&content, &keyword_data) < 0) {
        printf("compile fail\n");
        goto error2;
    }
    one_line = malloc(READ_SIZE * sizeof(char) + 1);
    if (!one_line) {
        __DEBUG("one_line malloc fail\n");
        goto error2;
    }
    /* fgets() reads at most READ_SIZE-1 characters per call, so a long line
       is fed to the stream in several chunks. */
    while (fgets(one_line, READ_SIZE, fp) != NULL) {
        if (content.kw_ops->keyword_compare(&content, &keyword_data, one_line, (int)strlen(one_line)) < 0) {
            printf("keyword_compare fail\n");
            goto error2;
        }
        if (keyword_data.result >= 0) {
            __DEBUG("new line data:%s\n", one_line);
            __DEBUG("keyword result is hit!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
            break;
        }
    }
    exit_code = EXIT_SUCCESS;
error2:
    content.kw_ops->keyword_clean(&content, &keyword_data);
    content.kw_ops->keyword_free(&content);
    if (fp) fclose(fp);
    free(one_line);
    return exit_code;
#endif
}
编译运行
文件下载链接
把 lib 文件放在 /usr/lib/ 目录下:
/usr/lib/libhs_runtime-2.1.so
/usr/lib/libhs-2.1.so
gcc -g test.c hyperscan.c -lhs -o test