Python3通过Everything SDK访问本地文件

需求:

        在本地文件中,查找书单《信息安全从业者书单》里列出的书籍。

原理:

        遍历 README.md,取出每本书的书名,再通过 Everything SDK 在本地逐一查找。

1、计算文件CRC32

        因为只需要确定本地文件的唯一性,而 CRC32 的计算速度比 MD5 和 SHA1 更快,所以这里计算 CRC32。

#!/usr/bin/env python
#-*- coding:utf-8 -*-  

import zlib
import os

# Number of bytes read per chunk (1 MiB).
block_size = 1024 * 1024


# Checksum only the leading block_size bytes of a file.
def crc32_simple(filepath):
    """Return the CRC32 of at most the first ``block_size`` bytes of a file.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        The checksum computed by ``zlib.crc32``, or 0 when anything goes
        wrong; in that case the error message is printed.
    """
    checksum = 0
    try:
        with open(filepath, 'rb') as stream:
            head = stream.read(block_size)
        checksum = zlib.crc32(head, 0)
    except Exception as err:
        print(str(err))
    return checksum

# Compute the CRC32 of an entire file.
def crc32_file(filepath, chunk_size=None):
    """Return the CRC32 of the whole file at *filepath* as an unsigned integer.

    The file is read chunk by chunk so large files never have to fit in
    memory, and the handle is always closed, even when a read fails.

    Args:
        filepath: Path of the file to checksum (opened in binary mode).
        chunk_size: Positive number of bytes to read per iteration. Defaults
            to the module-level ``block_size``.

    Returns:
        The checksum as a non-negative decimal integer. On any error the
        message is printed and 0 is returned (an empty file also yields 0).
    """
    if chunk_size is None:
        chunk_size = block_size
    crc = 0
    try:
        with open(filepath, 'rb') as fd:
            while True:
                buffer = fd.read(chunk_size)
                if not buffer:  # EOF (or empty file)
                    break
                crc = zlib.crc32(buffer, crc)
    except Exception as e:
        print(str(e))
        return 0
    # Masking keeps the result unsigned on every Python version, so no
    # interpreter-version check is needed.
    return crc & 0xFFFFFFFF

2、文件大小自动变换单位

递归实现 文件大小根据bytes,返回合理区间['B', 'KB', 'MB', 'GB', 'TB', 'PB']。eg : 16473740 bytes--> 15.727 MB

# Render a byte count in the largest fitting unit, e.g. 16473740 bytes --> 15.710 MB
def FormatSize(size):
    """Format a size in bytes as ``'<value> <unit>'`` with three decimals.

    The unit is the largest of B, KB, MB, GB, TB, PB (powers of 1024) that
    keeps the value at or above 1; sizes beyond the PB range stay in PB.
    The value is truncated, not rounded, to three decimal places.

    Args:
        size: Size in bytes (converted with ``int``).

    Returns:
        The formatted string, e.g. ``'1.500 KB'``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    size = int(size)

    # Pick the unit; stop at the last one so huge values are not divided
    # further than the unit that is printed.
    level = 0
    divisor = 1
    while size >= divisor * 1024 and level < len(units) - 1:
        divisor *= 1024
        level += 1

    # Integer arithmetic in thousandths of the chosen unit: the fractional
    # part is a true decimal fraction and is always exactly three digits.
    thousandths = size * 1000 // divisor
    return '{}.{:03d} {}'.format(thousandths // 1000, thousandths % 1000, units[level])

3、调用Everything SDK,通过everything64.dll来完成交互。

import ctypes
import datetime
import struct

#dll imports
#Load the Everything SDK library; the path is relative ("./Everything64.dll").
everything_dll = ctypes.WinDLL (r"./Everything64.dll")
#Declare the C signatures ctypes should use for the calls below.
#Both getters take a result index and a pointer to an unsigned 64-bit output value.
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
#Everything_GetResultFileNameW takes a result index and returns a wide-character string.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p

# Time conversion
def get_time(filetime):
    """Convert a Windows FILETIME value to a ``datetime.datetime``.

    Based on
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: Buffer holding the tick count as an 8-byte little-endian
            unsigned integer (it is decoded with ``struct.unpack('<Q', ...)``).

    Returns:
        The result of ``datetime.datetime.fromtimestamp`` for the
        corresponding POSIX timestamp.
    """
    # One tick is 100 nanoseconds, i.e. 10,000,000 ticks per second.
    ticks_per_second = int(1 / 10 ** -7)
    # Gap between the Windows epoch (1601-01-01) and the POSIX epoch (1970-01-01).
    epoch_gap = datetime.datetime(1970, 1, 1) - datetime.datetime(1601, 1, 1)
    posix_epoch_in_ticks = epoch_gap.total_seconds() * ticks_per_second  # 116444736000000000.0

    (ticks,) = struct.unpack('<Q', filetime)
    posix_seconds = (ticks - posix_epoch_in_ticks) / ticks_per_second
    return datetime.datetime.fromtimestamp(posix_seconds)
    
#defines - see Everything.h for the definitions
#Request flags: bit values that are OR-ed together for Everything_SetRequestFlags.
EVERYTHING_REQUEST_FILE_NAME = 0x00000001
EVERYTHING_REQUEST_PATH = 0x00000002
EVERYTHING_REQUEST_SIZE = 0x00000010
EVERYTHING_REQUEST_DATE_MODIFIED = 0x00000040

#Sort order passed to Everything_SetSort (size, descending).
EVERYTHING_SORT_SIZE_DESCENDING = 6

# Keyword search
def searchfile(bookName):
    """Look up *bookName* through the Everything SDK and record every hit.

    Punctuation in the title is replaced by spaces to build the search
    keyword. Results are requested largest file first; hits ending in
    ``.lnk`` or ``.txt`` are skipped. For every remaining file the path,
    formatted size and CRC32 are printed and written to the module-level
    ``fout`` file, framed by a header carrying the original title and a
    footer carrying the number of files found.

    Args:
        bookName: Book title to search for.

    Returns:
        None. Returns early, without querying, when no searchable text is
        left after the punctuation has been removed.
    """
    # The hyphen is escaped so it is a literal '-': unescaped it formed the
    # range ' '..'(' and was never stripped itself. '!#$%&' used to be matched
    # through that range and are listed explicitly so they are still stripped.
    recom = re.compile(r'[《》::、;.,,;—— !#$%&\-()()【】\'\"]')
    keyword = recom.sub(' ', bookName).strip()
    if not keyword:
        return
    # largest files first
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    everything_dll.Everything_SetRequestFlags(EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)

    # execute the query
    everything_dll.Everything_QueryW(1)

    # get the number of results
    num_results = everything_dll.Everything_GetNumResults()

    # show the number of results
    result = "\nResult Count: {}\n".format(num_results)
    print(keyword, result)

    # create buffers
    max_path = 260
    file_name = ctypes.create_unicode_buffer(max_path)
    file_size = ctypes.c_ulonglong(1)

    bPrint = False
    nCount = 0
    # show results
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, file_name, max_path)
        everything_dll.Everything_GetResultSize(i, file_size)
        filepath = ctypes.wstring_at(file_name)
        if filepath.endswith(('.lnk', '.txt')):
            continue
        # CRC32 of the file, formatted like 0x1122AAFF
        filecrc = hex(crc32_file(filepath)).upper().replace("0X", "0x")
        filesize = FormatSize(file_size.value)
        strInfo = "\nFilePath: {}\nSize: {}    CRC32:{}".format(filepath, filesize, filecrc)
        print(strInfo)
        if not bPrint:
            # The header is written once, before the first recorded file.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
            bPrint = True
        fout.write(strInfo)
        nCount += 1
    if bPrint:
        fout.write("\nFind Count:{}".format(nCount))
        fout.write("\n=======↑↑↑↑↑===========\n")

完整代码

#!/usr/bin/env python
#-*- coding:utf-8 -*-  
""" 
@author:hiltonwei
@file: secBooksFind.py 
@time: 2021/12/06 
@desc: 
    信息安全从业者书单推荐 https://github.com/riusksk/secbook
    step1 读入 README.md,读取《》内书名
    step2 通过everything的sdk查找文件,并计算文件CRC32校验值,写入txt中
"""

import zlib
import os
import sys
import ctypes
import datetime
import struct
import io
import re

#dll imports
#Load the Everything SDK library; the path is relative ("./Everything64.dll").
everything_dll = ctypes.WinDLL (r"./Everything64.dll")
#Declare the C signatures ctypes should use for the calls below.
#Both getters take a result index and a pointer to an unsigned 64-bit output value.
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
#Everything_GetResultFileNameW takes a result index and returns a wide-character string.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p


#Results file written by searchfile; opened at module level in append mode ('a+'), so existing content is kept.
#NOTE(review): no encoding is passed, so the platform default applies - confirm it can represent the book titles.
fout = open("secBooksFind.txt", 'a+')

# Number of bytes read per chunk (1 MiB).
block_size = 1024 * 1024


# Checksum only the leading block_size bytes of a file.
def crc32_simple(filepath):
    """Return the CRC32 of at most the first ``block_size`` bytes of a file.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        The checksum computed by ``zlib.crc32``, or 0 when anything goes
        wrong; in that case the error message is printed.
    """
    checksum = 0
    try:
        with open(filepath, 'rb') as stream:
            head = stream.read(block_size)
        checksum = zlib.crc32(head, 0)
    except Exception as err:
        print(str(err))
    return checksum

# Compute the CRC32 of an entire file.
def crc32_file(filepath, chunk_size=None):
    """Return the CRC32 of the whole file at *filepath* as an unsigned integer.

    The file is read chunk by chunk so large files never have to fit in
    memory, and the handle is always closed, even when a read fails.

    Args:
        filepath: Path of the file to checksum (opened in binary mode).
        chunk_size: Positive number of bytes to read per iteration. Defaults
            to the module-level ``block_size``.

    Returns:
        The checksum as a non-negative decimal integer. On any error the
        message is printed and 0 is returned (an empty file also yields 0).
    """
    if chunk_size is None:
        chunk_size = block_size
    crc = 0
    try:
        with open(filepath, 'rb') as fd:
            while True:
                buffer = fd.read(chunk_size)
                if not buffer:  # EOF (or empty file)
                    break
                crc = zlib.crc32(buffer, crc)
    except Exception as e:
        print(str(e))
        return 0
    # Masking keeps the result unsigned on every Python version, so no
    # interpreter-version check is needed.
    return crc & 0xFFFFFFFF

# Render a byte count in the largest fitting unit, e.g. 16473740 bytes --> 15.710 MB
def FormatSize(size):
    """Format a size in bytes as ``'<value> <unit>'`` with three decimals.

    The unit is the largest of B, KB, MB, GB, TB, PB (powers of 1024) that
    keeps the value at or above 1; sizes beyond the PB range stay in PB.
    The value is truncated, not rounded, to three decimal places.

    Args:
        size: Size in bytes (converted with ``int``).

    Returns:
        The formatted string, e.g. ``'1.500 KB'``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    size = int(size)

    # Pick the unit; stop at the last one so huge values are not divided
    # further than the unit that is printed.
    level = 0
    divisor = 1
    while size >= divisor * 1024 and level < len(units) - 1:
        divisor *= 1024
        level += 1

    # Integer arithmetic in thousandths of the chosen unit: the fractional
    # part is a true decimal fraction and is always exactly three digits.
    thousandths = size * 1000 // divisor
    return '{}.{:03d} {}'.format(thousandths // 1000, thousandths % 1000, units[level])

# Time conversion
def get_time(filetime):
    """Convert a Windows FILETIME value to a ``datetime.datetime``.

    Based on
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: Buffer holding the tick count as an 8-byte little-endian
            unsigned integer (it is decoded with ``struct.unpack('<Q', ...)``).

    Returns:
        The result of ``datetime.datetime.fromtimestamp`` for the
        corresponding POSIX timestamp.
    """
    # One tick is 100 nanoseconds, i.e. 10,000,000 ticks per second.
    ticks_per_second = int(1 / 10 ** -7)
    # Gap between the Windows epoch (1601-01-01) and the POSIX epoch (1970-01-01).
    epoch_gap = datetime.datetime(1970, 1, 1) - datetime.datetime(1601, 1, 1)
    posix_epoch_in_ticks = epoch_gap.total_seconds() * ticks_per_second  # 116444736000000000.0

    (ticks,) = struct.unpack('<Q', filetime)
    posix_seconds = (ticks - posix_epoch_in_ticks) / ticks_per_second
    return datetime.datetime.fromtimestamp(posix_seconds)
    
#defines - see Everything.h for the definitions
#Request flags: bit values that are OR-ed together for Everything_SetRequestFlags.
EVERYTHING_REQUEST_FILE_NAME = 0x00000001
EVERYTHING_REQUEST_PATH = 0x00000002
EVERYTHING_REQUEST_SIZE = 0x00000010
EVERYTHING_REQUEST_DATE_MODIFIED = 0x00000040

#Sort order passed to Everything_SetSort (size, descending).
EVERYTHING_SORT_SIZE_DESCENDING = 6

# Keyword search
def searchfile(bookName):
    """Look up *bookName* through the Everything SDK and record every hit.

    Punctuation in the title is replaced by spaces to build the search
    keyword. Results are requested largest file first; hits ending in
    ``.lnk`` or ``.txt`` are skipped. For every remaining file the path,
    formatted size and CRC32 are printed and written to the module-level
    ``fout`` file, framed by a header carrying the original title and a
    footer carrying the number of files found.

    Args:
        bookName: Book title to search for.

    Returns:
        None. Returns early, without querying, when no searchable text is
        left after the punctuation has been removed.
    """
    # The hyphen is escaped so it is a literal '-': unescaped it formed the
    # range ' '..'(' and was never stripped itself. '!#$%&' used to be matched
    # through that range and are listed explicitly so they are still stripped.
    recom = re.compile(r'[《》::、;.,,;—— !#$%&\-()()【】\'\"]')
    keyword = recom.sub(' ', bookName).strip()
    if not keyword:
        return
    # largest files first
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    everything_dll.Everything_SetRequestFlags(EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)

    # execute the query
    everything_dll.Everything_QueryW(1)

    # get the number of results
    num_results = everything_dll.Everything_GetNumResults()

    # show the number of results
    result = "\nResult Count: {}\n".format(num_results)
    print(keyword, result)

    # create buffers
    max_path = 260
    file_name = ctypes.create_unicode_buffer(max_path)
    file_size = ctypes.c_ulonglong(1)

    bPrint = False
    nCount = 0
    # show results
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, file_name, max_path)
        everything_dll.Everything_GetResultSize(i, file_size)
        filepath = ctypes.wstring_at(file_name)
        if filepath.endswith(('.lnk', '.txt')):
            continue
        # CRC32 of the file, formatted like 0x1122AAFF
        filecrc = hex(crc32_file(filepath)).upper().replace("0X", "0x")
        filesize = FormatSize(file_size.value)
        strInfo = "\nFilePath: {}\nSize: {}    CRC32:{}".format(filepath, filesize, filecrc)
        print(strInfo)
        if not bPrint:
            # The header is written once, before the first recorded file.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
            bPrint = True
        fout.write(strInfo)
        nCount += 1
    if bPrint:
        fout.write("\nFind Count:{}".format(nCount))
        fout.write("\n=======↑↑↑↑↑===========\n")

# Read the book list and search for every title found between 《 and 》.
def readMd(fileName):
    """Run ``searchfile`` on the 《...》 title of each list line in a file.

    The file is read completely as UTF-8 first. Only lines starting with
    '·' are considered; from each of them the text from the first '《' up
    to and including the first '》' is handed to ``searchfile``.

    Args:
        fileName: Path of the book-list file.
    """
    with io.open(fileName, 'r', encoding='utf-8') as handle:
        lines = list(handle)

    for text in lines:
        if not text.startswith('·'):
            continue
        # Slice bounds come straight from find(); a found '》' is included.
        open_at = text.find('《')
        close_at = text.find('》')
        if close_at != -1:
            close_at += 1
        searchfile(text[open_at:close_at])
        
#Script entry point: process the book list in "README.md".
if __name__ == "__main__":
    readMd("README.md")

#Close the results file. NOTE(review): this statement is at module level, so it also runs when the module is imported.
fout.close()

猜你喜欢

转载自blog.csdn.net/mybachelor123/article/details/121795915
今日推荐