一、节假日判断模块
将未来的节假日存到数据库表holiday中
day | day_e | type | case
------------+------------+------+------
2017-10-06 | | 2 | 休息
2017-10-05 | | 2 | 休息
2017-10-04 | | 2 | 休息
2017-10-03 | | 2 | 休息
2017-10-02 | | 2 | 休息
2017-09-30 | | 0 | 工作
2017-07-01 | 2017-06-30 | 0 | 工作
2017-07-01 | 2017-08-31 | 1 | 休息
2017-05-30 | | 2 | 休息
2017-05-29 | | 2 | 休息
2017-05-27 | | 0 | 工作
2017-05-01 | | 2 | 休息
def isWorkDay(db):
    """Return True when today is a working day, False when it is a rest day.

    Lookup order against the ``holiday`` table (type: 0 = work, >0 = rest):
      1. range overrides: rows where day <= today <= day_e (day_e not null);
      2. single-day overrides: rows where day = today;
      3. fallback: the plain weekday rule, Monday-Friday are working days.

    ``db`` is any DB-API connection exposing cursor()/execute()/fetchall().
    """
    cur = db.cursor()
    now = datetime.datetime.now()
    today = now.strftime("%Y-%m-%d")
    # `today` is generated internally in strict ISO form, so interpolating it
    # into the SQL below cannot inject anything (and keeps the function
    # portable across DB-API drivers with different paramstyles).
    queries = (
        "select type from holiday where day<='%s' and day_e is not null and day_e>='%s'" % (today, today),
        "select type from holiday where day='%s'" % today,
    )
    for sql in queries:
        cur.execute(sql)
        rows = cur.fetchall()
        if rows:
            # Preserve the original behaviour: the last returned row wins.
            day_type = rows[-1][0]
            if day_type == 0:
                return True
            if day_type > 0:
                return False
    # No override found for today: Monday(0)..Friday(4) are working days.
    return now.weekday() < 5
使用和调用:
import util
#...
if util.isWorkDay(db):
#...
二、删除重复文件
服务器上有个文件夹用作中转存储,其中的文件在数据库中由一个表记录下来
数据表 "public.short_url"
栏位 | 类型 | 修饰词
-------------+-----------------------------+--------
id | bigint | 非空
version | bigint | 非空
origin | character varying(1024) | 非空
u | character varying(255) | 非空
keep_days | integer |
upload_date | timestamp without time zone |
has_file | boolean |
sha1 | character varying(255) |
当文件很多的时候,占用很大空间,扫描一遍, 文件内容相同的文件,保留一份,删除其他的。
# coding=gb2312
import sys
import pgdb
import datetime
import os
import hashlib
from optparse import OptionParser

# Cleanup job for the upload staging directory backed by public.short_url:
#   1. compute and store the sha1 of every file that does not have one yet;
#   2. collapse rows whose files have identical content onto a single file;
#   3. delete files on disk that no database row references any more.
# Run with -c/--cron to suppress progress output.

UPLOAD_DIR = '/usr/local/newupload/'

parser = OptionParser()
parser.add_option("-c", "--cron", dest="cron", action="store_true", help="", default=False)
(options, args) = parser.parse_args()

db = pgdb.connect(database='postgres', host='x.x.x.x', user='newupload', password='')
cur = db.cursor()

if not options.cron:
    print("++++++++++++++++get files sha1code++++++++++++++++")
# Phase 1: fill in the missing sha1 digests.
cur.execute("select sha1,origin from public.short_url where has_file='t' and sha1 is null and origin like 'http://?????/newupload/%'")
for r in cur.fetchall():
    # origin looks like 'http://?????/newupload/<name>'; the URL prefix is
    # 33 characters long, the rest is the local file name.
    fname = r[1][33:]
    try:
        # Hash in-process with hashlib instead of shelling out to
        # `openssl sha1` (faster, no output parsing, and immune to shell
        # injection through hostile file names).
        h = hashlib.sha1()
        f = open(UPLOAD_DIR + fname, 'rb')
        try:
            chunk = f.read(65536)
            while chunk:
                h.update(chunk)
                chunk = f.read(65536)
        finally:
            f.close()
    except (IOError, OSError):
        # Unreadable/vanished file: leave sha1 NULL for a later retry instead
        # of storing '' (storing '' made the dedup phase below wrongly treat
        # all failed files as duplicates of one another).
        continue
    digest = h.hexdigest()
    if not options.cron:
        print(digest)
    # Parameterized query: file names come from user uploads and must never
    # be spliced into the SQL text.
    cur.execute("update public.short_url set sha1=%s where origin=%s",
                (digest, 'http://????/newupload/' + fname))
    db.commit()

if not options.cron:
    print("++++++++++++++++remove dup files++++++++++++++++")
# Phase 2: for every group of rows sharing a sha1, keep one physical file.
# (sha1<>'' skips legacy rows where a failed hash was stored as empty.)
cur.execute("select sha1,origin from public.short_url where has_file='t' and sha1 is not null and sha1<>''")
for r in cur.fetchall():
    sha1 = r[0]
    fname = r[1][33:]
    cur2 = db.cursor()
    cur2.execute("select count(distinct origin) from short_url where sha1=%s", (sha1,))
    for r2 in cur2.fetchall():
        if r2[0] > 1:
            # Point every duplicate row at this one file, and extend
            # keep_days to the longest remaining retention among them.
            cur2.execute(
                "update short_url set upload_date = now() ,origin=%s,"
                "keep_days="
                "(select max(date_part('day', upload_date + interval '1 day'*keep_days- now()+ interval '1 day')) from short_url where sha1=%s)"
                "where sha1=%s",
                ('http://????/newupload/' + fname, sha1, sha1))
            if not options.cron:
                print('process dup files')
            db.commit()
            break
db.commit()

if not options.cron:
    print("++++++++++++++++remove unreferenced local files++++++++++++++++")
# Phase 3: delete files present in the folder but absent from the database.
for name in os.listdir(UPLOAD_DIR):
    path = os.path.join(UPLOAD_DIR, name)
    if not os.path.isfile(path):
        continue
    cur.execute("select count(*) from public.short_url where has_file='t' and origin = %s",
                ('http://????/newupload/' + name,))
    referenced = False
    for row in cur.fetchall():
        if row[0] > 0:
            referenced = True
            break
    if not referenced:
        if not options.cron:
            print('removing ' + path)
        # os.remove instead of `rm -rf` through a shell: no shell injection
        # through hostile file names and no accidental recursive delete.
        try:
            os.remove(path)
        except OSError:
            pass  # best-effort, matching the old `rm -f` semantics
db.close()