用python删除C++注释部分

参考 http://cn.voidcc.com/question/p-gsegzgeh-oh.html,我采用这个方案。


import re

# Alternatives, tried left to right at each position:
#   //...EOL   line comment
#   /*...*/    block comment (DOTALL lets it span lines)
#   '...'      character literal (with backslash escapes)
#   "..."      string literal (with backslash escapes)
# Literals are matched only so that comment markers inside them are skipped.
_COMMENT_OR_LITERAL = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE
)


def removeCCppComment(text):
    """Return *text* (C/C++ source) with all comments removed.

    String and character literals are left untouched, so ``//`` or ``/*``
    appearing inside a literal is not treated as a comment.  Newlines that
    were inside a block comment are kept, so line numbers of the remaining
    code do not change.

    Args:
        text: The C/C++ source code as a single string.

    Returns:
        The source with every ``//`` and ``/* */`` comment blotted out.
    """
    def replacer(match):
        s = match.group(0)
        if not s.startswith('/'):
            # Matched string is '...' or "..." ==> keep unchanged.
            return s
        # Matched string is //...EOL or /*...*/ ==> keep only its newlines.
        newlines = "\n" * s.count('\n')
        if s.startswith('/*') and not newlines:
            # A one-line block comment may be the only thing separating two
            # tokens (``int/**/x``); leave a single space so they are not
            # glued together.
            return " "
        return newlines

    return _COMMENT_OR_LITERAL.sub(replacer, text)
还有另外一个方案:
from subprocess import check_output 

class Util:
    """Comment stripping that delegates to the external ``cpp`` program."""

    def strip_comments(self, source_code):
        """Run ``cpp -fpreprocessed`` on a file and return its output.

        Args:
            source_code: Path of the file handed to ``cpp`` (a file name,
                not the source text itself).

        Returns:
            Whatever ``check_output`` returns for the command, i.e. the
            captured standard output of ``cpp`` (bytes by default).

        Raises:
            subprocess.CalledProcessError: If ``cpp`` exits with a non-zero
                status (raised by ``check_output``).
        """
        process = check_output(['cpp', '-fpreprocessed', source_code], shell=False)
        return process


if __name__ == "__main__":
    util = Util()
    # check_output yields bytes; decode so the text itself is printed rather
    # than a b'...' representation.
    print(util.strip_comments("somefile.ext").decode(errors="replace"))

对于 #if 0之类的注释,我的方案是:
def get_encoding(file):
    """Guess the text encoding of *file* from its raw bytes.

    The file is opened in binary mode, read completely, and handed to
    ``chardet.detect``; the ``'encoding'`` entry of that result is returned.
    """
    with open(file, 'rb') as stream:
        raw_bytes = stream.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

#--------------------------------------------------------------
def deal_file(src):
    """Strip ``#if 0`` regions and comments from a C/C++ file, in place.

    The file is first copied to a temporary ``.cpp`` file in the current
    directory with every ``#if 0`` region removed, then that copy is run
    through ``cpp -fpreprocessed`` and the result is written back to *src*.

    While inside an ``#if 0`` region nothing is copied, so the whole region
    up to and including its matching ``#endif`` is dropped (any ``#else``
    branch in it is dropped too).  Nested conditionals inside the region are
    tracked with a depth counter via ``find_if`` / ``find_endif``.

    Args:
        src: Path of the source file to rewrite.

    Returns:
        True if the file was rewritten; False if it does not exist, is a
        symbolic link, does not have a C/C++ extension, or ``cpp`` could not
        be run successfully (in which case *src* is left untouched).

    Raises:
        AssertionError: If the conditionals inside an ``#if 0`` region are
            not balanced by the end of the file.
    """
    if not os.path.exists(src):
        print('Error: file - %s doesn\'t exist.' % src)
        return False
    if os.path.islink(src):
        print('Error: file - %s is a link.' % src)
        return False
    filetype = os.path.splitext(src)[1]
    if filetype not in ('.c', '.h', '.cpp', '.hh', '.cc'):
        return False
    try:
        if not os.access(src, os.W_OK):
            # Add owner-write on top of the existing permission bits instead
            # of replacing the whole mode.
            os.chmod(src, stat.S_IMODE(os.stat(src).st_mode) | stat.S_IWUSR)
    except OSError:
        print('Error: you can not chang %s\'s mode.' % src)

    # Detection may yield None (e.g. for an empty file); Popen needs a real
    # encoding to give text-mode pipes, so fall back to UTF-8.
    encoding = get_encoding(src) or 'utf-8'
    out = ranstr(8) + '.cpp'
    try:
        depth = 0  # nesting level inside an `#if 0` region; 0 means outside
        with open(src, 'r', encoding=encoding, errors='ignore') as inputf, \
                open(out, 'w', encoding=encoding) as outputf:
            for line in inputf:
                if depth == 0:
                    if find_if0(line):
                        depth += 1
                        if Test:
                            print('aaa')
                    else:
                        outputf.write(line)
                elif find_endif(line):
                    depth -= 1
                elif find_if(line):
                    depth += 1

        # Explicit raise so the check also runs under `python -O`.
        if depth != 0:
            raise AssertionError(
                'unbalanced conditional directives in %s' % src)

        try:
            # Argument list, no shell: the file name is never shell-parsed.
            p = Popen(['cpp', '-fpreprocessed', out],
                      stdout=PIPE, stderr=PIPE,
                      encoding=encoding, errors='replace')
        except OSError as err:
            print('Error: can not run cpp - %s' % err)
            return False
        # communicate() drains both pipes (no deadlock on a full stderr
        # pipe) and waits for the process to finish.
        stdout_text, stderr_text = p.communicate()
        if p.returncode != 0:
            # Do not overwrite the source with partial or empty output.
            print('Error: cpp failed on %s: %s' % (src, stderr_text))
            return False

        with open(src, 'w', encoding=encoding) as of:
            # Skip the first output line, as the original did (presumably
            # the linemarker cpp emits at the top - TODO confirm).
            of.write(stdout_text.partition('\n')[2])
    finally:
        # Always clean up the temporary copy, including on error paths.
        if os.path.exists(out):
            os.remove(out)
    return True
 

猜你喜欢

转载自www.cnblogs.com/tangxiaosheng/p/12502140.html