用 Python 向文件中写入 '\b' 时,在 gedit 里显示正常,但在 Sublime 里显示为 <0x08>;如果用 Fortran 读取该文件,会出错。改为 ' ' 后,Sublime 和 Fortran 都正常了。
上次的爬虫代码有问题,现在改为:
# Wang Jianfeng Dec 14 2018
# python3
# Install selenium first: pip3 install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from urllib.request import urlopen
# Install geckodriver first
#
# Crawler: drives Firefox through Selenium, requests the NNphs2 page of
# nn-online.org once per (phase, [tmin, tmax]) window, extracts the text
# between "pwa93" and "</pre>" from the rendered page, and appends it to one
# text file per phase (e.g. "NP_1s0.txt") in the current directory.
driver = webdriver.Firefox()
##phaselist = (['1s0']) # For test
# NOTE(review): the third row repeats '3p2' twice and '3f2' once, so those
# phases are simply downloaded again and their files overwritten.  By analogy
# with the neighbouring rows it presumably should read '3d3','3f3','3g3' --
# confirm against the phase names the site accepts before changing it.
phaselist = (['1s0','3p0','1p1','3s1','3p1','3d1','e1',
              '1d2','3p2','3d2','3f2','e2',
              '1f3','3p2','3p2','3f2','e3',
              '1g4','3f4','3g4','3h4','e4',
              '1h5','3g5','3h5','3i5','e5'
              ])
url1 = "http://nn-online.org/NN/nn.php?program=NNphs2&s01=1&r=2&tmin="
url2 = "&tmax="
url3 = "&tint=0.01&ps="
nntype = "NP_"
txt = ".txt"

try:
    for phase in phaselist:
        # The file is written with the platform default encoding, as before.
        # The context manager guarantees it is closed even if a request or
        # the regex extraction fails half-way through a phase.
        with open(nntype + phase + txt, "w") as fw:
            fw.write(" ")
            # Windows are [0.01, 10], [10.01, 20], ..., [290.01, 300].
            # Deriving tmin from the integer tmax avoids accumulating float
            # error; the rounded value prints exactly as before.
            for tmax in range(10, 301, 10):
                tmin = round(tmax - 10 + 0.01, 2)
                url = url1 + str(tmin) + url2 + str(tmax) + url3 + phase
                driver.get(url)
                html = driver.page_source
                res = re.findall(r"pwa93(.+?)</pre>", html, flags=re.DOTALL)
                if not res:
                    # Fail with the offending URL instead of a bare IndexError.
                    raise RuntimeError("no 'pwa93 ... </pre>' table found at " + url)
                fw.write(res[0].strip())
                fw.write('\n')
                if tmax < 100:
                    # strip() removed the leading blanks of the chunk's first
                    # line; one blank is put back in front of the next chunk.
                    # Presumably this keeps the columns aligned for
                    # fixed-width readers -- TODO confirm.
                    fw.write(" ")
finally:
    # Always shut the browser down; the original left Firefox running.
    driver.quit()
去掉重复行的代码改为:
# De-duplication: for every phase, read "NP_<phase>.txt", treat characters
# 0-7 of each line as the row key and characters 8-18 as the value, and write
# "key + value" to "out/NP_<phase>.dat".  When consecutive rows share the same
# key only the last row of the run is kept.
import os

# NOTE(review): the third row repeats '3p2' twice and '3f2' once, so those
# files are just processed again.  By analogy with the neighbouring rows it
# presumably should read '3d3','3f3','3g3' -- confirm, and keep it in step
# with whatever list produced the input .txt files.
phaselist = (['1s0','3p0','1p1','3s1','3p1','3d1','e1',
              '1d2','3p2','3d2','3f2','e2',
              '1f3','3p2','3p2','3f2','e3',
              '1g4','3f4','3g4','3h4','e4',
              '1h5','3g5','3h5','3i5','e5'
              ])
nntype = "NP_"
txt = ".txt"
dat = ".dat"
path = "out/"

# Opening a file for writing does not create missing directories.
os.makedirs(path, exist_ok=True)

for phase in phaselist:
    ii = 0  # number of rows written for this phase
    with open(nntype + phase + txt, "r", encoding="utf-8") as fr, \
            open(path + nntype + phase + dat, "w", encoding="utf-8") as fw:
        pending = None  # (key, value) of the latest row not yet written
        for raw in fr:
            row = raw.rstrip("\n")
            if not row:
                # Blank lines carry no data.  Slicing each line (instead of
                # three size-limited readline() calls) keeps short or empty
                # lines from shifting the field boundaries of later rows.
                continue
            key, value = row[:8], row[8:19]
            if pending is not None and pending[0] != key:
                # The key changed, so the pending row closes its run of
                # duplicates and is emitted.
                fw.write(pending[0] + pending[1] + '\n')
                ii = ii + 1
            pending = (key, value)
        if pending is not None:
            # The final row has no successor to compare with; always keep it.
            fw.write(pending[0] + pending[1] + '\n')
            ii = ii + 1
    print(phase + " complete. Line = " + str(ii))