做页面静态化,网上经常提到的方式有两种:
1、模板页+替换的方式
2、伪静态化,地址重写
现在我想采用第三种方式,基于网页抓取技术实现(参考)
1、创建实现类
/// <summary>
/// A <see cref="System.Uri"/> that additionally carries a caller-assigned depth value.
/// </summary>
public class MyUri : System.Uri
{
    /// <summary>
    /// Depth value attached to this URI. Nothing in this class assigns it,
    /// so it keeps the default value 0 until a caller sets it.
    /// </summary>
    public int Depth;

    /// <summary>
    /// Builds the URI by handing <paramref name="uriString"/> to the
    /// <see cref="System.Uri"/> constructor.
    /// </summary>
    /// <param name="uriString">String form of the URI.</param>
    public MyUri(string uriString) : base(uriString)
    {
    }
}
/// <summary>
/// Describes a single HTTP request (URI, method, headers) and owns the
/// <see cref="MyWebResponse"/> whose socket is used to send it.
/// </summary>
public class MyWebRequest
{
    /// <summary>
    /// Creates a GET request for <paramref name="uri"/>.
    /// </summary>
    /// <param name="uri">Target URI; must not be null.</param>
    /// <param name="bKeepAlive">When true, a "Connection: Keep-Alive" header is added.</param>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
    public MyWebRequest(Uri uri, bool bKeepAlive)
    {
        if (uri == null)
            throw new ArgumentNullException("uri");

        Headers = new WebHeaderCollection();
        RequestUri = uri;
        // The Host header must carry the port when it is not the scheme's default,
        // otherwise a server listening on e.g. :8080 may pick the wrong virtual host.
        Headers["Host"] = uri.IsDefaultPort ? uri.Host : uri.Host + ":" + uri.Port;
        KeepAlive = bKeepAlive;
        if (KeepAlive)
            Headers["Connection"] = "Keep-Alive";
        Method = "GET";
    }

    /// <summary>
    /// Returns a request for <paramref name="uri"/>, reusing <paramref name="AliveRequest"/>
    /// (and therefore its open socket) when keep-alive is requested, the previous response
    /// asked for keep-alive, its socket is still connected, and it targets the same host and port.
    /// Otherwise a new request is created.
    /// </summary>
    /// <param name="uri">Target URI.</param>
    /// <param name="AliveRequest">Previous request whose connection may be reused; may be null.</param>
    /// <param name="bKeepAlive">Whether connection reuse is wanted.</param>
    /// <returns>Either <paramref name="AliveRequest"/> re-pointed at <paramref name="uri"/>, or a new request.</returns>
    public static MyWebRequest Create(Uri uri, MyWebRequest AliveRequest, bool bKeepAlive)
    {
        bool reusable =
            bKeepAlive &&
            uri != null &&
            AliveRequest != null &&
            AliveRequest.response != null &&
            AliveRequest.response.KeepAlive &&
            AliveRequest.response.socket != null &&
            AliveRequest.response.socket.Connected &&
            AliveRequest.RequestUri.Host == uri.Host &&
            // A socket is bound to one host:port pair, so the port has to match as well.
            AliveRequest.RequestUri.Port == uri.Port;

        if (reusable)
        {
            AliveRequest.RequestUri = uri;
            return AliveRequest;
        }
        return new MyWebRequest(uri, bKeepAlive);
    }

    /// <summary>
    /// Sends the request and reads the response header. A new connection is opened
    /// first when there is no response yet or its socket is missing or disconnected.
    /// </summary>
    /// <returns>The response, with its header already received; the body is still unread on the socket.</returns>
    public MyWebResponse GetResponse()
    {
        if (response == null || response.socket == null || response.socket.Connected == false)
        {
            response = new MyWebResponse();
            response.Connect(this);
            response.SetTimeout(Timeout);
        }
        response.SendRequest(this);
        response.ReceiveHeader();
        return response;
    }

    /// <summary>
    /// Timeout forwarded unchanged to <see cref="MyWebResponse.SetTimeout"/> when a new
    /// connection is opened. That method multiplies the value by 1000 before applying it
    /// as the socket send/receive timeout, so this value is effectively in seconds.
    /// </summary>
    public int Timeout;
    /// <summary>Request headers; sent after the request line.</summary>
    public WebHeaderCollection Headers;
    /// <summary>Raw request text; filled in by <see cref="MyWebResponse.SendRequest"/>.</summary>
    public string Header;
    /// <summary>URI this request targets.</summary>
    public Uri RequestUri;
    /// <summary>HTTP method; the constructor sets it to "GET".</summary>
    public string Method;
    /// <summary>Response (and socket) associated with this request; null until <see cref="GetResponse"/> runs.</summary>
    public MyWebResponse response;
    /// <summary>Whether keep-alive was requested at construction.</summary>
    public bool KeepAlive;
}
/// <summary>
/// Owns the TCP socket of one HTTP exchange: connects, sends the request text,
/// and reads and parses the response header. The response body is left on
/// <see cref="socket"/> for the caller to read.
/// </summary>
public class MyWebResponse
{
    // Splits the raw header into its non-empty lines.
    private static readonly Regex HeaderLinePattern = new Regex("[^\r\n]+");

    public MyWebResponse()
    {
    }

    /// <summary>
    /// Opens an IPv4 TCP connection to the host and port of the request URI.
    /// </summary>
    /// <param name="request">Request whose <c>RequestUri</c> names the server.</param>
    /// <exception cref="SocketException">The host has no IPv4 address or the connection failed.</exception>
    public void Connect(MyWebRequest request)
    {
        ResponseUri = request.RequestUri;
        // Resolve before creating the socket so a DNS failure cannot leak a handle.
        IPAddress address = FindIPv4Address(ResponseUri.Host);
        socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
        try
        {
            socket.Connect(new IPEndPoint(address, ResponseUri.Port));
        }
        catch
        {
            // Release the handle; a closed socket still reports Connected == false.
            socket.Close();
            throw;
        }
    }

    /// <summary>
    /// Builds the request text (request line followed by the request headers),
    /// stores it in <c>request.Header</c> and sends it ASCII-encoded over the socket.
    /// </summary>
    /// <param name="request">Request to send.</param>
    public void SendRequest(MyWebRequest request)
    {
        ResponseUri = request.RequestUri;
        request.Header = request.Method + " " + ResponseUri.PathAndQuery + " HTTP/1.0\r\n" + request.Headers;
        socket.Send(Encoding.ASCII.GetBytes(request.Header));
    }

    /// <summary>
    /// Sets the socket send and receive timeouts.
    /// </summary>
    /// <param name="Timeout">
    /// Timeout value; it is multiplied by 1000 before being applied to the socket options,
    /// so callers are expected to pass seconds.
    /// </param>
    public void SetTimeout(int Timeout)
    {
        socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.SendTimeout, Timeout * 1000);
        socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReceiveTimeout, Timeout * 1000);
    }

    /// <summary>
    /// Reads the response header byte by byte up to and including the blank line that
    /// ends it (so no body bytes are consumed), then fills <see cref="Header"/>,
    /// <see cref="Headers"/>, <see cref="ContentType"/>, <see cref="ContentLength"/>,
    /// <see cref="KeepAlive"/> and, for a 301/302 status with a Location header,
    /// <see cref="ResponseUri"/>.
    /// </summary>
    public void ReceiveHeader()
    {
        Headers = new WebHeaderCollection();
        // Reset so that a reused (keep-alive) response never reports the previous length.
        ContentLength = 0;

        StringBuilder raw = new StringBuilder();
        byte[] single = new byte[1];
        while (socket.Receive(single, 0, 1, SocketFlags.None) > 0)
        {
            raw.Append(Encoding.ASCII.GetString(single, 0, 1));
            int length = raw.Length;
            if (single[0] == '\n' && length >= 4 &&
                raw[length - 4] == '\r' && raw[length - 3] == '\n' && raw[length - 2] == '\r')
                break;
        }
        Header = raw.ToString();

        MatchCollection lines = HeaderLinePattern.Matches(Header.TrimEnd('\r', '\n'));
        // Line 0 is the status line; the remaining lines are "Name: value" pairs.
        for (int n = 1; n < lines.Count; n++)
        {
            string[] parts = lines[n].Value.Split(new char[] { ':' }, 2);
            if (parts.Length != 2)
                continue; // no colon: not a header field, skip instead of indexing past the array
            try
            {
                Headers[parts[0].Trim()] = parts[1].Trim();
            }
            catch (ArgumentException)
            {
                // The server sent a name or value the collection rejects; ignore that one field.
            }
        }

        // A 301/302 status with a Location header moves ResponseUri to the new address.
        if (lines.Count > 0)
        {
            string statusLine = lines[0].Value;
            bool redirected =
                statusLine.IndexOf(" 302 ", StringComparison.Ordinal) != -1 ||
                statusLine.IndexOf(" 301 ", StringComparison.Ordinal) != -1;
            string location = Headers["Location"];
            if (redirected && location != null)
            {
                // Uri(Uri, string) accepts both absolute and relative references,
                // so no try/catch fallback is needed.
                ResponseUri = new Uri(ResponseUri, location);
            }
        }

        ContentType = Headers["Content-Type"];

        int declaredLength;
        if (int.TryParse(Headers["Content-Length"],
                         System.Globalization.NumberStyles.None,
                         System.Globalization.CultureInfo.InvariantCulture,
                         out declaredLength))
            ContentLength = declaredLength;

        // Ordinal comparison: ToLower() is culture-sensitive and mis-folds 'I' under e.g. tr-TR.
        KeepAlive =
            string.Equals(Headers["Connection"], "keep-alive", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(Headers["Proxy-Connection"], "keep-alive", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Closes the socket if one was created.</summary>
    public void Close()
    {
        if (socket != null)
            socket.Close();
    }

    // Returns the first IPv4 address of the host; the socket is created for
    // AddressFamily.InterNetwork and cannot connect to an IPv6 address.
    private static IPAddress FindIPv4Address(string host)
    {
        foreach (IPAddress candidate in Dns.GetHostAddresses(host))
        {
            if (candidate.AddressFamily == AddressFamily.InterNetwork)
                return candidate;
        }
        throw new SocketException((int)SocketError.HostNotFound);
    }

    /// <summary>URI of the response; replaced by the redirect target on 301/302 with a Location header.</summary>
    public Uri ResponseUri;
    /// <summary>Value of the Content-Type header, or null when absent.</summary>
    public string ContentType;
    /// <summary>Value of the Content-Length header, or 0 when absent or not a number.</summary>
    public int ContentLength;
    /// <summary>Parsed response headers.</summary>
    public WebHeaderCollection Headers;
    /// <summary>Raw response header text as received.</summary>
    public string Header;
    /// <summary>Underlying socket; null until <see cref="Connect"/> has been called.</summary>
    public Socket socket;
    /// <summary>True when the response carries "Connection" or "Proxy-Connection: keep-alive".</summary>
    public bool KeepAlive;
}
/// <summary>
/// Helpers that fetch one page over HTTP and store it as a static file on disk,
/// together with a small text log of the exchange.
/// </summary>
public class uits
{
    /// <summary>
    /// Folder of the file handled by the latest <see cref="ParseUri"/> call;
    /// <see cref="LogUri"/> writes log.txt inside it. Empty until the first download.
    /// </summary>
    public static string BasePath = "";

    /// <summary>Root under which downloads are stored as &lt;root&gt;\&lt;host&gt;&lt;path&gt;.</summary>
    public static string DownloadFolder = "d:";

    // URL path endings whose content is not scanned for href/src references.
    private static readonly string[] UnparsedExtensions = { ".gif", ".jpg", ".css", ".zip", ".exe" };

    // Matches href="..." / src="..." attributes (lower- or upper-case attribute name).
    private static readonly Regex RefPattern = new Regex(@"(href|HREF|src|SRC)[ ]*=[ ]*[""'][^""'#>]+[""']");

    public uits()
    {
    }

    /// <summary>
    /// Downloads <paramref name="uri"/> into <see cref="DownloadFolder"/> and logs the
    /// request/response headers, byte count and number of href/src references found.
    /// When the server redirects, nothing is downloaded; the redirect is only logged.
    /// Failures are reported through <see cref="System.Diagnostics.Trace"/> and never thrown.
    /// </summary>
    /// <param name="uri">Page to download; must not be null.</param>
    /// <param name="request">
    /// Previous request whose connection may be reused, or null. On return it holds the
    /// request that was used, or null after a redirect or a failure.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
    public static void ParseUri(MyUri uri, ref MyWebRequest request)
    {
        if (uri == null)
            throw new ArgumentNullException("uri");

        bool KeepAlive = false;
        // Seconds. MyWebResponse.SetTimeout multiplies by 1000 itself, so the value
        // must not be converted to milliseconds here.
        int RequestTimeout = 10;

        StringBuilder status = new StringBuilder();
        // Note whether the connection is kept alive from a previous request.
        if (request != null && request.response != null && request.response.KeepAlive)
            status.Append("Connection live to: " + uri.Host + "\r\n\r\n");
        else
            status.Append("Connecting: " + uri.Host + "\r\n\r\n");

        MyWebRequest active = null;
        bool keepSocket = false;
        try
        {
            active = MyWebRequest.Create(uri, request, KeepAlive);
            request = active;
            request.Timeout = RequestTimeout;

            MyWebResponse response = request.GetResponse();
            status.Append(request.Header).Append(response.Header);

            // Derive the folder from the final file name so that the directory that is
            // created is the one the file is written to, and so that the log below lands
            // next to this page's file.
            string PathName = BuildLocalFileName(uri);
            BasePath = Path.GetDirectoryName(PathName);
            Directory.CreateDirectory(BasePath);

            // Redirection: log it and stop; the socket is closed in the finally block.
            if (response.ResponseUri.Equals(uri) == false)
            {
                status.Append("Redirected to: " + response.ResponseUri + "\r\n");
                LogUri(uri.AbsoluteUri, status.ToString());
                request = null;
                return;
            }

            bool bParse = ShouldScanForRefs(uri);
            string strResponse;
            int nTotalBytes = DownloadBody(response, PathName, bParse, out strResponse);

            if (response.KeepAlive)
            {
                keepSocket = true;
                status.Append("Connection kept alive to be used in subpages.\r\n");
            }
            else
            {
                response.Close();
                status.Append("Connection closed.\r\n");
            }

            status.Append(Commas(nTotalBytes) + " bytes, downloaded to \"" + PathName + "\"\r\n");

            // Only the number of references is reported. The crawler this was adapted from
            // also enqueued each reference for download at uri.Depth + 1; that part is not
            // ported here.
            MatchCollection matches = RefPattern.Matches(strResponse);
            status.Append("Found: " + matches.Count + " ref(s)\r\n");
            LogUri(uri.AbsoluteUri, status.ToString());
        }
        catch (Exception e)
        {
            // Best effort: a failed page must not take the caller down, but leave a trace.
            System.Diagnostics.Trace.WriteLine("uits.ParseUri failed for " + uri.AbsoluteUri + ": " + e);
            request = null;
        }
        finally
        {
            // Release the socket on every path that does not hand it back for reuse.
            if (!keepSocket && active != null && active.response != null && active.response.socket != null)
                active.response.socket.Close();
        }
    }

    // Maps a URI to the local file it is stored in: <DownloadFolder>\<host><local path>.
    private static string BuildLocalFileName(Uri uri)
    {
        string localPath = uri.LocalPath;
        // "/"-terminated paths without a query get a default file name.
        if (localPath.EndsWith("/", StringComparison.Ordinal) && uri.Query == "")
            localPath += "default.html";
        // With a query string the name is derived from the query's hash code.
        // NOTE(review): string.GetHashCode is not guaranteed to be stable across runtimes
        // or processes, so the same URL may map to a different file name later - confirm
        // whether a stable hash is needed before changing existing file names.
        if (uri.Query != "")
            localPath += uri.Query.GetHashCode() + ".html";
        return DownloadFolder + "\\" + uri.Host + localPath.Replace("%20", " ");
    }

    // True unless the URL path ends with one of the extensions in UnparsedExtensions.
    private static bool ShouldScanForRefs(Uri uri)
    {
        // AbsolutePath excludes the query string, so "/a.jpg?v=2" is still recognised.
        string path = uri.AbsolutePath;
        foreach (string ext in UnparsedExtensions)
        {
            if (path.EndsWith(ext, StringComparison.OrdinalIgnoreCase))
                return false;
        }
        return true;
    }

    // Streams the response body from the socket into pathName and returns the byte count.
    // When collectText is true the body is also returned ASCII-decoded in text.
    private static int DownloadBody(MyWebResponse response, string pathName, bool collectText, out string text)
    {
        StringBuilder collected = new StringBuilder();
        byte[] buffer = new byte[10240];
        int total = 0;
        using (FileStream output = File.Open(pathName, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
        {
            int received;
            while ((received = response.socket.Receive(buffer, 0, buffer.Length, SocketFlags.None)) > 0)
            {
                total += received;
                output.Write(buffer, 0, received);
                if (collectText)
                    collected.Append(Encoding.ASCII.GetString(buffer, 0, received));
                // On a kept-alive connection the server does not close the socket,
                // so stop once the declared Content-Length has arrived.
                if (response.KeepAlive && response.ContentLength > 0 && total >= response.ContentLength)
                    break;
            }
        }
        text = collected.ToString();
        return total;
    }

    /// <summary>
    /// Formats a number with "," between groups of three digits, e.g. 1234567 -> "1,234,567".
    /// </summary>
    /// <param name="nNum">Number to format; negative values keep a leading "-".</param>
    /// <returns>The formatted number.</returns>
    public static string Commas(int nNum)
    {
        return nNum.ToString("N0", System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Ensures the URL starts with a scheme ("http://" is prepended unless it already starts
    /// with "http://" or "https://") and has at least a "/" after the host.
    /// </summary>
    /// <param name="strURL">URL to normalize in place.</param>
    public static void Normalize(ref string strURL)
    {
        bool hasScheme =
            strURL.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            strURL.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        if (!hasScheme)
            strURL = "http://" + strURL;

        // Look for the first "/" after the "scheme://" prefix.
        int hostStart = strURL.IndexOf("://", StringComparison.Ordinal) + 3;
        if (strURL.IndexOf('/', hostStart) == -1)
            strURL += '/';
    }

    /// <summary>
    /// Appends an entry to log.txt inside <see cref="BasePath"/>.
    /// </summary>
    /// <param name="strHead">Heading of the entry (the URI being processed).</param>
    /// <param name="strBody">Entry text.</param>
    public static void LogUri(string strHead, string strBody)
    {
        string RootPath = BasePath + "\\log.txt";
        // Append so that earlier entries in the same folder are not lost.
        using (FileStream fs = new FileStream(RootPath, FileMode.Append, FileAccess.Write))
        {
            using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("utf-8")))
            {
                sw.Write(strHead + "\r\n" + strBody + "\r\n");
            }
        }
    }
}
2、页面文件:
<%-- Page form: TextBox1 holds the URL to fetch (pre-filled with http://www.baidu.com/);
     clicking Button1 posts back to the Button1_Click handler. --%>
<form id="form1" runat="server">
<div>
<asp:TextBox ID="TextBox1"
runat="server" Text="http://www.baidu.com/" Width="424px">http://www.baidu.com/</asp:TextBox>
<asp:Button ID="Button1" runat="server" Text="Button" onclick="Button1_Click" />
</div>
</form>
/// <summary>
/// Click handler of Button1: downloads the page whose URL is typed in TextBox1
/// by passing it to <c>uits.ParseUri</c>. Input that is not an absolute http URL is ignored.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    string strUri = TextBox1.Text.Trim();

    // SECURITY NOTE(review): the server fetches whatever URL the user supplies and writes
    // the result to disk. Restrict the allowed hosts before exposing this page publicly.
    Uri parsed;
    if (!Uri.TryCreate(strUri, UriKind.Absolute, out parsed))
        return; // not a valid absolute URL; constructing MyUri from it would throw

    // The downloader opens a plain TCP socket and sends the request unencrypted,
    // so only the http scheme can work.
    if (parsed.Scheme != Uri.UriSchemeHttp)
        return;

    MyWebRequest request = null;
    MyUri uri = new MyUri(strUri);
    uits.ParseUri(uri, ref request);
}
3.为什么采用这样的方式
开发 ASP.NET 网站时经常会用到用户自定义控件:网站的框架可以先由美工做好,然后把一个个用户自定义控件拖进对应的框内即可,类似 Nginx 的 include。但是这样得到的是动态网页,模板页不容易定义,不好处理。如果将每个模块分别静态化,然后像 ASP 那样 include 进来,则需要配置 xhtml。
采用我这样的方式,只需要在内容更新的时候调用对应的动态地址,即可将程序的内容静态化。当然,导航等链接的地址需要做全局的考虑。可以将静态化的地址放到对应的数据库中,这样可以进行定数的静态化。