using System;
using System.Text;
using System.Net;
using System.IO;
using System.Threading;
using System.Text.RegularExpressions;
using Admin.Core.Common.Attributes;
namespace Admin.Core.Common.Helpers
{
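/// <summary>
/// Helper for fetching HTML or raw streams over HTTP and for stripping HTML markup from text.
/// </summary>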
[SingleInstance]
public class HtmlHelper
{
#region 私有字段
// Value assigned to the Content-Type header of every request built by this helper.
private readonly string _ContentType = "application/json";
// Value assigned to the Accept header of every request built by this helper.
private readonly string _Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
// Value assigned to the User-Agent header of every request built by this helper.
private readonly string _UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
// Base delay backing the NetworkDelay property (written by its setter, read by its getter).
private int _Delay = 1000;
// Attempt counter kept on the instance; any retry logic that reads it shares it across calls.
private int _CurrentTry = 0;
#endregion
#region 公有属性
/// <summary>
/// Cookie container created together with this helper instance.
/// </summary>
public CookieContainer CookieContainer { get; } = new CookieContainer();
/// <summary>
/// Text encoding handed to the reader that decodes response bodies in GetHtml. Initialized to UTF-8.
/// </summary>
public Encoding Encoding { get; set; } = Encoding.GetEncoding("utf-8");
/// <summary>
/// Randomized pause, in milliseconds, used to space out requests.
/// Reading the property returns a value in the range [delay, 2 * delay);
/// writing it sets the base delay.
/// </summary>
/// <remarks>
/// A negative base delay is treated as zero and the upper bound is capped at
/// <see cref="int.MaxValue"/>, so the getter never throws.
/// </remarks>
public int NetworkDelay
{
    get
    {
        // Random.Next(min, max) throws when min > max, which happened for a
        // negative base delay or when doubling overflowed; clamp both ends.
        int lower = Math.Max(0, _Delay);
        int upper = (int)Math.Min((long)lower * 2, int.MaxValue);

        // System.Random is not thread-safe, so guard the shared generator.
        lock (_DelayRandom)
        {
            return _DelayRandom.Next(lower, upper);
        }
    }
    set
    {
        _Delay = value;
    }
}

// One generator for all reads: time-seeded instances created in quick
// succession can produce identical values, which defeats the jitter.
private static readonly Random _DelayRandom = new Random();
/// <summary>
/// Retry ceiling: a failed request is retried only while the attempt count has not exceeded this value.
/// Also used as the ServicePoint connection limit on each request. Initialized to 300.
/// </summary>
public int MaxTry { get; set; } = 300;
#endregion
#region 获取HTML
/// <summary>
/// Sends <paramref name="postData"/> to <paramref name="url"/> with a POST request and
/// returns the response body as text, retrying failed attempts.
/// </summary>
/// <param name="url">Address to request; it is also sent as the Referer header.</param>
/// <param name="postData">
/// Request body. When null or empty the call is forwarded to the two-argument overload (a plain GET).
/// </param>
/// <param name="isPost">
/// True to POST <paramref name="postData"/>. False issues a plain GET through the two-argument
/// overload, because a GET request cannot carry a body.
/// </param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>
/// The response body decoded with <see cref="HtmlHelper.Encoding"/>, or an empty string when
/// <paramref name="url"/> is null/empty or every attempt fails.
/// </returns>
public string GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
{
    // Writing a body to a GET request throws ProtocolViolationException, so the
    // non-POST case can only ever succeed as a plain GET.
    if (!isPost || string.IsNullOrEmpty(postData))
    {
        return GetHtml(url, cookieContainer);
    }

    // Nothing to request; skip the retry loop (and its delays) entirely.
    if (string.IsNullOrEmpty(url))
    {
        return string.Empty;
    }

    // Encode the body with the configured encoding so request and response agree.
    byte[] requestBody = this.Encoding.GetBytes(postData);

    // One initial attempt plus up to MaxTry retries. The counter is local so
    // overlapping calls on the shared instance cannot disturb each other.
    int attempt = 0;
    do
    {
        Thread.Sleep(NetworkDelay);

        HttpWebRequest request = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = _ContentType;
            if (MaxTry > 0)
            {
                // ConnectionLimit rejects values <= 0; keep the default in that case.
                request.ServicePoint.ConnectionLimit = MaxTry;
            }
            request.Referer = url;
            request.Accept = _Accept;
            request.UserAgent = _UserAgent;
            request.Method = "POST";
            request.ContentLength = requestBody.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(requestBody, 0, requestBody.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, this.Encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best-effort contract: any failure counts as a failed attempt and
            // never propagates. Release the connection before trying again.
            request?.Abort();
        }

        attempt++;
    }
    while (attempt <= MaxTry);

    return string.Empty;
}
/// <summary>
/// Requests <paramref name="url"/> with GET and returns the response body as text,
/// retrying failed attempts.
/// </summary>
/// <param name="url">Address to request; it is also sent as the Referer header.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>
/// The response body decoded with <see cref="HtmlHelper.Encoding"/>, or an empty string when
/// <paramref name="url"/> is null/empty or every attempt fails.
/// </returns>
public string GetHtml(string url, CookieContainer cookieContainer)
{
    // Nothing to request; skip the retry loop (and its delays) entirely.
    if (string.IsNullOrEmpty(url))
    {
        return string.Empty;
    }

    // One initial attempt plus up to MaxTry retries. The counter is local so
    // overlapping calls on the shared instance cannot disturb each other.
    int attempt = 0;
    do
    {
        Thread.Sleep(NetworkDelay);

        HttpWebRequest request = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = _ContentType;
            if (MaxTry > 0)
            {
                // ConnectionLimit rejects values <= 0; keep the default in that case.
                request.ServicePoint.ConnectionLimit = MaxTry;
            }
            request.Referer = url;
            request.Accept = _Accept;
            request.UserAgent = _UserAgent;
            request.Method = "GET";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, this.Encoding))
            {
                // Returning from inside the loop hands a successful retry back to the caller.
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best-effort contract: any failure counts as a failed attempt and
            // never propagates. Release the connection before trying again.
            request?.Abort();
        }

        attempt++;
    }
    while (attempt <= MaxTry);

    return string.Empty;
}
#endregion
#region 获取字符流
//---------------------------------------------------------------------------------------------------------------
// Example (htmlHelper is an HtmlHelper instance):
// System.Net.CookieContainer cookie = new System.Net.CookieContainer();
// Stream s = htmlHelper.GetStream("http://ptlogin2.qq.com/getimage?aid=15000102&0.43878429697395826", cookie);
// picVerify.Image = Image.FromStream(s);
//---------------------------------------------------------------------------------------------------------------
/// <summary>
/// Requests <paramref name="url"/> with GET and returns the raw response stream,
/// retrying failed attempts.
/// </summary>
/// <param name="url">Address to request; it is also sent as the Referer header.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>
/// The response stream, or null when <paramref name="url"/> is null/empty or every attempt fails.
/// The caller owns the stream and should close it when done so the connection is released.
/// </returns>
public Stream GetStream(string url, CookieContainer cookieContainer)
{
    // Nothing to request; skip the retry loop entirely.
    if (string.IsNullOrEmpty(url))
    {
        return null;
    }

    // One initial attempt plus up to MaxTry retries, tracked locally so
    // overlapping calls on the shared instance cannot disturb each other.
    int attempt = 0;
    do
    {
        if (attempt > 0)
        {
            // Pause before a retry; the first attempt goes out immediately.
            Thread.Sleep(NetworkDelay);
        }

        HttpWebRequest request = null;
        HttpWebResponse response = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = _ContentType;
            if (MaxTry > 0)
            {
                // ConnectionLimit rejects values <= 0; keep the default in that case.
                request.ServicePoint.ConnectionLimit = MaxTry;
            }
            request.Referer = url;
            request.Accept = _Accept;
            request.UserAgent = _UserAgent;
            request.Method = "GET";

            response = (HttpWebResponse)request.GetResponse();

            // The response is deliberately left open: the returned stream reads from it.
            return response.GetResponseStream();
        }
        catch (Exception)
        {
            // Best-effort contract: swallow the failure, release what was
            // acquired, and retry this same request rather than a text download.
            response?.Close();
            request?.Abort();
        }

        attempt++;
    }
    while (attempt <= MaxTry);

    return null;
}
#endregion
#region 清除HTML标记
///
/// 清除HTML标记
///
///
/// 已经去除后的文字
public string NoHTML(string Htmlstring)
{
//删除脚本
Htmlstring = Regex.Replace(Htmlstring, @"", "", RegexOptions.IgnoreCase);
//删除HTML
Regex regex = new Regex("<.+?>", RegexOptions.IgnoreCase);
Htmlstring = regex.Replace(Htmlstring, "");
Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"