using Admin.Core.Common.Attributes;
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;

namespace Admin.Core.Common.Helpers
{
    /// <summary>
    /// Helper for downloading HTML over HTTP and for post-processing HTML text.
    /// </summary>
    [SingleInstance]
    public class HtmlHelper
    {
        #region 私有字段

        // Headers sent with every request built by this class.
        private readonly string _ContentType = "application/json";
        private readonly string _Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
        private readonly string _UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

        // Base delay in milliseconds; see NetworkDelay.
        private int _Delay = 1000;

        // No longer used by the request methods below (each call now counts its own
        // retries locally). Retained because the remainder of the class is not visible
        // from this part of the file.
        private int _CurrentTry = 0;

        #endregion 私有字段

        #region 公有属性

        /// <summary>
        /// Cookie container owned by this helper instance.
        /// </summary>
        public CookieContainer CookieContainer { get; } = new CookieContainer();

        /// <summary>
        /// Encoding used to decode response bodies. Defaults to UTF-8.
        /// </summary>
        public Encoding Encoding { get; set; } = Encoding.GetEncoding("utf-8");

        /// <summary>
        /// Getting returns a random delay in milliseconds, at least the base delay and
        /// less than twice the base delay. Setting replaces the base delay.
        /// </summary>
        public int NetworkDelay
        {
            get
            {
                Random r = new Random();
                return r.Next(_Delay, _Delay * 2);
            }
            set
            {
                _Delay = value;
            }
        }

        /// <summary>
        /// Maximum number of retries after a failed attempt. It is also applied as the
        /// connection limit of the request's service point.
        /// </summary>
        public int MaxTry { get; set; } = 300;

        #endregion 公有属性

        #region 获取HTML

        /// <summary>
        /// Downloads the response body of <paramref name="url"/> as text, sending
        /// <paramref name="postData"/> as the request body of a POST.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="postData">String to submit as the POST body.</param>
        /// <param name="isPost">Whether to issue a POST.</param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response text of the first successful attempt, or an empty string when the
        /// initial attempt and all <see cref="MaxTry"/> retries fail.
        /// </returns>
        public string GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
        {
            // A GET cannot carry a request body: HttpWebRequest.GetRequestStream throws
            // ProtocolViolationException for it, so such a call could never succeed.
            // Both "no data" and "not a POST" are therefore served by the plain GET overload.
            if (string.IsNullOrEmpty(postData) || !isPost)
            {
                return GetHtml(url, cookieContainer);
            }

            // NOTE(review): the body is encoded with the platform default encoding, not
            // with the Encoding property that decodes the response — confirm this is intended.
            byte[] body = Encoding.Default.GetBytes(postData);
            return DownloadText(url, "POST", body, cookieContainer);
        }

        /// <summary>
        /// Downloads the response body of <paramref name="url"/> as text using GET.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response text of the first successful attempt, or an empty string when the
        /// initial attempt and all <see cref="MaxTry"/> retries fail.
        /// </returns>
        public string GetHtml(string url, CookieContainer cookieContainer)
        {
            return DownloadText(url, "GET", null, cookieContainer);
        }

        /// <summary>
        /// Sends the request, retrying on any failure, and returns the decoded body.
        /// Sleeps for <see cref="NetworkDelay"/> before every attempt.
        /// </summary>
        private string DownloadText(string url, string method, byte[] body, CookieContainer cookieContainer)
        {
            // One initial attempt plus up to MaxTry retries.
            int retriesLeft = MaxTry;
            while (true)
            {
                Thread.Sleep(NetworkDelay);

                HttpWebRequest request = null;
                try
                {
                    request = BuildRequest(url, method, cookieContainer);
                    if (body != null)
                    {
                        request.ContentLength = body.Length;
                        using (Stream requestStream = request.GetRequestStream())
                        {
                            requestStream.Write(body, 0, body.Length);
                        }
                    }

                    return ReadResponseText(request);
                }
                catch (Exception)
                {
                    // Any failure is treated as retryable; callers never see the exception.
                    request?.Abort();
                    if (retriesLeft <= 0)
                    {
                        return string.Empty;
                    }

                    retriesLeft--;
                }
            }
        }

        /// <summary>
        /// Creates a request for <paramref name="url"/> carrying the shared headers.
        /// </summary>
        private HttpWebRequest BuildRequest(string url, string method, CookieContainer cookieContainer)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = _ContentType;
            request.ServicePoint.ConnectionLimit = MaxTry;
            request.Referer = url;
            request.Accept = _Accept;
            request.UserAgent = _UserAgent;
            request.Method = method;
            return request;
        }

        /// <summary>
        /// Executes the request and reads the whole response body using <see cref="Encoding"/>.
        /// The response, its stream and the reader are disposed even when reading fails.
        /// </summary>
        private string ReadResponseText(HttpWebRequest request)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding))
            {
                return reader.ReadToEnd();
            }
        }

        #endregion 获取HTML

        #region 获取字符流

        //---------------------------------------------------------------------------------------------------------------
        // Example:
        // System.Net.CookieContainer cookie = new System.Net.CookieContainer();
        // Stream s = htmlHelper.GetStream("http://ptlogin2.qq.com/getimage?aid=15000102&0.43878429697395826", cookie);
        // picVerify.Image = Image.FromStream(s);
        //---------------------------------------------------------------------------------------------------------------

        /// <summary>
        /// Requests <paramref name="url"/> with GET and returns the raw response stream.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response stream of the first successful attempt, or <c>null</c> when the
        /// initial attempt and all <see cref="MaxTry"/> retries fail. The response is left
        /// open so the stream can be read; the caller is responsible for disposing the stream.
        /// </returns>
        public Stream GetStream(string url, CookieContainer cookieContainer)
        {
            // One initial attempt plus up to MaxTry retries.
            int retriesLeft = MaxTry;
            while (true)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                try
                {
                    request = BuildRequest(url, "GET", cookieContainer);
                    response = (HttpWebResponse)request.GetResponse();
                    return response.GetResponseStream();
                }
                catch (Exception)
                {
                    request?.Abort();
                    response?.Close();
                    if (retriesLeft <= 0)
                    {
                        return null;
                    }

                    retriesLeft--;
                }

                // Pause before retrying; the first attempt is made without a delay.
                Thread.Sleep(NetworkDelay);
            }
        }

        #endregion 获取字符流

        #region 清除HTML标记

        /// <summary>
        /// Removes HTML markup from a string.
        /// </summary>
        /// <param name="Htmlstring">Text to strip.</param>
        /// <returns>The text with the markup removed.</returns>
        public string NoHTML(string Htmlstring)
        {
            // Remove scripts.
            // NOTE(review): this pattern looks truncated — presumably it was meant to match
            // whole script elements; confirm against the original source.
            Htmlstring = Regex.Replace(Htmlstring, @"]*?>.*?", "", RegexOptions.IgnoreCase);
            // Remove HTML tags.
            Regex regex = new Regex("<.+?>", RegexOptions.IgnoreCase);
            Htmlstring = regex.Replace(Htmlstring, "");
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring =
Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"