您目前尚未登录,请选择【登录】或【注册】
首页->全站代码->NETCMSv1.5(Build0509)完整源码版>>NetCMS.Content/Collect/PageNews.cs>>代码在线查看
温馨提示:代码在线浏览功能只能作为源码浏览参考,不能展示项目的全部,如果想更进一步了解该代码,请下载:NETCMSv1.5(Build0509)完整源码版


当前文件路径:NetCMSv15/NetCMS.Content/Collect/PageNews.cs 文件类型
普通视图
		            
//======================================================
//== (c)2008 aspxcms inc by NeTCMS v1.0 ==
//== Forum:bbs.aspxcms.com ==
//== Website:www.aspxcms.com ==
//======================================================
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;

namespace NetCMS.Content.Collect
{
    /// <summary>
    /// A collected news page. Extracts title, content, author, source and
    /// publication time from the fetched document by means of rule strings that
    /// contain bracketed placeholders such as "[标题]" or "[内容]", and offers
    /// helpers to strip markup from the content and to append paginated content.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>_Doc</c>, <c>_Url</c>, <c>_Encode</c> and <c>Fetch()</c> are
    /// not declared in the visible listing; presumably they come from the base
    /// class <c>Page</c>. <c>Utility</c> is a project helper whose exact matching
    /// semantics are not visible here either.
    /// </remarks>
    public class PageNews : Page
    {
        #region 私有变量
        // NOTE(review): this region ("private variables", listing lines 16-25) was
        // collapsed in the captured listing, so its declarations are NOT visible.
        // The members below read/write _title, _content, _author, _source,
        // _addtime, _titlerule, _contentrule and otherpgcon, which are presumably
        // declared here. Restore the declarations from the original file.
        #endregion

        /// <summary>Creates a news page for the given URL.</summary>
        public PageNews(string url) : base(url) { }

        /// <summary>Creates a news page for the given URL and encoding name.</summary>
        public PageNews(string url, string encode) : base(url, encode) { }

        /// <summary>Title extracted by <see cref="FigureTitle"/>.</summary>
        public string Title
        {
            get { return _title; }
        }

        /// <summary>Content extracted by <see cref="FigureContent"/>; may be overwritten by the caller.</summary>
        public string Content
        {
            get { return _content; }
            set { _content = value; }
        }

        /// <summary>Author set by <see cref="FigureAuthor"/>.</summary>
        public string Author
        {
            get { return _author; }
        }

        /// <summary>Source set by <see cref="FigureSource"/>.</summary>
        public string Source
        {
            get { return _source; }
        }

        /// <summary>Publication time set by <see cref="FigureAddTime"/>.</summary>
        public DateTime AddTime
        {
            get { return _addtime; }
        }

        /// <summary>Rule used by <see cref="FigureTitle"/>; must contain the "[标题]" placeholder.</summary>
        public string RuleOfTitle
        {
            set { _titlerule = value; }
        }

        /// <summary>Rule used by <see cref="FigureContent"/>; must contain the "[内容]" placeholder.</summary>
        public string RuleOfContent
        {
            set { _contentrule = value; }
        }

        /// <summary>
        /// Determines the author. When <paramref name="settled"/> is false and the
        /// pattern contains the "[作者]" placeholder, the pattern is matched against
        /// the document and the "TARGET" group becomes the author (the author is
        /// left unchanged if nothing matches). Otherwise the pattern itself is
        /// used verbatim as the author.
        /// </summary>
        /// <param name="pattern">Extraction rule, or the literal author value.</param>
        /// <param name="settled">True to use <paramref name="pattern"/> as a fixed value.</param>
        public void FigureAuthor(string pattern, bool settled)
        {
            // Ordinal search: the placeholder is a fixed token, not linguistic text.
            if (!settled && pattern.IndexOf("[作者]", StringComparison.Ordinal) >= 0)
            {
                Match m = Utility.GetMatchRigid(_Doc, pattern, "[作者]");
                if (m.Success)
                {
                    _author = m.Groups["TARGET"].Value;
                }
            }
            else
            {
                _author = pattern;
            }
        }

        /// <summary>
        /// Determines the source. Works like <see cref="FigureAuthor"/> but with the
        /// "[来源]" placeholder.
        /// </summary>
        /// <param name="pattern">Extraction rule, or the literal source value.</param>
        /// <param name="settled">True to use <paramref name="pattern"/> as a fixed value.</param>
        public void FigureSource(string pattern, bool settled)
        {
            if (!settled && pattern.IndexOf("[来源]", StringComparison.Ordinal) >= 0)
            {
                Match m = Utility.GetMatchRigid(_Doc, pattern, "[来源]");
                if (m.Success)
                {
                    _source = m.Groups["TARGET"].Value;
                }
            }
            else
            {
                _source = pattern;
            }
        }

        /// <summary>
        /// Determines the publication time. The candidate text is either the
        /// "TARGET" group matched through the "[加入时间]" placeholder or, when
        /// <paramref name="settled"/> is true or the placeholder is absent, the
        /// pattern itself. If the text cannot be parsed as a date the current
        /// local time is used.
        /// </summary>
        /// <param name="pattern">Extraction rule, or the literal date text.</param>
        /// <param name="settled">True to use <paramref name="pattern"/> as a fixed value.</param>
        public void FigureAddTime(string pattern, bool settled)
        {
            string tm = "";
            if (!settled && pattern.IndexOf("[加入时间]", StringComparison.Ordinal) >= 0)
            {
                Match m = Utility.GetMatchRigid(_Doc, pattern, "[加入时间]");
                if (m.Success)
                {
                    tm = m.Groups["TARGET"].Value;
                }
            }
            else
            {
                tm = pattern;
            }

            // TryParse gives the same fallback as the former try/catch around
            // DateTime.Parse without using exceptions for an expected failure.
            DateTime parsed;
            _addtime = DateTime.TryParse(tm, out parsed) ? parsed : DateTime.Now;
        }

        /// <summary>
        /// Extracts the title from the document using the title rule. The title is
        /// left unchanged when the rule does not match.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The title rule is unset or lacks the "[标题]" placeholder.
        /// </exception>
        public void FigureTitle()
        {
            if (_titlerule == null || _titlerule.IndexOf("[标题]", StringComparison.Ordinal) < 0)
            {
                // InvalidOperationException derives from Exception, so existing
                // catch (Exception) handlers keep working.
                throw new InvalidOperationException("采集新闻标题规则还没有设定!");
            }

            Match m = Utility.GetMatchRigid(_Doc, _titlerule, "[标题]");
            if (m.Success)
            {
                _title = m.Groups["TARGET"].Value;
            }
        }

        /// <summary>
        /// Extracts the content from the document using the content rule. The
        /// content is left unchanged when the rule does not match.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The content rule is unset or lacks the "[内容]" placeholder.
        /// </exception>
        public void FigureContent()
        {
            if (_contentrule == null || _contentrule.IndexOf("[内容]", StringComparison.Ordinal) < 0)
            {
                throw new InvalidOperationException("采集新闻内容规则还没有设定!");
            }

            Match m = Utility.GetMatch(_Doc, _contentrule, "[内容]");
            if (m.Success)
            {
                _content = m.Groups["TARGET"].Value;
            }
        }

        /// <summary>
        /// Removes markup named by <paramref name="element"/> from the content.
        /// </summary>
        /// <param name="element">Attribute name (type 0) or tag name (types 1 and 2).</param>
        /// <param name="type">
        /// 0 = remove the attribute and its value; 1 = remove the opening and
        /// closing tags but keep what is between them; 2 = remove the element
        /// together with its body. Any other value does nothing.
        /// </param>
        private void FilterHtml(string element, int type)
        {
            string pattern = "";
            switch (type)
            {
                case 0:
                    pattern = element + "\\s?=\\s?(['\"][^'\"]*?['\"]|[^'\"]\\S*)";
                    break;
                case 1:
                    pattern = "<" + element + "[^>]*>|</" + element + ">";
                    break;
                case 2:
                    // Lazy body match: a greedy [\s\S]* would swallow everything
                    // from the first opening tag to the LAST closing tag, deleting
                    // ordinary content that sits between two such elements.
                    pattern = "<(?<tag>" + element + @")[^>]*>[\s\S]*?</\k<tag>>";
                    break;
                default:
                    return;
            }

            try
            {
                // Not RegexOptions.Compiled: the regex is used once and discarded,
                // so compiling it to IL only adds cost.
                Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
                _content = reg.Replace(_content, "");
            }
            catch (ArgumentException)
            {
                // Best effort, as before: an invalid pattern (ArgumentException) or
                // a null content (ArgumentNullException) leaves the content as is.
            }
        }

        /// <summary>
        /// Rewrites the content with a regular expression built from
        /// <paramref name="profile"/>. The profile is regex-escaped, each "[变量]"
        /// placeholder becomes a lazy wildcard, and the result is split into
        /// pieces; every piece is captured and the text following it (lazily, at
        /// least one character) is replaced by <paramref name="newstr"/>.
        /// </summary>
        /// <param name="profile">Template describing the text to look for.</param>
        /// <param name="newstr">Replacement text, inserted literally.</param>
        /// <param name="bIgnoreCase">True for case-insensitive matching.</param>
        public void Replace(string profile, string newstr, bool bIgnoreCase)
        {
            string pattern = Regex.Escape(profile);
            // Double every '$' so the replacement text is taken literally.
            string instead = newstr.Replace("$", "$$");
            // Regex.Escape turned "[变量]" into "\[变量]"; swap it for a wildcard.
            pattern = pattern.Replace(@"\[变量]", @"[\s\S]*?");

            // NOTE(review): the five characters between '[' and ']' were lost when
            // the listing was captured (empty char literals do not compile).
            // Restore them from the original file; they are kept as found here.
            string[] _pattern = pattern.Split(new char[] { '[', '', '', '', '', '', ']' }, StringSplitOptions.RemoveEmptyEntries);

            StringBuilder p = new StringBuilder();
            StringBuilder n = new StringBuilder();
            // RemoveEmptyEntries guarantees that no piece is empty.
            for (int i = 0; i < _pattern.Length; i++)
            {
                p.Append("(?<ch").Append(i).Append(">").Append(_pattern[i]).Append(@")[\s\S]+?");
                n.Append("${ch").Append(i).Append("}").Append(instead);
            }

            // Not RegexOptions.Compiled: the pattern is caller-supplied and used once.
            RegexOptions options = bIgnoreCase ? RegexOptions.IgnoreCase : RegexOptions.None;
            Regex reg = new Regex(p.ToString(), options);
            _content = reg.Replace(_content, n.ToString());
        }

        /// <summary>
        /// Strips the selected kinds of markup from the content.
        /// <paramref name="ridhtml"/> switches all filters on at once.
        /// </summary>
        public void Filter(bool ridhtml, bool ridstyle, bool riddiv, bool rida, bool ridclass, bool ridfont, bool ridspan, bool ridobject, bool ridiframe, bool ridscript)
        {
            if (ridstyle || ridhtml)
                FilterHtml("style", 0);
            if (riddiv || ridhtml)
                FilterHtml("div", 1);
            if (rida || ridhtml)
                FilterHtml("a", 1);
            if (ridclass || ridhtml)
                FilterHtml("class", 0);
            if (ridfont || ridhtml)
                FilterHtml("font", 1);
            if (ridspan || ridhtml)
                FilterHtml("span", 1);
            if (ridobject || ridhtml)
                FilterHtml("object", 2);
            if (ridiframe || ridhtml)
                FilterHtml("iframe", 2);
            if (ridscript || ridhtml)
                FilterHtml("script", 2);
        }

        /// <summary>
        /// Follows a chain of pagination links ("[分页新闻]" placeholder in
        /// <paramref name="profile"/>) starting from this page and returns the
        /// concatenated content of the pages reached.
        /// </summary>
        /// <param name="profile">Rule that locates the link to the following page.</param>
        /// <returns>Content of the following pages, or an empty string.</returns>
        public string GetOtherPagination(string profile)
        {
            otherpgcon = "";
            GetOtherPage(_Url, _Doc, profile);
            return otherpgcon;
        }

        /// <summary>
        /// Walks the pagination chain, appending each fetched page's content to
        /// <c>otherpgcon</c>. Stops when no link is found, when a page cannot be
        /// fetched, or when a link leads to a page that was already visited.
        /// </summary>
        /// <param name="otherurl">URL of the page to start from.</param>
        /// <param name="PageDoc">Document text of the page to start from.</param>
        /// <param name="pattern">Rule that locates the link to the following page.</param>
        private void GetOtherPage(string otherurl, string PageDoc, string pattern)
        {
            // Remember every visited URL: checking only against the immediately
            // preceding page lets a cycle (A -> B -> A) recurse without end.
            List<string> visited = new List<string>();
            string currentUrl = otherurl;
            string currentDoc = PageDoc;
            visited.Add(currentUrl.Trim());

            while (true)
            {
                Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
                if (!m.Success)
                {
                    return;
                }

                string obturl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);
                string key = obturl.Trim();
                if (visited.Contains(key))
                {
                    return;
                }
                visited.Add(key);

                PageNews pgns = new PageNews(obturl, _Encode);
                pgns.RuleOfContent = this._contentrule;
                if (!pgns.Fetch())
                {
                    return;
                }

                pgns.FigureContent();
                otherpgcon += pgns.Content;
                currentUrl = obturl;
                currentDoc = pgns._Doc;
            }
        }

        /// <summary>
        /// Fetches every page linked from this page through the "[分页新闻]"
        /// placeholder in <paramref name="profile"/> and returns their
        /// concatenated content. Links that point back to this page are skipped.
        /// </summary>
        /// <param name="profile">Rule that locates the pagination links.</param>
        /// <returns>Content of the linked pages, or an empty string.</returns>
        public string GetIndexPagination(string profile)
        {
            StringBuilder OtherContent = new StringBuilder();
            Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]");
            while (m.Success)
            {
                string otherurl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
                // Trim both sides, as GetOtherPage does.
                if (!otherurl.Trim().Equals(this._Url.Trim()))
                {
                    PageNews pgns = new PageNews(otherurl, _Encode);
                    // Hand the content rule to the sub-page, as GetOtherPage does;
                    // without it FigureContent() throws for every sub-page.
                    // NOTE(review): assumes _contentrule is an instance field; its
                    // declaration is in the collapsed region - confirm.
                    pgns.RuleOfContent = this._contentrule;
                    if (pgns.Fetch())
                    {
                        pgns.FigureContent();
                        OtherContent.Append(pgns.Content);
                    }
                }
                m = m.NextMatch();
            }
            return OtherContent.ToString();
        }

        // NOTE(review): the captured listing is cut off at this point. The body of
        // the AllDocument property and the closing braces of the class and the
        // namespace lie outside the visible text and must be restored from the
        // original file.
        public string AllDocument