温馨提示:代码在线浏览功能只能做为源码浏览参考,不能展示项目的全部,如果想更进一步了解该代码请下载:NETCMSv1.5(Build0509)完整源码版
当前文件路径:NetCMSv15/NetCMS.Content/Collect/PageNews.cs

//======================================================
//== (c)2008 aspxcms inc by NeTCMS v1.0 ==
//== Forum:bbs.aspxcms.com ==
//== Website:www.aspxcms.com ==
//======================================================
using System; 7
using System.Collections.Generic; 8
using System.Text; 9
using System.Text.RegularExpressions; 10
using System.Collections; 11
12
namespace NetCMS.Content.Collect 13
{ 14
public class PageNews : Page 15
{ 16
私有变量 私有变量 26
/// <summary>
/// Creates a news page for the given address. The argument is forwarded
/// unchanged to the base-class constructor; no other work is done here.
/// </summary>
/// <param name="url">Address of the page, passed straight to the base class.</param>
public PageNews(string url)
    : base(url)
{
}

/// <summary>
/// Creates a news page for the given address and encoding. Both arguments are
/// forwarded unchanged to the base-class constructor.
/// </summary>
/// <param name="url">Address of the page, passed straight to the base class.</param>
/// <param name="encode">Encoding argument, passed straight to the base class
/// (presumably a character-set name; confirm against the base class).</param>
public PageNews(string url, string encode)
    : base(url, encode)
{
}
/// <summary>Gets the extracted title (the value last stored by <c>FigureTitle</c>).</summary>
public string Title
{
    get { return _title; }
}

/// <summary>
/// Gets or sets the article body. The filtering and replacing methods of this
/// class read and rewrite this same value.
/// </summary>
public string Content
{
    get { return _content; }
    set { _content = value; }
}

/// <summary>Gets the author (the value last stored by <c>FigureAuthor</c>).</summary>
public string Author
{
    get { return _author; }
}

/// <summary>Gets the source (the value last stored by <c>FigureSource</c>).</summary>
public string Source
{
    get { return _source; }
}

/// <summary>Gets the publication time (the value last stored by <c>FigureAddTime</c>).</summary>
public DateTime AddTime
{
    get { return _addtime; }
}
/// <summary>
/// Sets the rule used by <c>FigureTitle</c>. That method requires the rule to
/// contain the "[标题]" marker. Write-only.
/// </summary>
public string RuleOfTitle
{
    set { _titlerule = value; }
}

/// <summary>
/// Sets the rule used by <c>FigureContent</c>. That method requires the rule to
/// contain the "[内容]" marker. Write-only.
/// </summary>
public string RuleOfContent
{
    set { _contentrule = value; }
}
/// <summary>
/// Determines the author of the news item.
/// </summary>
/// <remarks>
/// When <paramref name="settled"/> is false and <paramref name="pattern"/> contains
/// the "[作者]" marker, the pattern is treated as an extraction rule and applied to
/// the fetched document through <c>Utility.GetMatchRigid</c>; on success the
/// "TARGET" group becomes the author, otherwise the previous value is kept.
/// In every other case <paramref name="pattern"/> itself is stored as the author.
/// </remarks>
/// <param name="pattern">Extraction rule containing "[作者]", or the literal author text.</param>
/// <param name="settled">True to store <paramref name="pattern"/> verbatim without matching.</param>
public void FigureAuthor(string pattern, bool settled)
{
    // The marker is a fixed literal, so search for it ordinally rather than with
    // culture-sensitive rules. A null pattern simply falls through to the
    // "store verbatim" branch instead of raising NullReferenceException.
    bool isRule = !settled
        && pattern != null
        && pattern.IndexOf("[作者]", StringComparison.Ordinal) >= 0;

    if (!isRule)
    {
        _author = pattern;
        return;
    }

    Match m = Utility.GetMatchRigid(_Doc, pattern, "[作者]");
    if (m.Success)
    {
        _author = m.Groups["TARGET"].Value;
    }
}
/// <summary>
/// Determines the source (origin) of the news item.
/// </summary>
/// <remarks>
/// When <paramref name="settled"/> is false and <paramref name="pattern"/> contains
/// the "[来源]" marker, the pattern is treated as an extraction rule and applied to
/// the fetched document through <c>Utility.GetMatchRigid</c>; on success the
/// "TARGET" group becomes the source, otherwise the previous value is kept.
/// In every other case <paramref name="pattern"/> itself is stored as the source.
/// </remarks>
/// <param name="pattern">Extraction rule containing "[来源]", or the literal source text.</param>
/// <param name="settled">True to store <paramref name="pattern"/> verbatim without matching.</param>
public void FigureSource(string pattern, bool settled)
{
    // Ordinal search for the fixed marker; a null pattern is stored verbatim
    // instead of raising NullReferenceException.
    bool isRule = !settled
        && pattern != null
        && pattern.IndexOf("[来源]", StringComparison.Ordinal) >= 0;

    if (!isRule)
    {
        _source = pattern;
        return;
    }

    Match m = Utility.GetMatchRigid(_Doc, pattern, "[来源]");
    if (m.Success)
    {
        _source = m.Groups["TARGET"].Value;
    }
}
/// <summary>
/// Determines the publication time of the news item.
/// </summary>
/// <remarks>
/// When <paramref name="settled"/> is false and <paramref name="pattern"/> contains
/// the "[加入时间]" marker, the pattern is applied to the fetched document through
/// <c>Utility.GetMatchRigid</c> and the "TARGET" group is used as the time text
/// (empty text if nothing matched). Otherwise <paramref name="pattern"/> itself is
/// the time text. If the text cannot be parsed as a date, the current local time
/// is stored instead.
/// </remarks>
/// <param name="pattern">Extraction rule containing "[加入时间]", or a literal date/time string.</param>
/// <param name="settled">True to parse <paramref name="pattern"/> directly without matching.</param>
public void FigureAddTime(string pattern, bool settled)
{
    string tm = "";
    bool isRule = !settled
        && pattern != null
        && pattern.IndexOf("[加入时间]", StringComparison.Ordinal) >= 0;

    if (isRule)
    {
        Match m = Utility.GetMatchRigid(_Doc, pattern, "[加入时间]");
        if (m.Success)
        {
            tm = m.Groups["TARGET"].Value;
        }
    }
    else
    {
        tm = pattern;
    }

    // TryParse reports failure (including null or empty text) without throwing,
    // so no catch-all handler is needed. Parse into a local first: on failure
    // TryParse overwrites its out argument with DateTime.MinValue.
    DateTime parsed;
    _addtime = DateTime.TryParse(tm, out parsed) ? parsed : DateTime.Now;
}
/// <summary>
/// Extracts the title from the fetched document using the rule supplied through
/// <c>RuleOfTitle</c>. On a successful match the "TARGET" group is stored as the
/// title; when nothing matches, the previous title is left untouched.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The title rule is null or does not contain the "[标题]" marker. (The message
/// text means "the news title collection rule has not been set".)
/// </exception>
public void FigureTitle()
{
    // Ordinal search: the marker is a fixed literal, not linguistic text.
    if (_titlerule == null || _titlerule.IndexOf("[标题]", StringComparison.Ordinal) < 0)
    {
        // InvalidOperationException derives from Exception, so existing
        // catch (Exception) handlers continue to work.
        throw new InvalidOperationException("采集新闻标题规则还没有设定!");
    }

    Match m = Utility.GetMatchRigid(_Doc, _titlerule, "[标题]");
    if (m.Success)
    {
        _title = m.Groups["TARGET"].Value;
    }
}
/// <summary>
/// Extracts the article body from the fetched document using the rule supplied
/// through <c>RuleOfContent</c>. On a successful match the "TARGET" group is
/// stored as the content; when nothing matches, the previous content is kept.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The content rule is null or does not contain the "[内容]" marker. (The message
/// text means "the news content collection rule has not been set".)
/// </exception>
public void FigureContent()
{
    // Ordinal search: the marker is a fixed literal, not linguistic text.
    if (_contentrule == null || _contentrule.IndexOf("[内容]", StringComparison.Ordinal) < 0)
    {
        // InvalidOperationException derives from Exception, so existing
        // catch (Exception) handlers continue to work.
        throw new InvalidOperationException("采集新闻内容规则还没有设定!");
    }

    // Note: content uses Utility.GetMatch, not the GetMatchRigid variant used
    // for title/author/source.
    Match m = Utility.GetMatch(_Doc, _contentrule, "[内容]");
    if (m.Success)
    {
        _content = m.Groups["TARGET"].Value;
    }
}
/// <summary>
/// Removes one kind of HTML construct from the current content, case-insensitively.
/// </summary>
/// <param name="element">
/// Attribute name (mode 0) or tag name (modes 1 and 2). It is inserted into the
/// regular expression as-is, so it must be a plain name.
/// </param>
/// <param name="type">
/// 0 = remove the attribute <c>element=value</c> (quoted or unquoted value);
/// 1 = remove the opening and closing tags but keep what is between them;
/// 2 = remove the whole element including its inner text.
/// Any other value does nothing.
/// </param>
private void FilterHtml(string element, int type)
{
    // Nothing to strip, or nothing to strip it from. (A null/empty element would
    // otherwise produce a pattern that matches every attribute or tag.)
    if (string.IsNullOrEmpty(element) || string.IsNullOrEmpty(_content))
    {
        return;
    }

    string pattern;
    switch (type)
    {
        case 0:
            // Whole attribute name only (not "subclass" or "data-class"). Each quote
            // style is matched separately so a value such as "font-family:'Arial'" is
            // consumed whole, and an unquoted value stops at whitespace or '>' so the
            // end of the tag and the following text are preserved.
            pattern = "(?<![\\w-])" + element + "\\s*=\\s*(?:\"[^\"]*\"|'[^']*'|[^\\s\"'>]+)";
            break;
        case 1:
            // The look-ahead keeps "a" from matching <abbr>, <article>, etc.
            pattern = "<" + element + "(?![\\w-])[^>]*>|</" + element + "\\s*>";
            break;
        case 2:
            // Lazy body: stop at the first closing tag, so text lying between two
            // separate <script>/<object>/<iframe> elements is not deleted with them.
            pattern = "<(?<tag>" + element + ")(?![\\w-])[^>]*>[\\s\\S]*?</\\k<tag>\\s*>";
            break;
        default:
            return;
    }

    try
    {
        // Not compiled: the expression is built per call and used once, so
        // compiling it would cost more than it saves.
        Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
        _content = reg.Replace(_content, "");
    }
    catch (ArgumentException)
    {
        // Only an element that is not a valid regex fragment can get here;
        // filtering stays best-effort and the content is left unchanged.
    }
}
/// <summary>
/// Applies a user-defined replacement rule to the current content.
/// </summary>
/// <remarks>
/// <paramref name="profile"/> is literal text that may contain two markers:
/// "[变量]" stands for any run of characters (possibly empty) that is kept, and
/// "[过滤字符串]" stands for a non-empty run of characters that is replaced by
/// <paramref name="newstr"/>. The literal text around a "[过滤字符串]" marker is
/// preserved. A marker at the very start or end of the profile is ignored, and a
/// profile without any "[过滤字符串]" marker leaves the content unchanged.
/// NOTE(review): the previous implementation split on the individual characters
/// of the marker and therefore threw for every profile that used a marker; the
/// semantics above are the apparent intent - confirm against the rule editor.
/// </remarks>
/// <param name="profile">Rule text; matched literally apart from the two markers.</param>
/// <param name="newstr">Replacement text, inserted literally ('$' is not special). Null is treated as empty.</param>
/// <param name="bIgnoreCase">True to match the rule case-insensitively.</param>
public void Replace(string profile, string newstr, bool bIgnoreCase)
{
    if (string.IsNullOrEmpty(profile) || string.IsNullOrEmpty(_content))
    {
        return;
    }

    // '$' introduces substitutions in a replacement pattern; double it so the
    // replacement text is inserted verbatim.
    string instead = (newstr ?? "").Replace("$", "$$");

    // Regex.Escape turns '[' into "\[" but leaves ']' alone, so after escaping
    // the markers appear as \[变量] and \[过滤字符串].
    string escaped = Regex.Escape(profile).Replace(@"\[变量]", @"[\s\S]*?");

    // Split on the whole marker string. Splitting on its individual characters
    // would leave a dangling '\' and tear apart the [\s\S]*? wildcard.
    string[] segments = escaped.Split(new string[] { @"\[过滤字符串]" }, StringSplitOptions.RemoveEmptyEntries);
    if (segments.Length == 0)
    {
        return;
    }

    StringBuilder p = new StringBuilder();
    StringBuilder n = new StringBuilder();
    for (int i = 0; i < segments.Length; i++)
    {
        if (i > 0)
        {
            // The filtered span sits between two kept segments only; nothing is
            // consumed or inserted after the last segment.
            p.Append(@"[\s\S]+?");
            n.Append(instead);
        }
        p.Append("(?<ch").Append(i).Append('>').Append(segments[i]).Append(')');
        n.Append("${ch").Append(i).Append('}');
    }

    RegexOptions options = bIgnoreCase ? RegexOptions.IgnoreCase : RegexOptions.None;
    Regex reg = new Regex(p.ToString(), options);
    _content = reg.Replace(_content, n.ToString());
}
/// <summary>
/// Strips the selected HTML attributes and tags from the current content by
/// calling <c>FilterHtml</c> once per selected item, in a fixed order.
/// </summary>
/// <remarks>
/// <paramref name="ridhtml"/> acts as "select everything": when it is true every
/// item below is filtered regardless of its own flag.
/// Mode 0 is passed for attributes (style, class), mode 1 for tags whose inner
/// text is kept (div, a, font, span) and mode 2 for elements removed together
/// with their inner text (object, iframe, script).
/// </remarks>
public void Filter(bool ridhtml, bool ridstyle, bool riddiv, bool rida, bool ridclass, bool ridfont, bool ridspan, bool ridobject, bool ridiframe, bool ridscript)
{
    // Parallel tables, listed in the order the filters must run.
    string[] names = { "style", "div", "a", "class", "font", "span", "object", "iframe", "script" };
    int[] modes = { 0, 1, 1, 0, 1, 1, 2, 2, 2 };
    bool[] selected = { ridstyle, riddiv, rida, ridclass, ridfont, ridspan, ridobject, ridiframe, ridscript };

    for (int idx = 0; idx < names.Length; idx++)
    {
        if (ridhtml || selected[idx])
        {
            FilterHtml(names[idx], modes[idx]);
        }
    }
}
/// <summary>
/// Collects the content of the follow-up pages reached by repeatedly following
/// the link that <paramref name="profile"/> matches, starting from this page.
/// </summary>
/// <param name="profile">Rule handed to <c>GetOtherPage</c> to locate the link to the next page.</param>
/// <returns>The text accumulated from the follow-up pages; empty when none was collected.</returns>
public string GetOtherPagination(string profile)
{
    // Reset the shared accumulator so a previous call's result is not carried
    // over, then let GetOtherPage fill it starting from this page's URL/document.
    otherpgcon = "";
    this.GetOtherPage(this._Url, this._Doc, profile);

    string gathered = otherpgcon;
    return gathered;
}
/// <summary>
/// Follows the chain of "next page" links starting from the given page and
/// appends the content of every page reached to <c>otherpgcon</c>.
/// </summary>
/// <remarks>
/// Each step applies <paramref name="pattern"/> (marker "[分页新闻]") to the current
/// document through <c>Utility.GetMatchUrl</c>, resolves the "TARGET" group against
/// the current URL with <c>Utility.StickUrl</c>, fetches that page with this
/// page's encoding and content rule, and continues from it. The walk ends when no
/// link is found, the fetch fails, or the link leads to a page already visited.
/// The last condition also stops chains that loop back on themselves (for example
/// page 2 linking to page 1), which previously recursed without bound.
/// </remarks>
/// <param name="otherurl">URL of the page the walk starts from.</param>
/// <param name="PageDoc">Document text of that page.</param>
/// <param name="pattern">Rule locating the link to the next page.</param>
private void GetOtherPage(string otherurl, string PageDoc, string pattern)
{
    // Trimmed URLs already handled, including the starting page itself.
    Dictionary<string, bool> visited = new Dictionary<string, bool>();
    visited[(otherurl ?? "").Trim()] = true;

    string currentUrl = otherurl;
    string currentDoc = PageDoc;

    while (true)
    {
        Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
        if (!m.Success)
        {
            break;
        }

        string nextUrl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);
        string key = nextUrl.Trim();
        if (visited.ContainsKey(key))
        {
            break; // self-link or cycle
        }
        visited[key] = true;

        PageNews pgns = new PageNews(nextUrl, _Encode);
        pgns.RuleOfContent = this._contentrule;
        if (!pgns.Fetch())
        {
            break;
        }

        pgns.FigureContent();
        otherpgcon += pgns.Content;

        currentUrl = nextUrl;
        currentDoc = pgns._Doc;
    }
}
/// <summary>
/// Collects the content of every page linked from this page's pagination index.
/// </summary>
/// <remarks>
/// All matches of <paramref name="profile"/> (marker "[分页新闻]") in this page's
/// document are taken as page links. Each link is resolved against this page's
/// URL with <c>Utility.StickUrl</c>, fetched with this page's encoding and content
/// rule, and its extracted content appended in match order. Links that point back
/// to this page, or to a page already collected during this call, are skipped.
/// </remarks>
/// <param name="profile">Rule locating the page links in the index.</param>
/// <returns>The concatenated content of the linked pages; empty when none was collected.</returns>
public string GetIndexPagination(string profile)
{
    StringBuilder collected = new StringBuilder();

    // Trimmed URLs that must not be fetched (again): this page plus every link
    // already handled. Both sides of the comparison are trimmed.
    Dictionary<string, bool> seen = new Dictionary<string, bool>();
    seen[(this._Url ?? "").Trim()] = true;

    Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]");
    while (m.Success)
    {
        string otherurl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
        string key = otherurl.Trim();
        if (!seen.ContainsKey(key))
        {
            seen[key] = true;

            PageNews pgns = new PageNews(otherurl, _Encode);
            // The child page needs the content rule; without it FigureContent
            // rejects the page because no rule has been set.
            pgns.RuleOfContent = this._contentrule;
            if (pgns.Fetch())
            {
                pgns.FigureContent();
                collected.Append(pgns.Content);
            }
        }
        m = m.NextMatch();
    }

    return collected.ToString();
}
public string AllDocument 237
![]()




