温馨提示:代码在线浏览功能只能作为源码浏览参考,如果想更进一步了解该代码,请下载:风讯dotNETCMSv1.0免安装版源码
当前文件:
FooSunCMS/Foosun.CMS/Collect/PageNews.cs[8K,2009-6-12 11:42:42],打开代码结构图
FooSunCMS/Foosun.CMS/Collect/PageNews.cs[8K,2009-6-12 11:42:42],打开代码结构图1//=========================================================== 2
//== (c)2007 Foosun Inc. by dotNETCMS 1.0 == 3
//== Forum:bbs.foosun.net == 4
//== website:www.foosun.net == 5
//== Address:NO.109 HuiMin ST.,Chengdu ,China == 6
//== TEL:86-28-85098980/66026180 == 7
//== qq:655071,MSN:ikoolls@gmail.com == 8
//== Code By JiangDong == 9
//=========================================================== 10
using System; 11
using System.Collections.Generic; 12
using System.Text; 13
using System.Text.RegularExpressions; 14
using System.Collections; 15
16
namespace Foosun.CMS.Collect 17
{ 18
public class PageNews : Page 19
{ 20
// [collapsed region "私有变量" (private fields): the field declarations were not captured in this listing]
/// <summary>
/// Creates a news page for the given address. All construction work is
/// delegated to the base class constructor.
/// </summary>
/// <param name="url">Address forwarded unchanged to the base constructor.</param>
public PageNews(string url)
    : base(url)
{
}

/// <summary>
/// Creates a news page for the given address with an explicit encoding argument.
/// Both values are forwarded unchanged to the base class constructor.
/// </summary>
/// <param name="url">Address forwarded to the base constructor.</param>
/// <param name="encode">Second argument forwarded to the base constructor.</param>
public PageNews(string url, string encode)
    : base(url, encode)
{
}
/// <summary>Gets the title stored by <c>FigureTitle</c>.</summary>
public string Title
{
    get { return this._title; }
}

/// <summary>
/// Gets or sets the content text. It is filled by <c>FigureContent</c> and
/// rewritten in place by the filter and replace methods of this class.
/// </summary>
public string Content
{
    get { return this._content; }
    set { this._content = value; }
}

/// <summary>Gets the author stored by <c>FigureAuthor</c>.</summary>
public string Author
{
    get { return this._author; }
}

/// <summary>Gets the source stored by <c>FigureSource</c>.</summary>
public string Source
{
    get { return this._source; }
}

/// <summary>Gets the timestamp stored by <c>FigureAddTime</c>.</summary>
public DateTime AddTime
{
    get { return this._addtime; }
}
/// <summary>
/// Sets the rule used by <c>FigureTitle</c>. That method requires the rule to
/// contain the "[标题]" placeholder. Write-only.
/// </summary>
public string RuleOfTitle
{
    set { this._titlerule = value; }
}

/// <summary>
/// Sets the rule used by <c>FigureContent</c>. That method requires the rule to
/// contain the "[内容]" placeholder. Write-only.
/// </summary>
public string RuleOfContent
{
    set { this._contentrule = value; }
}
/// <summary>
/// Determines the author, either by extracting it from the document or by
/// taking the supplied text verbatim.
/// </summary>
/// <param name="pattern">
/// Extraction rule containing the "[作者]" placeholder, or the literal author text.
/// </param>
/// <param name="settled">
/// When true, <paramref name="pattern"/> is stored as the author without
/// looking at the document.
/// </param>
public void FigureAuthor(string pattern, bool settled)
{
    // Extraction only happens for an unsettled rule that actually carries the placeholder.
    bool extractFromDocument = !settled && pattern.IndexOf("[作者]") >= 0;
    if (!extractFromDocument)
    {
        _author = pattern;
        return;
    }

    Match found = Utility.GetMatchRigid(_Doc, pattern, "[作者]");
    if (!found.Success)
    {
        // No match: the previously stored author is left as it was.
        return;
    }

    _author = found.Groups["TARGET"].Value;
}
/// <summary>
/// Determines the news source, either by extracting it from the document or
/// by taking the supplied text verbatim.
/// </summary>
/// <param name="pattern">
/// Extraction rule containing the "[来源]" placeholder, or the literal source text.
/// </param>
/// <param name="settled">
/// When true, <paramref name="pattern"/> is stored as the source without
/// looking at the document.
/// </param>
public void FigureSource(string pattern, bool settled)
{
    bool useLiteral = settled || pattern.IndexOf("[来源]") < 0;
    if (useLiteral)
    {
        _source = pattern;
        return;
    }

    Match hit = Utility.GetMatchRigid(_Doc, pattern, "[来源]");
    if (hit.Success)
    {
        _source = hit.Groups["TARGET"].Value;
    }
    // On a failed match the stored source keeps its previous value.
}
/// <summary>
/// Determines the news timestamp. The text is either extracted from the
/// document or taken verbatim, then parsed as a date.
/// </summary>
/// <param name="pattern">
/// Extraction rule containing the "[加入时间]" placeholder, or the literal date text.
/// </param>
/// <param name="settled">
/// When true, <paramref name="pattern"/> itself is parsed as the date without
/// looking at the document.
/// </param>
/// <remarks>
/// If no text is found, or the text is not a valid date, the current local
/// time is stored instead.
/// </remarks>
public void FigureAddTime(string pattern, bool settled)
{
    const string placeholder = "[加入时间]";
    string text = "";

    // The placeholder is a fixed token, so an ordinal search is the right comparison.
    if (!settled && pattern.IndexOf(placeholder, StringComparison.Ordinal) >= 0)
    {
        Match m = Utility.GetMatchRigid(_Doc, pattern, placeholder);
        if (m.Success)
        {
            text = m.Groups["TARGET"].Value;
        }
    }
    else
    {
        text = pattern;
    }

    // TryParse reports null, empty or malformed text through its return value,
    // so no exception has to be thrown and swallowed for the expected failure case.
    DateTime parsed;
    _addtime = DateTime.TryParse(text, out parsed) ? parsed : DateTime.Now;
}
/// <summary>
/// Extracts the news title from the document using the rule assigned through
/// <c>RuleOfTitle</c>.
/// </summary>
/// <exception cref="Exception">
/// The title rule is missing or does not contain the "[标题]" placeholder.
/// </exception>
public void FigureTitle()
{
    bool ruleUsable = _titlerule != null && _titlerule.IndexOf("[标题]") >= 0;
    if (!ruleUsable)
    {
        throw new Exception("采集新闻标题规则还没有设定!");
    }

    Match found = Utility.GetMatchRigid(_Doc, _titlerule, "[标题]");
    if (!found.Success)
    {
        // Nothing matched: the stored title is left untouched.
        return;
    }

    _title = found.Groups["TARGET"].Value;
}
/// <summary>
/// Extracts the news body from the document using the rule assigned through
/// <c>RuleOfContent</c>.
/// </summary>
/// <exception cref="Exception">
/// The content rule is missing or does not contain the "[内容]" placeholder.
/// </exception>
public void FigureContent()
{
    bool ruleUsable = _contentrule != null && _contentrule.IndexOf("[内容]") >= 0;
    if (!ruleUsable)
    {
        throw new Exception("采集新闻内容规则还没有设定!");
    }

    // Unlike the title and author lookups, this one goes through Utility.GetMatch.
    Match found = Utility.GetMatch(_Doc, _contentrule, "[内容]");
    if (!found.Success)
    {
        // Nothing matched: the stored content is left untouched.
        return;
    }

    _content = found.Groups["TARGET"].Value;
}
/// <summary>
/// Strips markup identified by <paramref name="element"/> from the collected
/// content. Matching is case-insensitive.
/// </summary>
/// <param name="element">Tag name or attribute name to remove.</param>
/// <param name="type">
/// 0 removes the attribute assignment (<c>name=value</c>);
/// 1 removes the opening and closing tags but keeps what is between them;
/// 2 removes each element together with everything it encloses.
/// Any other value leaves the content unchanged.
/// </param>
private void FilterHtml(string element, int type)
{
    // Explicit guards instead of a catch-all: there is nothing to do without
    // a name or without content.
    if (string.IsNullOrEmpty(element) || string.IsNullOrEmpty(_content))
    {
        return;
    }

    // Escape so the name is always matched literally, whatever characters it contains.
    string name = Regex.Escape(element);
    string pattern;
    switch (type)
    {
        case 0:
            // The lookbehind stops "class" from matching inside "subclass" or "data-class".
            // The value is a double-quoted run, a single-quoted run, or an unquoted
            // token that ends at whitespace or '>' so the tag's own '>' is preserved.
            pattern = @"(?<![\w-])" + name + @"\s*=\s*(""[^""]*""|'[^']*'|[^\s>'""]+)";
            break;
        case 1:
            // \b keeps "a" from also stripping <abbr>, <area>, <address> and similar.
            pattern = "<" + name + @"\b[^>]*>|</" + name + @"\s*>";
            break;
        case 2:
            // Lazy body: each element is removed on its own. A greedy body would
            // delete everything between the first opening and the last closing tag.
            pattern = "<" + name + @"\b[^>]*>[\s\S]*?</" + name + @"\s*>";
            break;
        default:
            return;
    }

    // The static Replace uses the framework's regex cache, so repeated calls
    // with the same pattern do not rebuild the expression each time.
    _content = Regex.Replace(_content, pattern, "", RegexOptions.IgnoreCase);
}
/// <summary>
/// Replaces parts of the content described by a template.
/// </summary>
/// <param name="profile">
/// Template text. Each "[过滤字符串]" token marks a span to be replaced by
/// <paramref name="newstr"/>. Each "[变量]" token inside the surrounding literal
/// text matches any run of characters, which is kept. All other text is matched
/// literally and kept.
/// </param>
/// <param name="newstr">Text substituted for every marked span; null is treated as empty.</param>
/// <param name="bIgnoreCase">When true, the literal parts are matched case-insensitively.</param>
/// <remarks>
/// A "[过滤字符串]" token at the very end of the template matches exactly one
/// character, because a lazy wildcard with nothing after it stops as early as possible.
/// </remarks>
public void Replace(string profile, string newstr, bool bIgnoreCase)
{
    const string filterToken = "[过滤字符串]";

    if (string.IsNullOrEmpty(profile) || string.IsNullOrEmpty(_content))
    {
        return;
    }

    // NOTE(review): a template without the filter token marks nothing for
    // replacement, so the content is left unchanged. The previous code replaced
    // the single character following the literal in that case - confirm no
    // caller relied on that.
    if (profile.IndexOf(filterToken, StringComparison.Ordinal) < 0)
    {
        return;
    }

    // '$' is special in regex replacement strings; double it so it is emitted literally.
    string replacement = (newstr == null ? "" : newstr).Replace("$", "$$");

    // Split on the whole token. Splitting on its individual characters would cut
    // the template at every '[' , ']' or matching CJK character.
    string[] segments = profile.Split(new string[] { filterToken }, StringSplitOptions.None);

    StringBuilder regexText = new StringBuilder();
    StringBuilder replaceText = new StringBuilder();
    for (int i = 0; i < segments.Length; i++)
    {
        if (i > 0)
        {
            // A token sat between the previous segment and this one.
            regexText.Append(@"[\s\S]+?");
            replaceText.Append(replacement);
        }

        if (segments[i].Length == 0)
        {
            continue;
        }

        // Regex.Escape turns "[变量]" into "\[变量]" (']' is not escaped);
        // swap that for a lazy wildcard.
        string literal = Regex.Escape(segments[i]).Replace(@"\[变量]", @"[\s\S]*?");
        regexText.Append("(?<ch").Append(i).Append(">").Append(literal).Append(")");
        replaceText.Append("${ch").Append(i).Append("}");
    }

    RegexOptions options = bIgnoreCase ? RegexOptions.IgnoreCase : RegexOptions.None;
    _content = Regex.Replace(_content, regexText.ToString(), replaceText.ToString(), options);
}
/// <summary>
/// Applies the selected markup filters to the content, in a fixed order:
/// style, div, a, class, font, span, object, iframe, script.
/// </summary>
/// <param name="ridhtml">When true, every filter below runs regardless of its own flag.</param>
/// <param name="ridstyle">Run the "style" filter (type 0).</param>
/// <param name="riddiv">Run the "div" filter (type 1).</param>
/// <param name="rida">Run the "a" filter (type 1).</param>
/// <param name="ridclass">Run the "class" filter (type 0).</param>
/// <param name="ridfont">Run the "font" filter (type 1).</param>
/// <param name="ridspan">Run the "span" filter (type 1).</param>
/// <param name="ridobject">Run the "object" filter (type 2).</param>
/// <param name="ridiframe">Run the "iframe" filter (type 2).</param>
/// <param name="ridscript">Run the "script" filter (type 2).</param>
public void Filter(bool ridhtml, bool ridstyle, bool riddiv, bool rida, bool ridclass, bool ridfont, bool ridspan, bool ridobject, bool ridiframe, bool ridscript)
{
    // Parallel tables: name to strip, FilterHtml type, and the flag that enables it.
    string[] targets = { "style", "div", "a", "class", "font", "span", "object", "iframe", "script" };
    int[] modes = { 0, 1, 1, 0, 1, 1, 2, 2, 2 };
    bool[] requested = { ridstyle, riddiv, rida, ridclass, ridfont, ridspan, ridobject, ridiframe, ridscript };

    for (int i = 0; i < targets.Length; i++)
    {
        if (requested[i] || ridhtml)
        {
            FilterHtml(targets[i], modes[i]);
        }
    }
}
/// <summary>
/// Follows a chain of "next page" links starting from this page and returns
/// the concatenated content of every page reached.
/// </summary>
/// <param name="profile">Link rule containing the "[分页新闻]" placeholder.</param>
/// <returns>The content of the follow-up pages in visiting order; empty if there are none.</returns>
public string GetOtherPagination(string profile)
{
    otherpgcon = "";
    GetOtherPage(_Url, _Doc, profile);
    return otherpgcon;
}

/// <summary>
/// Walks the page chain and appends each fetched page's content to the
/// accumulator field read by <c>GetOtherPagination</c>.
/// </summary>
/// <param name="otherurl">Address of the page whose document is in <paramref name="PageDoc"/>.</param>
/// <param name="PageDoc">Document text in which the next link is searched.</param>
/// <param name="pattern">Link rule containing the "[分页新闻]" placeholder.</param>
private void GetOtherPage(string otherurl, string PageDoc, string pattern)
{
    // Every address already processed. Comparing only against the current page
    // would let a cycle such as A -> B -> A be followed forever.
    Dictionary<string, bool> visited = new Dictionary<string, bool>();
    if (otherurl != null)
    {
        visited[otherurl.Trim()] = true;
    }

    StringBuilder collected = new StringBuilder(otherpgcon);
    string currentUrl = otherurl;
    string currentDoc = PageDoc;

    while (true)
    {
        Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
        if (!m.Success)
        {
            break;
        }

        string nextUrl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);
        string key = nextUrl.Trim();
        if (visited.ContainsKey(key))
        {
            // Link points at this page or at one already collected: end of chain.
            break;
        }
        visited[key] = true;

        PageNews pgns = new PageNews(nextUrl, _Encode);
        // NOTE(review): the declaration of the content-rule field is not visible
        // here. If it is per-instance, the new page would have no rule and
        // FigureContent would throw, so hand it over explicitly.
        pgns.RuleOfContent = _contentrule;
        if (!pgns.Fetch())
        {
            break;
        }

        pgns.FigureContent();
        collected.Append(pgns.Content);

        currentUrl = nextUrl;
        currentDoc = pgns._Doc;
    }

    otherpgcon = collected.ToString();
}
/// <summary>
/// Collects the content of every page linked from this page's pagination
/// index and returns it concatenated in link order.
/// </summary>
/// <param name="profile">Link rule containing the "[分页新闻]" placeholder.</param>
/// <returns>
/// The combined content of the linked pages. Each distinct address is used
/// once, and a link back to this page is skipped. Empty if nothing was collected.
/// </returns>
public string GetIndexPagination(string profile)
{
    StringBuilder merged = new StringBuilder();

    // Addresses already handled. Seeded with this page so a self-link is skipped,
    // and used to avoid fetching and appending a page once per repeated link.
    Dictionary<string, bool> seen = new Dictionary<string, bool>();
    if (_Url != null)
    {
        seen[_Url.Trim()] = true;
    }

    Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]");
    while (m.Success)
    {
        string otherurl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
        string key = otherurl.Trim();
        if (!seen.ContainsKey(key))
        {
            seen[key] = true;

            PageNews pgns = new PageNews(otherurl, _Encode);
            // NOTE(review): the declaration of the content-rule field is not
            // visible here. If it is per-instance, the new page would have no
            // rule and FigureContent would throw, so hand it over explicitly.
            pgns.RuleOfContent = _contentrule;
            if (pgns.Fetch())
            {
                pgns.FigureContent();
                merged.Append(pgns.Content);
            }
        }
        m = m.NextMatch();
    }

    return merged.ToString();
}
/// <summary>
/// Gets the full document text that the extraction methods of this class search.
/// </summary>
public string AllDocument
{
    get
    {
        return this._Doc;
    }
}
} 244
} 245






}