温馨提示:代码在线浏览功能只能做为源码浏览参考,不能展示项目的全部,如果想更进一步了解该代码请下载:NETCMSv1.5(Build0509)完整源码版
当前文件路径:NetCMSv15/NetCMS.Content/Collect/Collect.cs

//======================================================
//== (c)2008 aspxcms inc by NeTCMS v1.0 ==
//== Forum:bbs.aspxcms.com ==
//== Website:www.aspxcms.com ==
//======================================================
using System; 7
using System.IO; 8
using System.Data; 9
using System.Net; 10
using System.Text; 11
using NetCMS.Model; 12
using NetCMS.Control; 13
14
namespace NetCMS.Content.Collect 15
...{ 16
/// <summary>
/// Collector class: gathers news articles from configured remote sites.
/// </summary>
public class Collect 20
...{ 21
// Data-access object for collection data; created in the constructor.
private NetCMS.DALFactory.ICollect dal;
// Error message holder; starts out empty.
private string ErrorMsg = string.Empty;
// Backing field for the ShowProGressBar property.
private bool _ShowProGressBar;
/// <summary>
/// Creates a collector with the progress bar enabled and obtains the
/// data-access implementation from the DAL factory.
/// </summary>
public Collect()
{
    this.dal = NetCMS.DALFactory.DataAccess.CreateCollect();
    this._ShowProGressBar = true;
}
采集入库#region 采集入库 33
/// <summary>
/// Whether remote pictures are saved locally. Collecting sets this to true
/// when the site's SaveRemotePic value parses as true.
/// </summary>
private bool bSaveRemotePic;
// Local directory for saved remote pictures; assigned by Collecting.
private string PicSavePath = string.Empty;
// URL prefix matching PicSavePath; assigned by Collecting.
private string PicSaveUrl = string.Empty;
/// <summary>
/// Gets or sets whether progress is reported through the progress bar
/// while collecting. The constructor initialises it to true.
/// </summary>
public bool ShowProGressBar
{
    get
    {
        return _ShowProGressBar;
    }
    set
    {
        _ShowProGressBar = value;
    }
}
/// <summary>
/// Collects news for one configured site. Loads the site's settings row,
/// gathers article links from the list page (plus further list pages when the
/// first one yields fewer than <paramref name="num"/> links) and passes each
/// link to CollectPage. Progress is reported through HProgressBar when
/// <see cref="ShowProGressBar"/> is true.
/// </summary>
/// <param name="folderid">Identifier passed to GetSite to load the site settings.</param>
/// <param name="num">Maximum number of links to process.</param>
/// <param name="bnorepeat">
/// Forwarded to CollectPage; when true, a page whose title already exists is
/// reported as a repeat instead of being collected again.
/// </param>
public void Collecting(int folderid, int num, bool bnorepeat)
{
    if (ShowProGressBar) HProgressBar.Start("正在读取列表数据");

    // Reset the remote-picture state so that a previous run on this instance
    // (for a site that saved pictures) cannot leak into this run.
    bSaveRemotePic = false;
    PicSavePath = "";
    PicSaveUrl = "";

    DataTable tb = GetSite(folderid);

    // ---- Check that the site settings are complete ----
    if (tb == null || tb.Rows.Count < 1)
    {
        if (ShowProGressBar) HProgressBar.Roll("没有找到该站点的相关记录!", 0);
        return;
    }
    DataRow r = tb.Rows[0];
    if (r.IsNull("LinkSetting") || r.IsNull("PageTitleSetting") || r.IsNull("PagebodySetting"))
    {
        if (ShowProGressBar) HProgressBar.Roll("相关的参数没有设置,无法取得新闻列表!", 0);
        return;
    }
    if (bool.Parse(r["SaveRemotePic"].ToString()))
    {
        // ---- Remote pictures: prepare today's storage directory and its URL ----
        string rtpath = NetCMS.Config.UIConfig.dirFile;
        if (rtpath == null || rtpath.Trim().Equals(""))
        {
            if (ShowProGressBar) HProgressBar.Roll("没有找到管理员附件目录!", 0);
            return;
        }
        string dtpath = DateTime.Now.ToString("yyyyMMdd");
        PicSavePath = NetCMS.Common.ServerInfo.GetRootPath().TrimEnd('\\') + @"\" + rtpath + @"\RemoteFiles\" + dtpath;
        if (!Directory.Exists(PicSavePath))
        {
            Directory.CreateDirectory(PicSavePath);
        }
        PicSaveUrl = NetCMS.Publish.CommonData.getUrl() + "/" + rtpath + "/RemoteFiles/" + dtpath;
        bSaveRemotePic = true;
    }

    if (ShowProGressBar) HProgressBar.Roll("正在获取新闻列表页", 0);

    // ---- Fetch the link list from the first list page ----
    string sListUrl = r["objURL"].ToString();
    string sEncode = r["Encode"].ToString();
    bool bReverse = bool.Parse(r["IsReverse"].ToString());
    // Default list rule: the whole <body> of the page.
    string listset = @"<body[^>]*>(?<list>[\s\S]+?)</body>";
    if (!r.IsNull("ListSetting"))
    {
        listset = r["ListSetting"].ToString();
    }
    PageList PL = new PageList(sListUrl, sEncode);
    PL.RuleOfList = listset;
    PL.RuleOfLink = r["LinkSetting"].ToString();
    string[] NewsUrl = GetNewsList(PL);
    if (NewsUrl == null)
    {
        if (ShowProGressBar) HProgressBar.Roll("没有找到相关新闻链接地址!", 0);
        return;
    }

    // ---- Not enough links yet: follow the configured pagination scheme ----
    int len = NewsUrl.Length;
    if (len < num)
    {
        int pagetype = int.Parse(r["OtherType"].ToString());
        string[] otherurl = null;
        switch (pagetype)
        {
            case 1: // recursive pagination
                otherurl = PL.Pagination(r["OtherPageSetting"].ToString(), num - len);
                break;
            case 2: // other pages
                otherurl = PL.SinglePagination(r["OtherPageSetting"].ToString(), num - len);
                break;
            case 3: // index pages
                otherurl = PL.IndexPagination(r["OtherPageSetting"].ToString(), int.Parse(r["StartPageNum"].ToString()), int.Parse(r["EndPageNum"].ToString()), num - len);
                break;
            default: // 0 or unknown: no additional pages
                break;
        }
        if (otherurl != null && otherurl.Length > 0)
        {
            Array.Resize(ref NewsUrl, len + otherurl.Length);
            otherurl.CopyTo(NewsUrl, len);
        }
    }
    if (NewsUrl.Length < 1)
    {
        if (ShowProGressBar) HProgressBar.Roll("从列表内容中没有找到任何新闻的相关链接!", 0);
        return;
    }
    if (bReverse)
    {
        Array.Reverse(NewsUrl);
    }

    if (ShowProGressBar) HProgressBar.Roll("开始采集新闻", 0);

    // ---- Collect each page ----
    // Percentages are computed against the number of links actually processed,
    // so the bar reaches 100% even when fewer than num links were found.
    int total = Math.Min(num, NewsUrl.Length);
    int nSucceed = 0, nFailed = 0, nRepeat = 0;
    for (int i = 0; i < total; i++)
    {
        try
        {
            int flag = CollectPage(NewsUrl[i], r, bnorepeat);
            if (flag != 1)
            {
                // A repeat (-1) is counted as a success and also tallied separately.
                nSucceed++;
                if (flag == -1)
                {
                    nRepeat++;
                }
            }
            else
            {
                nFailed++;
            }
        }
        catch
        {
            // Best effort: a failure on one page must not abort the remaining pages.
            nFailed++;
        }
        string prompt = "正在采集新闻,终止<a href=\"Collect_List.aspx\">返回</a>.成功:" + nSucceed * 100 / total + "% ";
        if (nRepeat > 0)
        {
            prompt += "(其中重复:" + nRepeat * 100 / total + "%) ";
        }
        prompt += "失败:" + nFailed * 100 / total + "%";
        if (ShowProGressBar) HProgressBar.Roll(prompt, (i + 1) * 100 / total);
    }
}
/**//// <summary> 167
/// 处理采集单条新闻 168
/// </summary> 169
/// <param name="Url"></param> 170
/// <param name="r"></param> 171
/// <param name="norepeat"></param> 172
/// <returns>0为成功,-1为重复,1,为失败</returns> 173
private int CollectPage(string Url, DataRow r, bool norepeat) 174
...{ 175
try 176
...{ 177
if (Url == null || Url.Trim().Equals("")) 178
return 1; 179
PageNews pn = new PageNews(Url, r["Encode"].ToString()); 180
if (!pn.Fetch()) 181
return 1; 182
pn.RuleOfTitle = r["PageTitleSetting"].ToString(); 183
pn.RuleOfContent = r["PagebodySetting"].ToString(); 184
pn.FigureTitle(); 185
if (norepeat) 186
...{ 187
if (pn.Title == null) 188
return 1; 189
if (dal.TitleExist(pn.Title)) 190
return -1; 191
} 192
pn.FigureContent(); 193
if (r.IsNull("HandSetAuthor")) 194
...{ 195
pn.FigureAuthor(r["AuthorSetting"].ToString(), false); 196
} 197
else 198
...{ 199
pn.FigureAuthor(r["HandSetAuthor"].ToString(), true); 200
} 201
if (r.IsNull("HandSetSource")) 202
...{ 203
pn.FigureSource(r["SourceSetting"].ToString(), false); 204
} 205
else 206
...{ 207
pn.FigureSource(r["HandSetSource"].ToString(), true); 208
} 209
if (r.IsNull("HandSetAddDate")) 210
...{ 211
pn.FigureAddTime(r["AddDateSetting"].ToString(), false); 212
} 213
else 214
...{ 215
pn.FigureAddTime(r["HandSetAddDate"].ToString(), true); 216
} 217
int pgtp = int.Parse(r["OtherNewsType"].ToString()); 218
if (pgtp == 1) 219
...{ 220
pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString()); 221
} 222
else if (pgtp == 2) 223
...{ 224
pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString()); 225
} 226
pn.Filter(bool.Parse(r["TextTF"].ToString()), 227
bool.Parse(r["IsStyle"].ToString()), bool.Parse(r["IsDIV"].ToString()), bool.Parse(r["IsA"].ToString()), 228
bool.Parse(r["IsClass"].ToString()), bool.Parse(r["IsFont"].ToString()), bool.Parse(r["IsSpan"].ToString()), 229
bool.Parse(r["IsObject"].ToString()), bool.Parse(r["IsIFrame"].ToString()), bool.Parse(r["IsScript"].ToString())); 230
if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase")) 231
pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString())); 232
if (pn.Content != null && !pn.Content.Trim().Equals("") && !pn.Title.Trim().Equals("")) 233
...{ 234
NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo(); 235
ninf.Author = pn.Author; 236
ninf.Source = pn.Source; 237
ninf.AddDate = pn.AddTime; 238
ninf.Title = pn.Title; 239
ninf.SiteID = int.Parse(r["ID"].ToString()); 240
ninf.Links = Url; 241
ninf.ClassID = r["ClassID"].ToString(); 242
string Content = pn.Content; 243
if (bSaveRemotePic) 244
...{




