ZXNA是一个.net写的聚合器....感觉是国产精品...hoho.....作者写的教学...很有用!!.....
RSS聚合器原理不过是下载RSS,读取每个节点,再按时间顺序排列出来
其中比较麻烦的是:编码的问题、时间格式的问题。应很多网友的要求,贴出我的源码。
rss类
function 类是一个数据处理的类,在我的BLOG里
/// <summary>
/// Downloads one blog's RSS feed, parses its items and stores the new ones in the
/// blog_news table. Also exposes the application-wide refresh state (last refresh
/// time and "refresh in progress" flag) kept in ASP.NET Application state.
/// </summary>
/// <remarks>
/// The data helpers used here (returndatarow, GetDataList, updatetb, getstring,
/// foderpath) and the Application/Server objects are not defined in this class;
/// they are resolved through the base class <c>function</c>.
/// </remarks>
public class rss:function
{
    // Backing fields stay public so that existing code touching them keeps compiling.
    public int _blogid,_blogclass;
    public string _rssurl;
    public DateTime _lastupdatetime;
    file nn=new file();
    root theroot=new root();

    // Strips everything up to and including the last comma, i.e. the "Fri," weekday prefix.
    private static readonly Regex _weekdayPrefixPattern=new Regex(@"(.+?,)*");
    // Splits "<date text><+|-offset>" at the last sign that is followed by at least one character.
    private static readonly Regex _dateOffsetPattern=new Regex(@"(.+)((\+|\-).+)",RegexOptions.IgnoreCase);

    // RFC 822 North American zone names and their numeric offsets (same index in both arrays).
    private static readonly string[] _zoneNames={"CST","EST","EDT","CDT","MST","MDT","PST","PDT"};
    private static readonly string[] _zoneOffsets={"-06:00 ","-05:00 ","-04:00 ","-05:00 ","-07:00 ","-06:00 ","-08:00 ","-07:00 "};

    /// <summary>Timestamp of the newest stored item of the current blog.</summary>
    public DateTime lastupdatetime
    {
        get { return _lastupdatetime; }
        set { _lastupdatetime=value; }
    }

    /// <summary>Id of the blog whose feed is being processed.</summary>
    public int blogid
    {
        get { return _blogid; }
        set { _blogid=value; }
    }

    /// <summary>Class (category) id written to every stored item.</summary>
    public int blogclass
    {
        get { return _blogclass; }
        set { _blogclass=value; }
    }

    /// <summary>URL the feed is downloaded from.</summary>
    public string rssurl
    {
        get { return _rssurl; }
        set { _rssurl=value; }
    }

    /// <summary>
    /// Time of the last refresh, shared by all requests through Application["stime"].
    /// Initialised to the current time on first read.
    /// </summary>
    public DateTime stime
    {
        get
        {
            if (Application["stime"]==null)
            {
                Application.Lock();
                try
                {
                    // Re-check under the lock so a concurrent request cannot overwrite
                    // a value that was stored between the first check and the lock.
                    if (Application["stime"]==null)
                    {
                        Application["stime"]=DateTime.Now;
                    }
                }
                finally
                {
                    Application.UnLock();
                }
            }
            return (DateTime)Application["stime"];
        }
        set
        {
            Application.Lock();
            try
            {
                Application["stime"]=value;
            }
            finally
            {
                Application.UnLock();
            }
        }
    }

    /// <summary>
    /// True while a feed refresh is running, shared by all requests through
    /// Application["rssing"]. Initialised to false on first read.
    /// </summary>
    public bool Rssing
    {
        get
        {
            if (Application["rssing"]==null)
            {
                Application.Lock();
                try
                {
                    // Same double check as in stime.
                    if (Application["rssing"]==null)
                    {
                        Application["rssing"]=false;
                    }
                }
                finally
                {
                    Application.UnLock();
                }
            }
            return (bool)Application["rssing"];
        }
        set
        {
            Application.Lock();
            try
            {
                Application["rssing"]=value;
            }
            finally
            {
                Application.UnLock();
            }
        }
    }

    /// <summary>
    /// Returns the [datetime] of the newest blog_news row of the given blog. Feed items
    /// newer than this value are stored by <see cref="getrss"/>.
    /// </summary>
    /// <param name="id">Blog id to look up.</param>
    /// <returns>The newest stored timestamp, or 2005-07-20 00:00:00 when the lookup fails.</returns>
    public DateTime getlastupdate(int id)
    {
        // Built from components instead of DateTime.Parse so the value does not depend
        // on the server culture.
        DateTime latest=new DateTime(2005,7,20);
        try
        {
            DataRow dr=returndatarow("select top 1 * from blog_news where blogid="+id+" order by [datetime] desc");
            latest=(DateTime)dr["datetime"];
        }
        catch
        {
            // Deliberate best effort: any failure of the lookup (presumably including
            // "no rows yet" - confirm against returndatarow) falls back to the default.
        }
        return latest;
    }

    /// <summary>
    /// Downloads the feed at <see cref="rssurl"/> to a local file, parses it and
    /// inserts or updates every item that is newer than the newest stored item.
    /// </summary>
    /// <remarks>
    /// The feed is saved to disk and loaded with XmlDocument so that the XML parser
    /// handles the character encoding. Items without a link, without a date element,
    /// or with an unparsable date are skipped; when that happens one summary line is
    /// written through theroot.greatlog.
    /// </remarks>
    public void getrss()
    {
        String path=foderpath+"xna/rssread/"+blogid+".txt";
        string localFile=Server.MapPath(path);

        // using guarantees the client is released even when the download throws.
        using (System.Net.WebClient client=new System.Net.WebClient())
        {
            client.DownloadFile(rssurl,localFile);
        }

        XmlDocument doc=new XmlDocument();
        doc.Load(localFile);

        // GetElementsByTagName instead of "/rss/channel/item" so that feeds that do
        // not follow the standard layout are still read.
        XmlNodeList items=doc.GetElementsByTagName("item");
        lastupdatetime=getlastupdate(blogid);

        int skipped=0;
        foreach (XmlNode item in items)
        {
            XmlElement linkNode=item["link"];
            XmlElement dateNode=finddateelement(item);
            if (linkNode==null || dateNode==null)
            {
                // Without a link the row cannot be identified, without a date it cannot be ordered.
                skipped++;
                continue;
            }

            string title=innertextorempty(item["title"]);
            string content=innertextorempty(item["description"]);
            string link=linkNode.InnerText;

            string[] dateParts=getdate(dateNode.InnerText);
            DateTime date;
            try
            {
                date=DateTime.Parse(dateParts[0]);
            }
            catch (FormatException)
            {
                skipped++;
                continue;
            }
            string offset=dateParts[1].Replace(":","");

            if (date>lastupdatetime)
            {
                // The link comes from the remote feed: double the single quotes so it
                // cannot terminate the SQL string literal.
                String sql="select top 1 * from blog_news where link='"+link.Replace("'","''")+"'";
                DataSet ds=GetDataList(sql);
                bool isNew=ds.Tables[0].Rows.Count<1;
                DataRow dr;
                if (isNew)
                {
                    dr=ds.Tables["table"].NewRow();
                }
                else
                {
                    dr=ds.Tables["table"].Rows[0];
                }
                dr["title"]=title;
                dr["content"]=getstring(content,1500);
                dr["datetime"]=date;
                dr["utc"]=offset;
                dr["link"]=link;
                dr["blogid"]=blogid;
                dr["blogclass"]=blogclass;
                if (isNew)
                {
                    ds.Tables["table"].Rows.Add(dr);
                }
                updatetb(ds,sql);
            }
        }

        if (skipped>0)
        {
            theroot.greatlog("RSS items skipped (missing link/date or unparsable date): "+skipped+" "+rssurl);
        }
    }

    /// <summary>Returns the first date element present: PubDate, pubDate or dc:date; null when none exists.</summary>
    private static XmlElement finddateelement(XmlNode item)
    {
        if (item["PubDate"]!=null)
        {
            return item["PubDate"];
        }
        if (item["pubDate"]!=null)
        {
            return item["pubDate"];
        }
        return item["dc:date"];
    }

    /// <summary>Returns the element's InnerText, or an empty string when the element is missing.</summary>
    private static string innertextorempty(XmlElement element)
    {
        return element==null ? "" : element.InnerText;
    }

    /// <summary>
    /// Replaces RFC 822 time zone abbreviations (EST/EDT, CST/CDT, MST/MDT, PST/PDT)
    /// in a date string with their numeric offsets, e.g. "EST" becomes "-05:00 ".
    /// </summary>
    /// <param name="str">Date text taken from the feed.</param>
    /// <returns>The text with known zone names replaced; other text is unchanged.</returns>
    public string utc(string str)
    {
        for (int i=0;i<_zoneNames.Length;i++)
        {
            str=str.Replace(_zoneNames[i],_zoneOffsets[i]);
        }
        return str;
    }

    /// <summary>
    /// Splits a feed date such as "Fri, 22 Jul 2005 13:00:00 +0800" into its date
    /// text and its offset text.
    /// </summary>
    /// <param name="str">Raw date text from the feed.</param>
    /// <returns>
    /// A two-element array: [0] is the text before the last sign that still has text
    /// after it, [1] is the text from that sign onwards. Both are empty strings when
    /// the text contains no such sign.
    /// </returns>
    public string[] getdate(string str)
    {
        string[] parts=new string[2];

        str=_weekdayPrefixPattern.Replace(str,"");
        // Make sure a positive offset is separated from the time by a blank.
        str=str.Replace("+"," +");
        str=utc(str);

        try
        {
            // Probe only: when the framework can parse the whole text, "+0800" is
            // appended so that the split below returns the complete text as the date
            // part and "+0800" as the offset.
            // NOTE(review): "+0800" looks like the server's own zone - confirm.
            DateTime.Parse(str);
            str+="+0800";
        }
        catch
        {
            // Not parsable as a whole: the split below separates date and offset instead.
        }

        Match m=_dateOffsetPattern.Match(str);
        parts[0]=m.Groups[1].ToString();
        parts[1]=m.Groups[2].ToString();
        return parts;
    }
}//end of class
下面是更新RSS的文件。原理就是客户端不停地访问这个文件;这个文件作了限制,只能同时处理一个请求,并且两次处理之间要隔一段时间。
using System;
using System.Web;
using System.IO;
using System.Net;
using System.Data;
using System.Web.UI;
using System.Collections;
using System.Configuration;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using article;
//非要中文么
/// <summary>
/// Page that refreshes one RSS feed per request. Clients poll this page; a refresh
/// only runs when more than five seconds have passed since the previous one and no
/// other refresh is marked as running.
/// </summary>
public class main:rss
{
    file nn=new file();
    root theroot=new root();

    /// <summary>
    /// Picks the next blog whose feed has not been fetched in the current round
    /// (ait=1 and fs=0), downloads it through getrss() and marks it as fetched.
    /// When no such blog is left, all blogs are reset to fs=0 to start a new round.
    /// The response body is always of the form msg="...";.
    /// </summary>
    public void Page_Load(Object sender, EventArgs e)
    {
        // Disable caching so every poll reaches the server.
        Response.Buffer=true;
        Response.ExpiresAbsolute=DateTime.Now.AddSeconds(-1);
        Response.Expires=0;
        Response.CacheControl="no-cache";

        // Minimum number of seconds between two refreshes.
        const int rsstime=5;
        double num=(DateTime.Now-stime).TotalSeconds;
        string str=num.ToString();

        if (num>rsstime)
        {
            string sql="select top 1 * from blogs where ait=1 and fs=0";
            DataSet mylist=GetDataList(sql);
            if (mylist.Tables[0].Rows.Count==0)
            {
                // Every feed has been fetched: start over.
                update("update blogs set fs=0");
            }
            else
            {
                DataRow dr=mylist.Tables[0].Rows[0];
                if (Rssing)
                {
                    // Another request is already refreshing: report it and stop here,
                    // otherwise two refreshes would run at the same time.
                    catchit("msg=\""+dr["rssurl"]+" Has Rssing\";");
                    return;
                }

                blogid=(int)dr["id"];
                blogclass=(int)dr["classid"];
                rssurl=dr["rssurl"].ToString();
                Rssing=true;
                str+=" RSS:"+rssurl;
                try
                {
                    getrss();
                }
                catch(System.Exception ero)
                {
                    str=ero.Message+":"+rssurl;
                    theroot.greatlog(str);
                }
                finally
                {
                    // Runs on success and on failure: the feed is marked as fetched
                    // either way so a broken feed cannot block the round, and the
                    // running flag is always cleared.
                    try
                    {
                        stime=DateTime.Now;
                        update("update blogs set fs=1 where id="+blogid);
                    }
                    finally
                    {
                        Rssing=false;
                    }
                }
            }
        }
        Response.Write("msg=\""+str+"\";");
    }//end of page_load
}
也就这些东西了,也不是很复杂的东西。另外时区的问题还没解决,就是上面的那个时区转换,哪位知道请告诉一下。
发表评论