View Javadoc

1   /*
2    * Copyright 2007 Sebastien Brunot (sbrunot@gmail.com)
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *   
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package net.sourceforge.buildmonitor.utils;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.net.URL;
21  import java.text.DateFormat;
22  import java.text.ParseException;
23  
24  import org.xml.sax.Attributes;
25  import org.xml.sax.InputSource;
26  import org.xml.sax.SAXException;
27  import org.xml.sax.XMLReader;
28  import org.xml.sax.helpers.DefaultHandler;
29  import org.xml.sax.helpers.XMLReaderFactory;
30  
31  /**
32   * A class that can parse an RSS feed and return a Feed object.
33   * @author sbrunot
34   *
35   */
36  public class RssFeedReader
37  {
38  	/////////////////////////////////
39  	// Nested classes
40  	/////////////////////////////////
41  
42  	public class RssFeedContentHandler extends DefaultHandler
43  	{
44  		///////////////////////////////
45  		// Constants
46  		///////////////////////////////
47  
48  		private static final int TITLE_ATTRIBUTE = 1;
49  		private static final int DESCRIPTION_ATTRIBUTE = 2;
50  		private static final int PUBDATE_ATTRIBUTE = 3;
51  		private static final int LINK_ATTRIBUTE = 4;
52  		
53  		///////////////////////////////
54  		// Attributes
55  		///////////////////////////////
56  
57  		RssFeedDocument rssFeedDocument = null;
58  		RssFeedItem currentItem = null;
59  		int currentItemAttributeToSet = -1;
60  		DateFormat rssFeedDateFormat = null;
61  		
62  		///////////////////////////////
63  		// Constructor
64  		///////////////////////////////
65  
66  		public RssFeedContentHandler(DateFormat theRssFeedDateFormat)
67  		{
68  			if (theRssFeedDateFormat == null)
69  			{
70  				throw new IllegalArgumentException("The RSS Feed date format cannot be null !");
71  			}
72  			this.rssFeedDateFormat = theRssFeedDateFormat;
73  		}
74  		
75  		///////////////////////////////
76  		// ContentHandler implementation
77  		///////////////////////////////
78  		
79  		public void startDocument()
80  		{
81  			this.rssFeedDocument = new RssFeedDocument();
82  		}
83  
84  		public void startElement(String theNameSpace, String theLocalName, String theQName, Attributes theAttributes)
85  		{
86  			if ("item".equals(theLocalName))
87  			{
88  				// this is a new Item
89  				this.currentItem = new RssFeedItem();
90  			}
91  			else if ("title".equals(theLocalName))
92  			{
93  				this.currentItemAttributeToSet = TITLE_ATTRIBUTE;
94  			}
95  			else if ("link".equals(theLocalName))
96  			{
97  				this.currentItemAttributeToSet = LINK_ATTRIBUTE;
98  			}
99  			else if ("description".equals(theLocalName))
100 			{
101 				this.currentItemAttributeToSet = DESCRIPTION_ATTRIBUTE;
102 			}
103 			else if ("pubDate".equals(theLocalName))
104 			{
105 				this.currentItemAttributeToSet = PUBDATE_ATTRIBUTE;
106 			}
107 			else
108 			{
109 				this.currentItemAttributeToSet = -1;
110 			}
111 		}
112 
113 		public void endElement(String theNameSpace, String theLocalName, String theQName)
114 		{
115 			if ("item".equals(theLocalName))
116 			{
117 				// end of the item: add it to the document
118 				this.rssFeedDocument.add(this.currentItem);
119 				this.currentItem = null;
120 			}
121 		}
122 
123 		public void characters(char[] theCharacters, int theStartIndex, int theLength)
124 		{
125 			String characters = new String(theCharacters, theStartIndex, theLength).trim();
126 			String trimedCharacters = characters.replace("\n", "");
127 			if (!"".equals(trimedCharacters))
128 			{
129 				setCurrentItemAttribute(characters);
130 			}
131 		}
132 
133 		///////////////////////////////
134 		// Public methods
135 		///////////////////////////////
136 
137 		public RssFeedDocument getDocument()
138 		{
139 			return this.rssFeedDocument;
140 		}
141 
142 		///////////////////////////////
143 		// Private methods
144 		///////////////////////////////
145 
146 		private void setCurrentItemAttribute(String theValueOfTheAttribute)
147 		{
148 			if (this.currentItem != null && this.currentItemAttributeToSet != -1)
149 			{
150 				if (this.currentItemAttributeToSet == TITLE_ATTRIBUTE)
151 				{
152 					this.currentItem.setTitle(theValueOfTheAttribute);
153 				}
154 				else if (this.currentItemAttributeToSet == DESCRIPTION_ATTRIBUTE)
155 				{
156 					this.currentItem.setDescription(theValueOfTheAttribute);
157 				}
158 				else if (this.currentItemAttributeToSet == LINK_ATTRIBUTE)
159 				{
160 					this.currentItem.setLink(theValueOfTheAttribute);
161 				}
162 				else if (this.currentItemAttributeToSet == PUBDATE_ATTRIBUTE)
163 				{
164 					// TODO: USE A DateFormat
165 					try
166 					{
167 						this.currentItem.setPubDate(this.rssFeedDateFormat.parse(theValueOfTheAttribute));
168 					}
169 					catch (ParseException e)
170 					{
171 						this.currentItem.setPubDate(null);
172 						// TODO: ADD A LOG INSTEAD OF SYSTEM.ERR OUTPUT
173 						System.err.println("WARNING: publication date <" + theValueOfTheAttribute + "> does not follow the expected date format.");
174 					}
175 				}
176 				else
177 				{
178 					throw new RuntimeException("Error discovered in RssFeedContentHandler: please contact sbrunot@gmail.com");
179 				}
180 			}
181 		}
182 	}
183 	
184 	/////////////////////////////////
185 	// Attributes
186 	/////////////////////////////////
187 
188 	private URL rssFeedUrl;
189 	
190 	private DateFormat rssFeedDateFormat;
191 	
192 	/////////////////////////////////
193 	// Constructor
194 	/////////////////////////////////
195 	
196 	public RssFeedReader(URL theRssFeedUrl, DateFormat theRssFeedDateFormat)
197 	{
198 		if (theRssFeedUrl == null)
199 		{
200 			throw new IllegalArgumentException("URL of the RSS feed cannot be null.");
201 		}
202 		if (theRssFeedDateFormat == null)
203 		{
204 			throw new IllegalArgumentException("Date format for the RSS feed cannot be null.");
205 		}
206 		this.rssFeedUrl = theRssFeedUrl;
207 		this.rssFeedDateFormat = theRssFeedDateFormat;
208 	}
209 
210 	/////////////////////////////////
211 	// Public methods
212 	/////////////////////////////////
213 
214 	public RssFeedDocument getRssFeedDocument() throws IOException, SAXException
215 	{
216 		InputStream rssDocumentInputStream = null;
217 		try
218 		{
219 			rssDocumentInputStream = this.rssFeedUrl.openStream();
220 			XMLReader rssDocumentReader = XMLReaderFactory.createXMLReader();
221 			RssFeedContentHandler contentHandler = new RssFeedContentHandler(this.rssFeedDateFormat);
222 			rssDocumentReader.setContentHandler(contentHandler);
223 			rssDocumentReader.parse(new InputSource(rssDocumentInputStream));
224 			return contentHandler.getDocument();
225 		}
226 		finally
227 		{
228 			if (rssDocumentInputStream != null)
229 			{
230 				try
231 				{
232 					rssDocumentInputStream.close();
233 				}
234 				catch (IOException e)
235 				{
236 					// do nothing here: it may mask a previous exception ? (to be verified in java spec)
237 				}
238 			}
239 		}
240 	}
241 }