syndication/rss2
document.cpp
00001 /* 00002 * This file is part of the syndication library 00003 * 00004 * Copyright (C) 2005 Frank Osterfeld <osterfeld@kde.org> 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Library General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Library General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Library General Public License 00017 * along with this library; see the file COPYING.LIB. If not, write to 00018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00019 * Boston, MA 02110-1301, USA. 00020 * 00021 */ 00022 00023 #include <rss2/document.h> 00024 #include <rss2/category.h> 00025 #include <rss2/cloud.h> 00026 #include <rss2/image.h> 00027 #include <rss2/item.h> 00028 #include <rss2/textinput.h> 00029 00030 #include <constants.h> 00031 #include <documentvisitor.h> 00032 #include <tools.h> 00033 00034 #include <QtXml/QDomDocument> 00035 #include <QtCore/QList> 00036 #include <QtCore/QSet> 00037 #include <QtCore/QString> 00038 00039 namespace Syndication { 00040 namespace RSS2 { 00041 00042 class Document::DocumentPrivate 00043 { 00044 public: 00045 DocumentPrivate() : itemDescriptionIsCDATA(false), 00046 itemDescriptionContainsMarkup(false), 00047 itemDescGuessed(false), 00048 itemTitleIsCDATA(false), 00049 itemTitleContainsMarkup(false), 00050 itemTitlesGuessed(false) 00051 {} 00052 mutable bool itemDescriptionIsCDATA; 00053 mutable bool itemDescriptionContainsMarkup; 00054 mutable bool itemDescGuessed; 00055 mutable bool itemTitleIsCDATA; 00056 mutable bool itemTitleContainsMarkup; 00057 mutable bool itemTitlesGuessed; 00058 }; 00059 00060 Document::Document(const QDomElement& element) : SpecificDocument(), 00061 ElementWrapper(element), 00062 d(new DocumentPrivate) 00063 { 00064 } 00065 00066 Document Document::fromXML(const QDomDocument& doc) 00067 { 00068 QDomNode channelNode = doc.namedItem(QString::fromUtf8("rss")).namedItem(QString::fromUtf8("channel")); 00069 00070 return Document(channelNode.toElement()); 00071 } 00072 00073 Document::Document() : SpecificDocument(), ElementWrapper(), d(new DocumentPrivate) 00074 { 00075 } 00076 00077 Document::Document(const Document& other) : SpecificDocument(other), ElementWrapper(other) 00078 { 00079 d = other.d; 00080 } 00081 00082 Document::~Document() 00083 { 00084 } 00085 00086 Document& Document::operator=(const Document& other) 00087 { 00088 ElementWrapper::operator=(other); 00089 d = other.d; 00090 return *this; 00091 } 00092 bool Document::isValid() const 00093 { 00094 return !isNull(); 00095 } 00096 00097 QString Document::title() const 00098 { 00099 return extractElementTextNS(QString(), QString::fromUtf8("title")); 00100 } 00101 00102 QString Document::link() const 00103 { 00104 return extractElementTextNS(QString(), QString::fromUtf8("link") ); 00105 } 00106 00107 QString Document::description() const 00108 { 00109 QString desc = extractElementTextNS(QString(), QString::fromUtf8("description")); 00110 return normalize(desc); 00111 } 00112 00113 QString Document::language() const 00114 { 00115 QString lang = extractElementTextNS(QString(), 00116 QString::fromUtf8("language")); 00117 00118 if (!lang.isNull()) 00119 { 00120 return lang; 00121 } 00122 else 00123 { 00124 return extractElementTextNS( 00125 dublinCoreNamespace(), QString::fromUtf8("language")); 00126 } 00127 00128 } 00129 00130 QString Document::copyright() const 00131 { 00132 QString rights = extractElementTextNS(QString(), 00133 QString::fromUtf8("copyright")); 00134 if (!rights.isNull()) 00135 { 00136 return rights; 00137 } 00138 else 00139 { 00140 // if <copyright> is not provided, use <dc:rights> 00141 return extractElementTextNS(dublinCoreNamespace(), 00142 QString::fromUtf8("rights")); 00143 } 00144 } 00145 00146 QString Document::managingEditor() const 00147 { 00148 return extractElementTextNS(QString(), QString::fromUtf8("managingEditor")); 00149 } 00150 00151 QString Document::webMaster() const 00152 { 00153 return extractElementTextNS(QString(), QString::fromUtf8("webMaster")); 00154 } 00155 00156 time_t Document::pubDate() const 00157 { 00158 QString str = extractElementTextNS(QString(), QString::fromUtf8("pubDate")); 00159 00160 if (!str.isNull()) 00161 { 00162 return parseDate(str, RFCDate); 00163 } 00164 else 00165 { // if there is no pubDate, check for dc:date 00166 str = extractElementTextNS(dublinCoreNamespace(), QString::fromUtf8("date")); 00167 return parseDate(str, ISODate); 00168 } 00169 } 00170 00171 time_t Document::lastBuildDate() const 00172 { 00173 QString str = extractElementTextNS(QString(), QString::fromUtf8("lastBuildDate")); 00174 00175 return parseDate(str, RFCDate); 00176 } 00177 00178 QList<Category> Document::categories() const 00179 { 00180 QList<Category> categories; 00181 00182 QList<QDomElement> catNodes = elementsByTagNameNS(QString(), 00183 QString::fromUtf8("category")); 00184 QList<QDomElement>::ConstIterator it = catNodes.constBegin(); 00185 for ( ; it != catNodes.constEnd(); ++it) 00186 { 00187 categories.append(Category(*it)); 00188 } 00189 00190 return categories; 00191 } 00192 00193 QString Document::generator() const 00194 { 00195 return extractElementTextNS(QString(), QString::fromUtf8("generator")); 00196 } 00197 00198 QString Document::docs() const 00199 { 00200 return extractElementTextNS(QString(), QString::fromUtf8("docs")); 00201 } 00202 00203 Cloud Document::cloud() const 00204 { 00205 return Cloud(firstElementByTagNameNS(QString(), QString::fromUtf8("cloud"))); 00206 } 00207 00208 int Document::ttl() const 00209 { 00210 bool ok; 00211 int c; 00212 00213 QString text = extractElementTextNS(QString(), QString::fromUtf8("ttl")); 00214 c = text.toInt(&ok); 00215 return ok ? c : 0; 00216 } 00217 00218 Image Document::image() const 00219 { 00220 return Image(firstElementByTagNameNS(QString(), QString::fromUtf8("image"))); 00221 } 00222 00223 TextInput Document::textInput() const 00224 { 00225 TextInput ti = TextInput(firstElementByTagNameNS(QString(), QString::fromUtf8("textInput"))); 00226 00227 if (!ti.isNull()) 00228 return ti; 00229 00230 // Netscape's version of RSS 0.91 has textinput, not textInput 00231 return TextInput(firstElementByTagNameNS(QString(), QString::fromUtf8("textinput"))); 00232 } 00233 00234 QSet<int> Document::skipHours() const 00235 { 00236 QSet<int> skipHours; 00237 QDomElement skipHoursNode = firstElementByTagNameNS(QString(), 00238 QString::fromUtf8("skipHours")); 00239 if (!skipHoursNode.isNull()) 00240 { 00241 ElementWrapper skipHoursWrapper(skipHoursNode); 00242 bool ok = false; 00243 QList<QDomElement> hours = 00244 skipHoursWrapper.elementsByTagNameNS(QString(), 00245 QString::fromUtf8("hour")); 00246 QList<QDomElement>::ConstIterator it = hours.constBegin(); 00247 for ( ; it != hours.constEnd(); ++it) 00248 { 00249 int h = (*it).text().toInt(&ok); 00250 if (ok) 00251 skipHours.insert(h); 00252 } 00253 } 00254 00255 return skipHours; 00256 } 00257 00258 QSet<Document::DayOfWeek> Document::skipDays() const 00259 { 00260 QSet<DayOfWeek> skipDays; 00261 QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QString::fromUtf8("skipDays")); 00262 if (!skipDaysNode.isNull()) 00263 { 00264 ElementWrapper skipDaysWrapper(skipDaysNode); 00265 QHash<QString, DayOfWeek> weekDays; 00266 00267 weekDays[QString::fromUtf8("Monday")] = Monday; 00268 weekDays[QString::fromUtf8("Tuesday")] = Tuesday; 00269 weekDays[QString::fromUtf8("Wednesday")] = Wednesday; 00270 weekDays[QString::fromUtf8("Thursday")] = Thursday; 00271 weekDays[QString::fromUtf8("Friday")] = Friday; 00272 weekDays[QString::fromUtf8("Saturday")] = Saturday; 00273 weekDays[QString::fromUtf8("Sunday")] = Sunday; 00274 00275 QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QString::fromUtf8("day")); 00276 for (QList<QDomElement>::ConstIterator it = days.constBegin(); it != days.constEnd(); ++it) 00277 { 00278 if (weekDays.contains((*it).text())) 00279 skipDays.insert(weekDays[(*it).text()]); 00280 } 00281 } 00282 00283 return skipDays; 00284 } 00285 00286 QList<Item> Document::items() const 00287 { 00288 QList<Item> items; 00289 00290 QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QString::fromUtf8("item")); 00291 00292 DocumentPtr doccpy(new Document(*this)); 00293 00294 for (QList<QDomElement>::ConstIterator it = itemNodes.constBegin(); it != itemNodes.constEnd(); ++it) 00295 { 00296 items.append(Item(*it, doccpy)); 00297 } 00298 00299 return items; 00300 } 00301 QList<QDomElement> Document::unhandledElements() const 00302 { 00303 // TODO: do not hardcode this list here 00304 QList<ElementType> handled; 00305 handled.append(QString::fromUtf8("title")); 00306 handled.append(QString::fromUtf8("link")); 00307 handled.append(QString::fromUtf8("description")); 00308 handled.append(QString::fromUtf8("language")); 00309 handled.append(QString::fromUtf8("copyright")); 00310 handled.append(QString::fromUtf8("managingEditor")); 00311 handled.append(QString::fromUtf8("webMaster")); 00312 handled.append(QString::fromUtf8("pubDate")); 00313 handled.append(QString::fromUtf8("lastBuildDate")); 00314 handled.append(QString::fromUtf8("skipDays")); 00315 handled.append(QString::fromUtf8("skipHours")); 00316 handled.append(QString::fromUtf8("item")); 00317 handled.append(QString::fromUtf8("textinput")); 00318 handled.append(QString::fromUtf8("textInput")); 00319 handled.append(QString::fromUtf8("image")); 00320 handled.append(QString::fromUtf8("ttl")); 00321 handled.append(QString::fromUtf8("generator")); 00322 handled.append(QString::fromUtf8("docs")); 00323 handled.append(QString::fromUtf8("cloud")); 00324 handled.append(ElementType(QString::fromUtf8("language"), dublinCoreNamespace())); 00325 handled.append(ElementType(QString::fromUtf8("rights"), dublinCoreNamespace())); 00326 handled.append(ElementType(QString::fromUtf8("date"), dublinCoreNamespace())); 00327 00328 QList<QDomElement> notHandled; 00329 00330 QDomNodeList children = element().childNodes(); 00331 for (int i = 0; i < children.size(); ++i) 00332 { 00333 QDomElement el = children.at(i).toElement(); 00334 if (!el.isNull() 00335 && !handled.contains(ElementType(el.localName(), el.namespaceURI()))) 00336 { 00337 notHandled.append(el); 00338 } 00339 } 00340 00341 return notHandled; 00342 } 00343 00344 QString Document::debugInfo() const 00345 { 00346 QString info; 00347 info += "### Document: ###################\n"; 00348 if (!title().isNull()) 00349 info += "title: #" + title() + "#\n"; 00350 if (!description().isNull()) 00351 info += "description: #" + description() + "#\n"; 00352 if (!link().isNull()) 00353 info += "link: #" + link() + "#\n"; 00354 if (!language().isNull()) 00355 info += "language: #" + language() + "#\n"; 00356 if (!copyright().isNull()) 00357 info += "copyright: #" + copyright() + "#\n"; 00358 if (!managingEditor().isNull()) 00359 info += "managingEditor: #" + managingEditor() + "#\n"; 00360 if (!webMaster().isNull()) 00361 info += "webMaster: #" + webMaster() + "#\n"; 00362 00363 QString dpubdate = dateTimeToString(pubDate()); 00364 if (!dpubdate.isNull()) 00365 info += "pubDate: #" + dpubdate + "#\n"; 00366 00367 QString dlastbuilddate = dateTimeToString(lastBuildDate()); 00368 if (!dlastbuilddate.isNull()) 00369 info += "lastBuildDate: #" + dlastbuilddate + "#\n"; 00370 00371 if (!textInput().isNull()) 00372 info += textInput().debugInfo(); 00373 if (!cloud().isNull()) 00374 info += cloud().debugInfo(); 00375 if (!image().isNull()) 00376 info += image().debugInfo(); 00377 00378 QList<Category> cats = categories(); 00379 00380 for (QList<Category>::ConstIterator it = cats.constBegin(); it != cats.constEnd(); ++it) 00381 info += (*it).debugInfo(); 00382 QList<Item> litems = items(); 00383 for (QList<Item>::ConstIterator it = litems.constBegin(); it != litems.constEnd(); ++it) 00384 info += (*it).debugInfo(); 00385 info += "### Document end ################\n"; 00386 return info; 00387 } 00388 00389 void Document::getItemTitleFormatInfo(bool* isCDATA, bool* containsMarkup) const 00390 { 00391 if (!d->itemTitlesGuessed) 00392 { 00393 QString titles; 00394 QList<Item> litems = items(); 00395 00396 if (litems.isEmpty()) 00397 { 00398 d->itemTitlesGuessed = true; 00399 return; 00400 } 00401 00402 QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QString::fromUtf8("title")); 00403 d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection(); 00404 00405 int nmax = litems.size() < 10 ? litems.size() : 10; // we check a maximum of 10 items 00406 int i = 0; 00407 00408 QList<Item>::ConstIterator it = litems.constBegin(); 00409 00410 while (i < nmax) 00411 { 00412 titles += (*it).originalTitle(); 00413 ++it; 00414 ++i; 00415 } 00416 00417 d->itemTitleContainsMarkup = stringContainsMarkup(titles); 00418 d->itemTitlesGuessed = true; 00419 } 00420 00421 if (isCDATA != 0L) 00422 *isCDATA = d->itemTitleIsCDATA; 00423 if (containsMarkup != 0L) 00424 *containsMarkup = d->itemTitleContainsMarkup; 00425 } 00426 00427 void Document::getItemDescriptionFormatInfo(bool* isCDATA, bool* containsMarkup) const 00428 { 00429 if (!d->itemDescGuessed) 00430 { 00431 QString desc; 00432 QList<Item> litems = items(); 00433 00434 00435 if (litems.isEmpty()) 00436 { 00437 d->itemDescGuessed = true; 00438 return; 00439 } 00440 00441 QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QString::fromUtf8("description")); 00442 d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection(); 00443 00444 int nmax = litems.size() < 10 ? litems.size() : 10; // we check a maximum of 10 items 00445 int i = 0; 00446 00447 QList<Item>::ConstIterator it = litems.constBegin(); 00448 00449 while (i < nmax) 00450 { 00451 desc += (*it).originalDescription(); 00452 ++it; 00453 ++i; 00454 } 00455 00456 d->itemDescriptionContainsMarkup = stringContainsMarkup(desc); 00457 d->itemDescGuessed = true; 00458 } 00459 00460 if (isCDATA != 0L) 00461 *isCDATA = d->itemDescriptionIsCDATA; 00462 if (containsMarkup != 0L) 00463 *containsMarkup = d->itemDescriptionContainsMarkup; 00464 } 00465 00466 bool Document::accept(DocumentVisitor* visitor) 00467 { 00468 return visitor->visitRSS2Document(this); 00469 } 00470 00471 } // namespace RSS2 00472 } // namespace Syndication