• Skip to content
  • Skip to link menu
KDE 4.6 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • KDE Home
  • Contact Us
 

kpimutils

linklocator.cpp
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2002 Dave Corrie <kde@davecorrie.com>
00003 
00004   This library is free software; you can redistribute it and/or
00005   modify it under the terms of the GNU Library General Public
00006   License as published by the Free Software Foundation; either
00007   version 2 of the License, or (at your option) any later version.
00008 
00009   This library is distributed in the hope that it will be useful,
00010   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012   Library General Public License for more details.
00013 
00014   You should have received a copy of the GNU Library General Public License
00015   along with this library; see the file COPYING.LIB.  If not, write to
00016   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017   Boston, MA 02110-1301, USA.
00018 */
00029 #include "linklocator.h"
00030 
00031 #include <KEmoticons>
00032 
00033 #include <QtCore/QCoreApplication>
00034 #include <QtCore/QFile>
00035 #include <QtCore/QRegExp>
00036 #include <QtGui/QTextDocument>
00037 
00038 #include <climits>
00039 
00040 using namespace KPIMUtils;
00041 
00046 //@cond PRIVATE
00047 class KPIMUtils::LinkLocator::Private
00048 {
00049   public:
00050     int mMaxUrlLen;
00051     int mMaxAddressLen;
00052 };
00053 //@endcond
00054 
00055 // Use a static for this as calls to the KEmoticons constructor are expensive.
00056 K_GLOBAL_STATIC( KEmoticons, sEmoticons )
00057 
00058 LinkLocator::LinkLocator( const QString &text, int pos )
00059   : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private )
00060 {
00061   d->mMaxUrlLen = 4096;
00062   d->mMaxAddressLen = 255;
00063 
00064   // If you change either of the above values for maxUrlLen or
00065   // maxAddressLen, then please also update the documentation for
00066   // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
00067   // default values used for the maxUrlLen/maxAddressLen parameters
00068   // of convertToHtml().
00069 }
00070 
00071 LinkLocator::~LinkLocator()
00072 {
00073   delete d;
00074 }
00075 
00076 void LinkLocator::setMaxUrlLen( int length )
00077 {
00078   d->mMaxUrlLen = length;
00079 }
00080 
00081 int LinkLocator::maxUrlLen() const
00082 {
00083   return d->mMaxUrlLen;
00084 }
00085 
00086 void LinkLocator::setMaxAddressLen( int length )
00087 {
00088   d->mMaxAddressLen = length;
00089 }
00090 
00091 int LinkLocator::maxAddressLen() const
00092 {
00093   return d->mMaxAddressLen;
00094 }
00095 
00096 QString LinkLocator::getUrl()
00097 {
00098   QString url;
00099   if ( atUrl() ) {
00100     // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
00101     // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
00102     // be allowed and should be ignored when the URI is extracted.
00103 
00104     // This implementation follows this recommendation and
00105     // allows the URL to be enclosed within different kind of brackets/quotes
00106     // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
00107     // the URL ends with the first whitespace
00108     // Also, if the URL is enclosed in brackets, the URL itself is not allowed
00109     // to contain the closing bracket, as this would be detected as the end of the URL
00110 
00111     QChar beforeUrl, afterUrl;
00112 
00113     // detect if the url has been surrounded by brackets or quotes
00114     if ( mPos > 0 ) {
00115       beforeUrl = mText[mPos - 1];
00116 
00117       if ( beforeUrl == '(' ) {
00118         afterUrl = ')';
00119       } else if ( beforeUrl == '[' ) {
00120         afterUrl = ']';
00121       } else if ( beforeUrl == '<' ) {
00122         afterUrl = '>';
00123       } else if ( beforeUrl == '>' ) { // for e.g. <link>http://.....</link>
00124         afterUrl = '<';
00125       } else if ( beforeUrl == '"' ) {
00126         afterUrl = '"';
00127       }
00128     }
00129 
00130     url.reserve( maxUrlLen() );  // avoid allocs
00131     int start = mPos;
00132     while ( ( mPos < (int)mText.length() ) &&
00133             ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
00134             ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
00135               ( !afterUrl.isNull() && mText[mPos] != afterUrl ) ) ) {
00136       if ( !mText[mPos].isSpace() ) {   // skip whitespace
00137         url.append( mText[mPos] );
00138         if ( url.length() > maxUrlLen() ) {
00139           break;
00140         }
00141       }
00142 
00143       mPos++;
00144     }
00145 
00146     if ( isEmptyUrl(url) || ( url.length() > maxUrlLen() ) ) {
00147       mPos = start;
00148       url = "";
00149     } else {
00150       --mPos;
00151     }
00152   }
00153 
00154   // HACK: This is actually against the RFC. However, most people don't properly escape the URL in
00155   //       their text with "" or <>. That leads to people writing an url, followed immediatley by
00156   //       a dot to finish the sentence. That would lead the parser to include the dot in the url,
00157   //       even though that is not wanted. So work around that here.
00158   //       Most real-life URLs hopefully don't end with dots or commas.
00159   if ( url.length() > 1 ) {
00160     QList<QChar> wordBoundaries;
00161     wordBoundaries << '.' << ',' << ':' << '!' << '?';
00162     if ( wordBoundaries.contains( url.at( url.length() - 1 ) ) ) {
00163       url.chop( 1 );
00164       --mPos;
00165     }
00166   }
00167 
00168   return url;
00169 }
00170 
00171 // keep this in sync with KMMainWin::slotUrlClicked()
00172 bool LinkLocator::atUrl() const
00173 {
00174   // the following characters are allowed in a dot-atom (RFC 2822):
00175   // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
00176   const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
00177 
00178   // the character directly before the URL must not be a letter, a number or
00179   // any other character allowed in a dot-atom (RFC 2822).
00180   if ( ( mPos > 0 ) &&
00181        ( mText[mPos-1].isLetterOrNumber() ||
00182          ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) {
00183     return false;
00184   }
00185 
00186   QChar ch = mText[mPos];
00187   return
00188     ( ch == 'h' && ( mText.mid( mPos, 7 ) == "http://" ||
00189                      mText.mid( mPos, 8 ) == "https://" ) ) ||
00190     ( ch == 'v' && mText.mid( mPos, 6 ) == "vnc://" ) ||
00191     ( ch == 'f' && ( mText.mid( mPos, 7 ) == "fish://" ||
00192                      mText.mid( mPos, 6 ) == "ftp://" ||
00193                      mText.mid( mPos, 7 ) == "ftps://" ) ) ||
00194     ( ch == 's' && ( mText.mid( mPos, 7 ) == "sftp://" ||
00195                      mText.mid( mPos, 6 ) == "smb://" ) ) ||
00196     ( ch == 'm' && mText.mid( mPos, 7 ) == "mailto:" ) ||
00197     ( ch == 'w' && mText.mid( mPos, 4 ) == "www." ) ||
00198     ( ch == 'f' && ( mText.mid( mPos, 4 ) == "ftp." ||
00199                      mText.mid( mPos, 7 ) == "file://" ) ) ||
00200     ( ch == 'n' && mText.mid( mPos, 5 ) == "news:" );
00201 }
00202 
00203 bool LinkLocator::isEmptyUrl( const QString &url ) const
00204 {
00205   return url.isEmpty() ||
00206     url == "http://" ||
00207     url == "https://" ||
00208     url == "fish://" ||
00209     url == "ftp://" ||
00210     url == "ftps://" ||
00211     url == "sftp://" ||
00212     url == "smb://" ||
00213     url == "vnc://" ||
00214     url == "mailto" ||
00215     url == "www" ||
00216     url == "ftp" ||
00217     url == "news" ||
00218     url == "news://";
00219 }
00220 
00221 QString LinkLocator::getEmailAddress()
00222 {
00223   QString address;
00224 
00225   if ( mText[mPos] == '@' ) {
00226     // the following characters are allowed in a dot-atom (RFC 2822):
00227     // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
00228     const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
00229 
00230     // determine the local part of the email address
00231     int start = mPos - 1;
00232     while ( start >= 0 && mText[start].unicode() < 128 &&
00233             ( mText[start].isLetterOrNumber() ||
00234               mText[start] == '@' || // allow @ to find invalid email addresses
00235               allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) {
00236       if ( mText[start] == '@' ) {
00237         return QString(); // local part contains '@' -> no email address
00238       }
00239       --start;
00240     }
00241     ++start;
00242     // we assume that an email address starts with a letter or a digit
00243     while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) {
00244       ++start;
00245     }
00246     if ( start == mPos ) {
00247       return QString(); // local part is empty -> no email address
00248     }
00249 
00250     // determine the domain part of the email address
00251     int dotPos = INT_MAX;
00252     int end = mPos + 1;
00253     while ( end < (int)mText.length() &&
00254             ( mText[end].isLetterOrNumber() ||
00255               mText[end] == '@' || // allow @ to find invalid email addresses
00256               mText[end] == '.' ||
00257               mText[end] == '-' ) ) {
00258       if ( mText[end] == '@' ) {
00259         return QString(); // domain part contains '@' -> no email address
00260       }
00261       if ( mText[end] == '.' ) {
00262         dotPos = qMin( dotPos, end ); // remember index of first dot in domain
00263       }
00264       ++end;
00265     }
00266     // we assume that an email address ends with a letter or a digit
00267     while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) {
00268       --end;
00269     }
00270     if ( end == mPos ) {
00271       return QString(); // domain part is empty -> no email address
00272     }
00273     if ( dotPos >= end ) {
00274       return QString(); // domain part doesn't contain a dot
00275     }
00276 
00277     if ( end - start > maxAddressLen() ) {
00278       return QString(); // too long -> most likely no email address
00279     }
00280     address = mText.mid( start, end - start );
00281 
00282     mPos = end - 1;
00283   }
00284   return address;
00285 }
00286 
00287 QString LinkLocator::convertToHtml( const QString &plainText, int flags,
00288                                     int maxUrlLen, int maxAddressLen )
00289 {
00290   LinkLocator locator( plainText );
00291   locator.setMaxUrlLen( maxUrlLen );
00292   locator.setMaxAddressLen( maxAddressLen );
00293 
00294   QString str;
00295   QString result( (QChar*)0, (int)locator.mText.length() * 2 );
00296   QChar ch;
00297   int x;
00298   bool startOfLine = true;
00299   QString emoticon;
00300 
00301   for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length();
00302         locator.mPos++, x++ ) {
00303     ch = locator.mText[locator.mPos];
00304     if ( flags & PreserveSpaces ) {
00305       if ( ch == ' ' ) {
00306         if ( locator.mPos + 1 < locator.mText.length() ) {
00307           if ( locator.mText[locator.mPos + 1] != ' ' ) {
00308 
00309             // A single space, make it breaking if not at the start or end of the line
00310             const bool endOfLine = locator.mText[locator.mPos + 1] == '\n';
00311             if ( !startOfLine && !endOfLine ) {
00312               result += ' ';
00313             } else {
00314               result += "&nbsp;";
00315             }
00316           } else {
00317 
00318             // Whitespace of more than one space, make it all non-breaking
00319             while ( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) {
00320               result += "&nbsp;";
00321               locator.mPos++;
00322               x++;
00323             }
00324 
00325             // We incremented once to often, undo that
00326             locator.mPos--;
00327             x--;
00328           }
00329         } else {
00330           // Last space in the text, it is non-breaking
00331           result += "&nbsp;";
00332         }
00333 
00334         if ( startOfLine ) {
00335           startOfLine = false;
00336         }
00337         continue;
00338       } else if ( ch == '\t' ) {
00339         do
00340         {
00341           result += "&nbsp;";
00342           x++;
00343         }
00344         while ( ( x & 7 ) != 0 );
00345         x--;
00346         startOfLine = false;
00347         continue;
00348       }
00349     }
00350     if ( ch == '\n' ) {
00351       result += "<br />\n"; // Keep the \n, so apps can figure out the quoting levels correctly.
00352       startOfLine = true;
00353       x = -1;
00354       continue;
00355     }
00356 
00357     startOfLine = false;
00358     if ( ch == '&' ) {
00359       result += "&amp;";
00360     } else if ( ch == '"' ) {
00361       result += "&quot;";
00362     } else if ( ch == '<' ) {
00363       result += "&lt;";
00364     } else if ( ch == '>' ) {
00365       result += "&gt;";
00366     } else {
00367       const int start = locator.mPos;
00368       if ( !( flags & IgnoreUrls ) ) {
00369         str = locator.getUrl();
00370         if ( !str.isEmpty() ) {
00371           QString hyperlink;
00372           if ( str.left( 4 ) == "www." ) {
00373             hyperlink = "http://" + str;
00374           } else if ( str.left( 4 ) == "ftp." ) {
00375             hyperlink = "ftp://" + str;
00376           } else {
00377             hyperlink = str;
00378           }
00379 
00380           result += "<a href=\"" + hyperlink + "\">" + Qt::escape( str ) + "</a>";
00381           x += locator.mPos - start;
00382           continue;
00383         }
00384         str = locator.getEmailAddress();
00385         if ( !str.isEmpty() ) {
00386           // len is the length of the local part
00387           int len = str.indexOf( '@' );
00388           QString localPart = str.left( len );
00389 
00390           // remove the local part from the result (as '&'s have been expanded to
00391           // &amp; we have to take care of the 4 additional characters per '&')
00392           result.truncate( result.length() -
00393                            len - ( localPart.count( '&' ) * 4 ) );
00394           x -= len;
00395 
00396           result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
00397           x += str.length() - 1;
00398           continue;
00399         }
00400       }
00401       if ( flags & HighlightText ) {
00402         str = locator.highlightedText();
00403         if ( !str.isEmpty() ) {
00404           result += str;
00405           x += locator.mPos - start;
00406           continue;
00407         }
00408       }
00409       result += ch;
00410     }
00411   }
00412 
00413   if ( flags & ReplaceSmileys ) {
00414     QStringList exclude;
00415     exclude << "(c)" << "(C)" << "&gt;:-(" << "&gt;:(" << "(B)" << "(b)" << "(P)" << "(p)";
00416     exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)";
00417     exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)";
00418     exclude << "(g)" << "(F)" << "(f)" << "(H)";
00419     exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)";
00420     static QString cachedEmoticonsThemeName;
00421     if ( cachedEmoticonsThemeName.isEmpty() ) {
00422       cachedEmoticonsThemeName = KEmoticons::currentThemeName();
00423     }
00424     result =
00425       sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons(
00426         result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude );
00427   }
00428 
00429   return result;
00430 }
00431 
00432 QString LinkLocator::pngToDataUrl( const QString &iconPath )
00433 {
00434   if ( iconPath.isEmpty() ) {
00435     return QString();
00436   }
00437 
00438   QFile pngFile( iconPath );
00439   if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) {
00440     return QString();
00441   }
00442 
00443   QByteArray ba = pngFile.readAll();
00444   pngFile.close();
00445   return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() );
00446 }
00447 
00448 QString LinkLocator::highlightedText()
00449 {
00450   // formating symbols must be prepended with a whitespace
00451   if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) {
00452     return QString();
00453   }
00454 
00455   const QChar ch = mText[mPos];
00456   if ( ch != '/' && ch != '*' && ch != '_' ) {
00457     return QString();
00458   }
00459 
00460   QRegExp re =
00461     QRegExp( QString( "\\%1((\\w+)([\\s-']\\w+)*( ?[,.:\\?!;])?)\\%2" ).arg( ch ).arg( ch ) );
00462   re.setMinimal(true);
00463   if ( re.indexIn( mText, mPos ) == mPos ) {
00464     int length = re.matchedLength();
00465     // there must be a whitespace after the closing formating symbol
00466     if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) {
00467       return QString();
00468     }
00469     mPos += length - 1;
00470     switch ( ch.toLatin1() ) {
00471     case '*':
00472       return "<b>" + re.cap( 1 ) + "</b>";
00473     case '_':
00474       return "<u>" + re.cap( 1 ) + "</u>";
00475     case '/':
00476       return "<i>" + re.cap( 1 ) + "</i>";
00477     }
00478   }
00479   return QString();
00480 }

kpimutils

Skip menu "kpimutils"
  • Main Page
  • Modules
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal