• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.10.2 API Reference
  • KDE Home
  • Contact Us
 

KHTML

  • khtml
khtml_filter.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE project
2 
3  Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
4  Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
5  Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Library General Public
9  License as published by the Free Software Foundation; either
10  version 2 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Library General Public License for more details.
16 
17  You should have received a copy of the GNU Library General Public License
18  along with this library; see the file COPYING.LIB. If not, write to
19  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  Boston, MA 02110-1301, USA.
21 */
22 
23 #include "khtml_filter_p.h"
24 #include <QDebug>
25 
26 // rolling hash parameters
27 #define HASH_P (1997)
28 #define HASH_Q (17509)
29 // HASH_MOD = (HASH_P^7) % HASH_Q
30 #define HASH_MOD (523)
31 
32 namespace khtml {
33 
34 // We only want a subset of features of wildcards -- just the
35 // star, so we escape the rest before passing to QRegExp.
36 // The \ is escaped due to a QRegExp bug.
37 // ### we really should rather parse it ourselves, in order to
38 // handle adblock-special things like | and ^ properly.
39 static QRegExp fromAdBlockWildcard(const QString& wcStr) {
40  QRegExp rx;
41  rx.setPatternSyntax(QRegExp::Wildcard);
42 
43  QString out;
44  for (int p = 0; p < wcStr.length(); ++p) {
45  QChar c = wcStr[p];
46  if (c == QLatin1Char('?'))
47  out += QLatin1String("[?]");
48  else if (c == QLatin1Char('['))
49  out += QLatin1String("[[]");
50  else if (c == QLatin1Char('\\'))
51  out += QLatin1String("[\\]");
52  else
53  out += c;
54  }
55 
56  rx.setPattern(out);
57  return rx;
58 }
59 
60 void FilterSet::addFilter(const QString& filterStr)
61 {
62  QString filter = filterStr;
63 
65  QChar firstChar = filter.at(0);
66  if (firstChar == QLatin1Char('[') || firstChar == QLatin1Char('!') || firstChar == QLatin1Char('&') || firstChar == QLatin1Char('#') || filter.contains(QLatin1Char('#')))
67  return;
68 
69  // Strip leading @@
70  int first = 0;
71  int last = filter.length() - 1;
72  if (filter.startsWith(QLatin1String("@@")))
73  first = 2;
74 
75  // Strip options, we ignore them for now.
76  int dollar = filter.lastIndexOf(QLatin1Char('$'));
77  if (dollar != -1) {
78  last = dollar - 1;
79  // If only "*" is left after ignoring the options, disregard the rule.
80  if (first == last && firstChar == QLatin1Char('*'))
81  return;
82  }
83 
84  // Perhaps nothing left?
85  if (first > last)
86  return;
87 
88  filter = filter.mid(first, last - first + 1);
89 
90  // Is it a regexp filter?
91  if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/')))
92  {
93  QString inside = filter.mid(1, filter.length()-2);
94  QRegExp rx(inside);
95  reFilters.append(rx);
96 // qDebug() << "R:" << inside;
97  }
98  else
99  {
100  // Nope, a wildcard one.
101  // Note: For these, we also need to handle |.
102 
103  // Strip wildcards at the ends
104  first = 0;
105  last = filter.length() - 1;
106 
107  while (first < filter.length() && filter[first] == QLatin1Char('*'))
108  ++first;
109 
110  while (last >= 0 && filter[last] == QLatin1Char('*'))
111  --last;
112 
113  if (first > last)
114  filter = QLatin1String("*"); // erm... Well, they asked for it.
115  else
116  filter = filter.mid(first, last - first + 1);
117 
118  // Now, do we still have any wildcard stuff left?
119  if (filter.contains("*"))
120  {
121  // check if we can use RK first (and then check full RE for the rest) for better performance
122  int aPos = filter.indexOf('*');
123  if (aPos < 0)
124  aPos = filter.length();
125  if (aPos > 7) {
126  QRegExp rx = fromAdBlockWildcard(filter.mid(aPos) + QLatin1Char('*'));
127  // We pad the final r.e. with * so we can check for an exact match
128  stringFiltersMatcher.addWildedString(filter.mid(0, aPos), rx);
129  } else {
130  QRegExp rx = fromAdBlockWildcard(filter);
131  reFilters.append(rx);
132  }
133  }
134  else
135  {
136  // Fast path
137  stringFiltersMatcher.addString(filter);
138  }
139  }
140 }
141 
142 bool FilterSet::isUrlMatched(const QString& url)
143 {
144  if (stringFiltersMatcher.isMatched(url))
145  return true;
146 
147  for (int c = 0; c < reFilters.size(); ++c)
148  {
149  if (url.contains(reFilters[c]))
150  return true;
151  }
152 
153  return false;
154 }
155 
156 QString FilterSet::urlMatchedBy(const QString& url)
157 {
158  QString by;
159 
160  if (stringFiltersMatcher.isMatched(url, &by))
161  return by;
162 
163  for (int c = 0; c < reFilters.size(); ++c)
164  {
165  if (url.contains(reFilters[c]))
166  {
167  by = reFilters[c].pattern();
168  break;
169  }
170  }
171 
172  return by;
173 }
174 
175 void FilterSet::clear()
176 {
177  reFilters.clear();
178  stringFiltersMatcher.clear();
179 }
180 
181 
182 void StringsMatcher::addString(const QString& pattern)
183 {
184  if (pattern.length() < 8) {
185  // handle short string differently
186  shortStringFilters.append(pattern);
187  } else {
188  // use modified Rabin-Karp's algorithm with 8-length string hash
189  // i.e. store hash of first 8 chars in the HashMap for fast look-up
190  stringFilters.append(pattern);
191  int ind = stringFilters.size() - 1;
192  int current = 0;
193 
194  // compute hash using rolling hash
195  // hash for string: x0,x1,x2...xn-1 will be:
196  // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
197  // where p and q some wisely-chosen integers
198  /*for (int k = 0; k < 8; ++k)*/
199  int len = pattern.length();
200  for (int k = len - 8; k < len; ++k)
201  current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
202 
203  // insert computed hash value into HashMap
204  WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
205  if (it == stringFiltersHash.end()) {
206  QVector<int> list;
207  list.append(ind);
208  stringFiltersHash.add(current + 1, list);
209  fastLookUp.setBit(current);
210  } else {
211  it->second.append(ind);
212  }
213  }
214 }
215 
216 void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
217 {
218  rePrefixes.append(prefix);
219  reFilters.append(rx);
220  int index = -rePrefixes.size();
221 
222  int current = 0;
223  for (int k = 0; k < 8; ++k)
224  current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
225 
226  // insert computed hash value into HashMap
227  WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
228  if (it == stringFiltersHash.end()) {
229  QVector<int> list;
230  list.append(index);
231  stringFiltersHash.add(current + 1, list);
232  fastLookUp.setBit(current);
233  } else {
234  it->second.append(index);
235  }
236 }
237 
238 bool StringsMatcher::isMatched(const QString& str, QString *by) const
239 {
240  // check short strings first
241  for (int i = 0; i < shortStringFilters.size(); ++i) {
242  if (str.contains(shortStringFilters[i]))
243  {
244  if (by != 0) *by = shortStringFilters[i];
245  return true;
246  }
247  }
248 
249  int len = str.length();
250  int k;
251 
252  int current = 0;
253  int next = 0;
254  // compute hash for first 8 characters
255  for (k = 0; k < 8 && k < len; ++k)
256  current = (current * HASH_P + str[k].unicode()) % HASH_Q;
257 
258  WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
259  // main Rabin-Karp's algorithm loop
260  for (k = 7; k < len; ++k, current = next) {
261  // roll the hash if not at the end
262  // (calculate hash for the next iteration)
263  if (k + 1 < len)
264  next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
265 
266  if (!fastLookUp.testBit(current))
267  continue;
268 
269  // look-up the hash in the HashMap and check all strings
270  WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
271 
272  // check possible strings
273  if (it != hashEnd) {
274  for (int j = 0; j < it->second.size(); ++j) {
275  int index = it->second[j];
276  // check if we got simple string or REs prefix
277  if (index >= 0) {
278  int flen = stringFilters[index].length();
279  if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
280  {
281  if (by != 0) *by = stringFilters[index];
282  return true;
283  }
284  } else {
285  index = -index - 1;
286  int flen = rePrefixes[index].length();
287  if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen))
288  {
289  int remStart = k - 7 + flen;
290  QString remainder = QString::fromRawData(str.unicode() + remStart,
291  str.length() - remStart);
292  if (reFilters[index].exactMatch(remainder)) {
293  if (by != 0) *by = rePrefixes[index]+reFilters[index].pattern();
294  return true;
295  }
296  }
297  }
298  }
299  }
300  }
301 
302  return false;
303 }
304 
305 void StringsMatcher::clear()
306 {
307  stringFilters.clear();
308  shortStringFilters.clear();
309  reFilters.clear();
310  rePrefixes.clear();
311  stringFiltersHash.clear();
312  fastLookUp.resize(HASH_Q);
313  fastLookUp.fill(0, 0, HASH_Q);
314 }
315 
316 }
317 
318 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;
This file is part of the KDE documentation.
Documentation copyright © 1996-2013 The KDE developers.
Generated on Sun Apr 28 2013 14:30:08 by doxygen 1.8.1.1 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KHTML

Skip menu "KHTML"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs-4.10.2 API Reference

Skip menu "kdelibs-4.10.2 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal