kmimemagic.cpp
00001 /* This file is part of the KDE libraries 00002 Copyright (C) 2000 Fritz Elfert <fritz@kde.org> 00003 Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com> 00004 00005 This library is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU Library General Public 00007 License version 2 as published by the Free Software Foundation. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public License 00015 along with this library; see the file COPYING.LIB. If not, write to 00016 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00017 Boston, MA 02110-1301, USA. 00018 */ 00019 #include "kmimemagic.h" 00020 #include <kdebug.h> 00021 #include <kapplication.h> 00022 #include <qfile.h> 00023 #include <ksimpleconfig.h> 00024 #include <kstandarddirs.h> 00025 #include <kstaticdeleter.h> 00026 #include <klargefile.h> 00027 #include <assert.h> 00028 00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb); 00030 static void process(struct config_rec* conf, const QString &); 00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes); 00032 static int tagmagic(unsigned char *buf, int nbytes); 00033 static int textmagic(struct config_rec* conf, unsigned char *, int); 00034 00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb); 00036 static int match(struct config_rec* conf, unsigned char *, int); 00037 00038 KMimeMagic* KMimeMagic::s_pSelf; 00039 static KStaticDeleter<KMimeMagic> kmimemagicsd; 00040 00041 KMimeMagic* KMimeMagic::self() 00042 { 00043 if( !s_pSelf ) 00044 initStatic(); 00045 return s_pSelf; 00046 } 00047 00048 void KMimeMagic::initStatic() 00049 { 00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); 00051 s_pSelf->setFollowLinks( true ); 00052 } 00053 00054 #include <stdio.h> 00055 #include <unistd.h> 00056 #include <stdlib.h> 00057 #include <sys/wait.h> 00058 #include <sys/types.h> 00059 #include <sys/stat.h> 00060 #include <fcntl.h> 00061 #include <errno.h> 00062 #include <ctype.h> 00063 #include <time.h> 00064 #include <utime.h> 00065 #include <stdarg.h> 00066 #include <qregexp.h> 00067 #include <qstring.h> 00068 00069 //#define MIME_MAGIC_DEBUG_TABLE // untested 00070 00071 // Uncomment to debug the config-file parsing phase 00072 //#define DEBUG_APPRENTICE 00073 // Uncomment to debug the matching phase 00074 //#define DEBUG_MIMEMAGIC 00075 00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE) 00077 #define DEBUG_LINENUMBERS 00078 #endif 00079 00080 /* 00081 * Buitltin Mime types 00082 */ 00083 #define MIME_BINARY_UNKNOWN "application/octet-stream" 00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable" 00085 #define MIME_BINARY_ZEROSIZE "application/x-zerosize" 00086 #define MIME_TEXT_UNKNOWN "text/plain" 00087 #define MIME_TEXT_PLAIN "text/plain" 00088 #define MIME_INODE_DIR "inode/directory" 00089 #define MIME_INODE_CDEV "inode/chardevice" 00090 #define MIME_INODE_BDEV "inode/blockdevice" 00091 #define MIME_INODE_FIFO "inode/fifo" 00092 #define MIME_INODE_LINK "inode/link" 00093 #define MIME_INODE_SOCK "inode/socket" 00094 // Following should go in magic-file - Fritz 00095 #define MIME_APPL_TROFF "application/x-troff" 00096 #define MIME_APPL_TAR "application/x-tar" 00097 #define MIME_TEXT_FORTRAN "text/x-fortran" 00098 00099 #define MAXMIMESTRING 256 00100 00101 #define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ 00102 #define MAXDESC 50 /* max leng of text description */ 00103 #define MAXstring 64 /* max leng of "string" types */ 00104 00105 typedef union VALUETYPE { 00106 unsigned char b; 00107 unsigned short h; 00108 unsigned long l; 00109 char s[MAXstring]; 00110 unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ 00111 unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ 00112 } VALUETYPE; 00113 00114 struct magic { 00115 struct magic *next; /* link to next entry */ 00116 #ifdef DEBUG_LINENUMBERS 00117 int lineno; /* line number from magic file - doesn't say from which one ;) */ 00118 #endif 00119 00120 short flag; 00121 #define INDIR 1 /* if '>(...)' appears, */ 00122 #define UNSIGNED 2 /* comparison is unsigned */ 00123 short cont_level; /* level of ">" */ 00124 struct { 00125 char type; /* byte short long */ 00126 long offset; /* offset from indirection */ 00127 } in; 00128 long offset; /* offset to magic number */ 00129 unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ 00130 char type; /* int, short, long or string. */ 00131 char vallen; /* length of string value, if any */ 00132 #define BYTE 1 00133 #define SHORT 2 00134 #define LONG 4 00135 #define STRING 5 00136 #define DATE 6 00137 #define BESHORT 7 00138 #define BELONG 8 00139 #define BEDATE 9 00140 #define LESHORT 10 00141 #define LELONG 11 00142 #define LEDATE 12 00143 VALUETYPE value; /* either number or string */ 00144 unsigned long mask; /* mask before comparison with value */ 00145 char nospflag; /* suppress space character */ 00146 00147 /* NOTE: this string is suspected of overrunning - find it! */ 00148 char desc[MAXDESC]; /* description */ 00149 }; 00150 00151 /* 00152 * data structures for tar file recognition 00153 * -------------------------------------------------------------------------- 00154 * Header file for public domain tar (tape archive) program. 00155 * 00156 * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John 00157 * Gilmore, ihnp4!hoptoad!gnu. 00158 * 00159 * Header block on tape. 00160 * 00161 * I'm going to use traditional DP naming conventions here. A "block" is a big 00162 * chunk of stuff that we do I/O on. A "record" is a piece of info that we 00163 * care about. Typically many "record"s fit into a "block". 00164 */ 00165 #define RECORDSIZE 512 00166 #define NAMSIZ 100 00167 #define TUNMLEN 32 00168 #define TGNMLEN 32 00169 00170 union record { 00171 char charptr[RECORDSIZE]; 00172 struct header { 00173 char name[NAMSIZ]; 00174 char mode[8]; 00175 char uid[8]; 00176 char gid[8]; 00177 char size[12]; 00178 char mtime[12]; 00179 char chksum[8]; 00180 char linkflag; 00181 char linkname[NAMSIZ]; 00182 char magic[8]; 00183 char uname[TUNMLEN]; 00184 char gname[TGNMLEN]; 00185 char devmajor[8]; 00186 char devminor[8]; 00187 } header; 00188 }; 00189 00190 /* The magic field is filled with this if uname and gname are valid. */ 00191 #define TMAGIC "ustar " /* 7 chars and a null */ 00192 00193 /* 00194 * file-function prototypes 00195 */ 00196 static int is_tar(unsigned char *, int); 00197 static unsigned long signextend(struct magic *, unsigned long); 00198 static int getvalue(struct magic *, char **); 00199 static int hextoint(int); 00200 static char *getstr(char *, char *, int, int *); 00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int); 00202 static int mcheck(union VALUETYPE *, struct magic *); 00203 static int mconvert(union VALUETYPE *, struct magic *); 00204 static long from_oct(int, char *); 00205 00206 /* 00207 * includes for ASCII substring recognition formerly "names.h" in file 00208 * command 00209 * 00210 * Original notes: names and types used by ascmagic in file(1). 00211 * These tokens are 00212 * here because they can appear anywhere in the first HOWMANY bytes, while 00213 * tokens in /etc/magic must appear at fixed offsets into the file. Don't 00214 * make HOWMANY too high unless you have a very fast CPU. 00215 */ 00216 00217 /* these types are used calculate index to 'types': keep em in sync! */ 00218 /* HTML inserted in first because this is a web server module now */ 00219 /* ENG removed because stupid */ 00220 #define L_HTML 0x001 /* HTML */ 00221 #define L_C 0x002 /* first and foremost on UNIX */ 00222 #define L_MAKE 0x004 /* Makefiles */ 00223 #define L_PLI 0x008 /* PL/1 */ 00224 #define L_MACH 0x010 /* some kinda assembler */ 00225 #define L_PAS 0x020 /* Pascal */ 00226 #define L_JAVA 0x040 /* Java source */ 00227 #define L_CPP 0x080 /* C++ */ 00228 #define L_MAIL 0x100 /* Electronic mail */ 00229 #define L_NEWS 0x200 /* Usenet Netnews */ 00230 #define L_DIFF 0x400 /* Output of diff */ 00231 #define L_OBJC 0x800 /* Objective C */ 00232 00233 // Note: this is not a type, it's just used to mark items that should count more 00234 #define FLAG_STRONG 0x1000 00235 00236 #define P_HTML 0 /* HTML */ 00237 #define P_C 1 /* first and foremost on UNIX */ 00238 #define P_MAKE 2 /* Makefiles */ 00239 #define P_PLI 3 /* PL/1 */ 00240 #define P_MACH 4 /* some kinda assembler */ 00241 #define P_PAS 5 /* Pascal */ 00242 #define P_JAVA 6 /* Java source */ 00243 #define P_CPP 7 /* C++ */ 00244 #define P_MAIL 8 /* Electronic mail */ 00245 #define P_NEWS 9 /* Usenet Netnews */ 00246 #define P_DIFF 10 /* Output of diff */ 00247 #define P_OBJC 11 /* Objective C */ 00248 00249 typedef struct asc_type { 00250 const char *type; 00251 int kwords; 00252 double weight; 00253 } asc_type; 00254 00255 static const asc_type types[] = { 00256 { "text/html", 19, 2 }, // 10 items but 10 different words only 00257 { "text/x-c", 13, 1 }, 00258 { "text/x-makefile", 4, 1.9 }, 00259 { "text/x-pli", 1, 3 }, 00260 { "text/x-assembler", 6, 2.1 }, 00261 { "text/x-pascal", 1, 1 }, 00262 { "text/x-java", 12, 1 }, 00263 { "text/x-c++", 19, 1 }, 00264 { "message/rfc822", 4, 1.9 }, 00265 { "message/news", 3, 2 }, 00266 { "text/x-diff", 4, 2 }, 00267 { "text/x-objc", 10, 1 } 00268 }; 00269 00270 #define NTYPES (sizeof(types)/sizeof(asc_type)) 00271 00272 static struct names { 00273 const char *name; 00274 short type; 00275 } const names[] = { 00276 { 00277 "<html", L_HTML | FLAG_STRONG 00278 }, 00279 { 00280 "<HTML", L_HTML | FLAG_STRONG 00281 }, 00282 { 00283 "<head", L_HTML 00284 }, 00285 { 00286 "<HEAD", L_HTML 00287 }, 00288 { 00289 "<body", L_HTML 00290 }, 00291 { 00292 "<BODY", L_HTML 00293 }, 00294 { 00295 "<title", L_HTML 00296 }, 00297 { 00298 "<TITLE", L_HTML 00299 }, 00300 { 00301 "<h1", L_HTML 00302 }, 00303 { 00304 "<H1", L_HTML 00305 }, 00306 { 00307 "<a", L_HTML 00308 }, 00309 { 00310 "<A", L_HTML 00311 }, 00312 { 00313 "<img", L_HTML 00314 }, 00315 { 00316 "<IMG", L_HTML 00317 }, 00318 { 00319 "<!--", L_HTML 00320 }, 00321 { 00322 "<!doctype", L_HTML 00323 }, 00324 { 00325 "<!DOCTYPE", L_HTML 00326 }, 00327 { 00328 "<div", L_HTML 00329 }, 00330 { 00331 "<DIV", L_HTML 00332 }, 00333 { 00334 "<frame", L_HTML 00335 }, 00336 { 00337 "<FRAME", L_HTML 00338 }, 00339 { 00340 "<frameset", L_HTML 00341 }, 00342 { 00343 "<FRAMESET", L_HTML 00344 }, 00345 { 00346 "<script", L_HTML | FLAG_STRONG 00347 }, 00348 { 00349 "<SCRIPT", L_HTML | FLAG_STRONG 00350 }, 00351 { 00352 "/*", L_C|L_CPP|L_JAVA|L_OBJC 00353 }, 00354 { 00355 "//", L_C|L_CPP|L_JAVA|L_OBJC 00356 }, 00357 { 00358 "#include", L_C|L_CPP 00359 }, 00360 { 00361 "#ifdef", L_C|L_CPP 00362 }, 00363 { 00364 "#ifndef", L_C|L_CPP 00365 }, 00366 { 00367 "bool", L_C|L_CPP 00368 }, 00369 { 00370 "char", L_C|L_CPP|L_JAVA|L_OBJC 00371 }, 00372 { 00373 "int", L_C|L_CPP|L_JAVA|L_OBJC 00374 }, 00375 { 00376 "float", L_C|L_CPP|L_JAVA|L_OBJC 00377 }, 00378 { 00379 "void", L_C|L_CPP|L_JAVA|L_OBJC 00380 }, 00381 { 00382 "extern", L_C|L_CPP 00383 }, 00384 { 00385 "struct", L_C|L_CPP 00386 }, 00387 { 00388 "union", L_C|L_CPP 00389 }, 00390 { 00391 "implements", L_JAVA 00392 }, 00393 { 00394 "super", L_JAVA 00395 }, 00396 { 00397 "import", L_JAVA 00398 }, 00399 { 00400 "class", L_CPP|L_JAVA 00401 }, 00402 { 00403 "public", L_CPP|L_JAVA 00404 }, 00405 { 00406 "private", L_CPP|L_JAVA 00407 }, 00408 { 00409 "explicit", L_CPP 00410 }, 00411 { 00412 "virtual", L_CPP 00413 }, 00414 { 00415 "namespace", L_CPP 00416 }, 00417 { 00418 "#import", L_OBJC 00419 }, 00420 { 00421 "@interface", L_OBJC 00422 }, 00423 { 00424 "@implementation", L_OBJC 00425 }, 00426 { 00427 "@protocol", L_OBJC 00428 }, 00429 { 00430 "CFLAGS", L_MAKE 00431 }, 00432 { 00433 "LDFLAGS", L_MAKE 00434 }, 00435 { 00436 "all:", L_MAKE 00437 }, 00438 { 00439 ".PHONY:", L_MAKE 00440 }, 00441 { 00442 "srcdir", L_MAKE 00443 }, 00444 { 00445 "exec_prefix", L_MAKE 00446 }, 00447 /* 00448 * Too many files of text have these words in them. Find another way 00449 * to recognize Fortrash. 00450 */ 00451 { 00452 ".ascii", L_MACH 00453 }, 00454 { 00455 ".asciiz", L_MACH 00456 }, 00457 { 00458 ".byte", L_MACH 00459 }, 00460 { 00461 ".even", L_MACH 00462 }, 00463 { 00464 ".globl", L_MACH 00465 }, 00466 { 00467 "clr", L_MACH 00468 }, 00469 { 00470 "(input", L_PAS 00471 }, 00472 { 00473 "dcl", L_PLI 00474 }, 00475 { 00476 "Received:", L_MAIL 00477 }, 00478 /* we now stop at '>' for tokens, so this one won't work { 00479 ">From", L_MAIL 00480 },*/ 00481 { 00482 "Return-Path:", L_MAIL 00483 }, 00484 { 00485 "Cc:", L_MAIL 00486 }, 00487 { 00488 "Newsgroups:", L_NEWS 00489 }, 00490 { 00491 "Path:", L_NEWS 00492 }, 00493 { 00494 "Organization:", L_NEWS 00495 }, 00496 { 00497 "---", L_DIFF 00498 }, 00499 { 00500 "+++", L_DIFF 00501 }, 00502 { 00503 "***", L_DIFF 00504 }, 00505 { 00506 "@@", L_DIFF 00507 }, 00508 { 00509 NULL, 0 00510 } 00511 }; 00512 00523 class KMimeMagicUtimeConf 00524 { 00525 public: 00526 KMimeMagicUtimeConf() 00527 { 00528 tmpDirs << QString::fromLatin1("/tmp"); // default value 00529 00530 // The trick is that we also don't want the user to override globally set 00531 // directories. So we have to misuse KStandardDirs :} 00532 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" ); 00533 if ( !confDirs.isEmpty() ) 00534 { 00535 QString globalConf = confDirs.last() + "kmimemagicrc"; 00536 if ( QFile::exists( globalConf ) ) 00537 { 00538 KSimpleConfig cfg( globalConf ); 00539 cfg.setGroup( "Settings" ); 00540 tmpDirs = cfg.readListEntry( "atimeDirs" ); 00541 } 00542 if ( confDirs.count() > 1 ) 00543 { 00544 QString localConf = confDirs.first() + "kmimemagicrc"; 00545 if ( QFile::exists( localConf ) ) 00546 { 00547 KSimpleConfig cfg( localConf ); 00548 cfg.setGroup( "Settings" ); 00549 tmpDirs += cfg.readListEntry( "atimeDirs" ); 00550 } 00551 } 00552 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) 00553 { 00554 QString dir = *it; 00555 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) 00556 (*it) += '/'; 00557 } 00558 } 00559 #if 0 00560 // debug code 00561 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) 00562 kdDebug(7018) << " atimeDir: " << *it << endl; 00563 #endif 00564 } 00565 00566 bool restoreAccessTime( const QString & file ) const 00567 { 00568 QString dir = file.left( file.findRev( '/' ) ); 00569 bool res = tmpDirs.contains( dir ); 00570 //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; 00571 return res; 00572 } 00573 QStringList tmpDirs; 00574 }; 00575 00576 /* current config */ 00577 struct config_rec { 00578 bool followLinks; 00579 QString resultBuf; 00580 int accuracy; 00581 00582 struct magic *magic, /* head of magic config list */ 00583 *last; 00584 KMimeMagicUtimeConf * utimeConf; 00585 }; 00586 00587 #ifdef MIME_MAGIC_DEBUG_TABLE 00588 static void 00589 test_table() 00590 { 00591 struct magic *m; 00592 struct magic *prevm = NULL; 00593 00594 kdDebug(7018) << "test_table : started" << endl; 00595 for (m = conf->magic; m; m = m->next) { 00596 if (isprint((((unsigned long) m) >> 24) & 255) && 00597 isprint((((unsigned long) m) >> 16) & 255) && 00598 isprint((((unsigned long) m) >> 8) & 255) && 00599 isprint(((unsigned long) m) & 255)) { 00600 //debug("test_table: POINTER CLOBBERED! " 00601 //"m=\"%c%c%c%c\" line=%d", 00602 (((unsigned long) m) >> 24) & 255, 00603 (((unsigned long) m) >> 16) & 255, 00604 (((unsigned long) m) >> 8) & 255, 00605 ((unsigned long) m) & 255, 00606 prevm ? prevm->lineno : -1); 00607 break; 00608 } 00609 prevm = m; 00610 } 00611 } 00612 #endif 00613 00614 #define EATAB {while (isascii((unsigned char) *l) && \ 00615 isspace((unsigned char) *l)) ++l;} 00616 00617 int KMimeMagic::parse_line(char *line, int *rule, int lineno) 00618 { 00619 int ws_offset; 00620 00621 /* delete newline */ 00622 if (line[0]) { 00623 line[strlen(line) - 1] = '\0'; 00624 } 00625 /* skip leading whitespace */ 00626 ws_offset = 0; 00627 while (line[ws_offset] && isspace(line[ws_offset])) { 00628 ws_offset++; 00629 } 00630 00631 /* skip blank lines */ 00632 if (line[ws_offset] == 0) { 00633 return 0; 00634 } 00635 /* comment, do not parse */ 00636 if (line[ws_offset] == '#') 00637 return 0; 00638 00639 /* if we get here, we're going to use it so count it */ 00640 (*rule)++; 00641 00642 /* parse it */ 00643 return (parse(line + ws_offset, lineno) != 0); 00644 } 00645 00646 /* 00647 * apprentice - load configuration from the magic file. 00648 */ 00649 int KMimeMagic::apprentice( const QString& magicfile ) 00650 { 00651 FILE *f; 00652 char line[BUFSIZ + 1]; 00653 int errs = 0; 00654 int lineno; 00655 int rule = 0; 00656 QCString fname; 00657 00658 if (magicfile.isEmpty()) 00659 return -1; 00660 fname = QFile::encodeName(magicfile); 00661 f = fopen(fname, "r"); 00662 if (f == NULL) { 00663 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl; 00664 return -1; 00665 } 00666 00667 /* parse it */ 00668 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) 00669 if (parse_line(line, &rule, lineno)) 00670 errs++; 00671 00672 fclose(f); 00673 00674 #ifdef DEBUG_APPRENTICE 00675 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 00676 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; 00677 #endif 00678 00679 #ifdef MIME_MAGIC_DEBUG_TABLE 00680 test_table(); 00681 #endif 00682 00683 return (errs ? -1 : 0); 00684 } 00685 00686 int KMimeMagic::buff_apprentice(char *buff) 00687 { 00688 char line[BUFSIZ + 2]; 00689 int errs = 0; 00690 int lineno = 1; 00691 char *start = buff; 00692 char *end; 00693 int count = 0; 00694 int rule = 0; 00695 int len = strlen(buff) + 1; 00696 00697 /* parse it */ 00698 do { 00699 count = (len > BUFSIZ-1)?BUFSIZ-1:len; 00700 strncpy(line, start, count); 00701 line[count] = '\0'; 00702 if ((end = strchr(line, '\n'))) { 00703 *(++end) = '\0'; 00704 count = strlen(line); 00705 } else 00706 strcat(line, "\n"); 00707 start += count; 00708 len -= count; 00709 if (parse_line(line, &rule, lineno)) 00710 errs++; 00711 lineno++; 00712 } while (len > 0); 00713 00714 #ifdef DEBUG_APPRENTICE 00715 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 00716 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; 00717 #endif 00718 00719 #ifdef MIME_MAGIC_DEBUG_TABLE 00720 test_table(); 00721 #endif 00722 00723 return (errs ? -1 : 0); 00724 } 00725 00726 /* 00727 * extend the sign bit if the comparison is to be signed 00728 */ 00729 static unsigned long 00730 signextend(struct magic *m, unsigned long v) 00731 { 00732 if (!(m->flag & UNSIGNED)) 00733 switch (m->type) { 00734 /* 00735 * Do not remove the casts below. They are vital. 00736 * When later compared with the data, the sign 00737 * extension must have happened. 00738 */ 00739 case BYTE: 00740 v = (char) v; 00741 break; 00742 case SHORT: 00743 case BESHORT: 00744 case LESHORT: 00745 v = (short) v; 00746 break; 00747 case DATE: 00748 case BEDATE: 00749 case LEDATE: 00750 case LONG: 00751 case BELONG: 00752 case LELONG: 00753 v = (long) v; 00754 break; 00755 case STRING: 00756 break; 00757 default: 00758 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl; 00759 return 998; //good value 00760 } 00761 return v; 00762 } 00763 00764 /* 00765 * parse one line from magic file, put into magic[index++] if valid 00766 */ 00767 int KMimeMagic::parse(char *l, int 00768 #ifdef DEBUG_LINENUMBERS 00769 lineno 00770 #endif 00771 ) 00772 { 00773 int i = 0; 00774 struct magic *m; 00775 char *t, 00776 *s; 00777 /* allocate magic structure entry */ 00778 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) { 00779 kdError(7018) << "parse: Out of memory." << endl; 00780 return -1; 00781 } 00782 /* append to linked list */ 00783 m->next = NULL; 00784 if (!conf->magic || !conf->last) { 00785 conf->magic = conf->last = m; 00786 } else { 00787 conf->last->next = m; 00788 conf->last = m; 00789 } 00790 00791 /* set values in magic structure */ 00792 m->flag = 0; 00793 m->cont_level = 0; 00794 #ifdef DEBUG_LINENUMBERS 00795 m->lineno = lineno; 00796 #endif 00797 00798 while (*l == '>') { 00799 ++l; /* step over */ 00800 m->cont_level++; 00801 } 00802 00803 if (m->cont_level != 0 && *l == '(') { 00804 ++l; /* step over */ 00805 m->flag |= INDIR; 00806 } 00807 /* get offset, then skip over it */ 00808 m->offset = (int) strtol(l, &t, 0); 00809 if (l == t) { 00810 kdError(7018) << "parse: offset " << l << " invalid" << endl; 00811 } 00812 l = t; 00813 00814 if (m->flag & INDIR) { 00815 m->in.type = LONG; 00816 m->in.offset = 0; 00817 /* 00818 * read [.lbs][+-]nnnnn) 00819 */ 00820 if (*l == '.') { 00821 switch (*++l) { 00822 case 'l': 00823 m->in.type = LONG; 00824 break; 00825 case 's': 00826 m->in.type = SHORT; 00827 break; 00828 case 'b': 00829 m->in.type = BYTE; 00830 break; 00831 default: 00832 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl; 00833 break; 00834 } 00835 l++; 00836 } 00837 s = l; 00838 if (*l == '+' || *l == '-') 00839 l++; 00840 if (isdigit((unsigned char) *l)) { 00841 m->in.offset = strtol(l, &t, 0); 00842 if (*s == '-') 00843 m->in.offset = -m->in.offset; 00844 } else 00845 t = l; 00846 if (*t++ != ')') { 00847 kdError(7018) << "parse: missing ')' in indirect offset" << endl; 00848 } 00849 l = t; 00850 } 00851 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l)) 00852 ++l; 00853 EATAB; 00854 00855 #define NBYTE 4 00856 #define NSHORT 5 00857 #define NLONG 4 00858 #define NSTRING 6 00859 #define NDATE 4 00860 #define NBESHORT 7 00861 #define NBELONG 6 00862 #define NBEDATE 6 00863 #define NLESHORT 7 00864 #define NLELONG 6 00865 #define NLEDATE 6 00866 00867 if (*l == 'u') { 00868 ++l; 00869 m->flag |= UNSIGNED; 00870 } 00871 /* get type, skip it */ 00872 if (strncmp(l, "byte", NBYTE) == 0) { 00873 m->type = BYTE; 00874 l += NBYTE; 00875 } else if (strncmp(l, "short", NSHORT) == 0) { 00876 m->type = SHORT; 00877 l += NSHORT; 00878 } else if (strncmp(l, "long", NLONG) == 0) { 00879 m->type = LONG; 00880 l += NLONG; 00881 } else if (strncmp(l, "string", NSTRING) == 0) { 00882 m->type = STRING; 00883 l += NSTRING; 00884 } else if (strncmp(l, "date", NDATE) == 0) { 00885 m->type = DATE; 00886 l += NDATE; 00887 } else if (strncmp(l, "beshort", NBESHORT) == 0) { 00888 m->type = BESHORT; 00889 l += NBESHORT; 00890 } else if (strncmp(l, "belong", NBELONG) == 0) { 00891 m->type = BELONG; 00892 l += NBELONG; 00893 } else if (strncmp(l, "bedate", NBEDATE) == 0) { 00894 m->type = BEDATE; 00895 l += NBEDATE; 00896 } else if (strncmp(l, "leshort", NLESHORT) == 0) { 00897 m->type = LESHORT; 00898 l += NLESHORT; 00899 } else if (strncmp(l, "lelong", NLELONG) == 0) { 00900 m->type = LELONG; 00901 l += NLELONG; 00902 } else if (strncmp(l, "ledate", NLEDATE) == 0) { 00903 m->type = LEDATE; 00904 l += NLEDATE; 00905 } else { 00906 kdError(7018) << "parse: type " << l << " invalid" << endl; 00907 return -1; 00908 } 00909 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 00910 if (*l == '&') { 00911 ++l; 00912 m->mask = signextend(m, strtol(l, &l, 0)); 00913 } else 00914 m->mask = (unsigned long) ~0L; 00915 EATAB; 00916 00917 switch (*l) { 00918 case '>': 00919 case '<': 00920 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 00921 case '&': 00922 case '^': 00923 case '=': 00924 m->reln = *l; 00925 ++l; 00926 break; 00927 case '!': 00928 if (m->type != STRING) { 00929 m->reln = *l; 00930 ++l; 00931 break; 00932 } 00933 /* FALL THROUGH */ 00934 default: 00935 if (*l == 'x' && isascii((unsigned char) l[1]) && 00936 isspace((unsigned char) l[1])) { 00937 m->reln = *l; 00938 ++l; 00939 goto GetDesc; /* Bill The Cat */ 00940 } 00941 m->reln = '='; 00942 break; 00943 } 00944 EATAB; 00945 00946 if (getvalue(m, &l)) 00947 return -1; 00948 /* 00949 * now get last part - the description 00950 */ 00951 GetDesc: 00952 EATAB; 00953 if (l[0] == '\b') { 00954 ++l; 00955 m->nospflag = 1; 00956 } else if ((l[0] == '\\') && (l[1] == 'b')) { 00957 ++l; 00958 ++l; 00959 m->nospflag = 1; 00960 } else 00961 m->nospflag = 0; 00962 // Copy description - until EOL or '#' (for comments) 00963 while (*l != '\0' && *l != '#' && i < MAXDESC-1) 00964 m->desc[i++] = *l++; 00965 m->desc[i] = '\0'; 00966 // Remove trailing spaces 00967 while (--i>0 && isspace( m->desc[i] )) 00968 m->desc[i] = '\0'; 00969 00970 // old code 00971 //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ; 00972 00973 #ifdef DEBUG_APPRENTICE 00974 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl; 00975 #endif 00976 return 0; 00977 } 00978 00979 /* 00980 * Read a numeric value from a pointer, into the value union of a magic 00981 * pointer, according to the magic type. Update the string pointer to point 00982 * just after the number read. Return 0 for success, non-zero for failure. 00983 */ 00984 static int 00985 getvalue(struct magic *m, char **p) 00986 { 00987 int slen; 00988 00989 if (m->type == STRING) { 00990 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); 00991 m->vallen = slen; 00992 } else if (m->reln != 'x') 00993 m->value.l = signextend(m, strtol(*p, p, 0)); 00994 return 0; 00995 } 00996 00997 /* 00998 * Convert a string containing C character escapes. Stop at an unescaped 00999 * space or tab. Copy the converted version to "p", returning its length in 01000 * *slen. Return updated scan pointer as function result. 01001 */ 01002 static char * 01003 getstr(register char *s, register char *p, int plen, int *slen) 01004 { 01005 char *origs = s, 01006 *origp = p; 01007 char *pmax = p + plen - 1; 01008 register int c; 01009 register int val; 01010 01011 while ((c = *s++) != '\0') { 01012 if (isspace((unsigned char) c)) 01013 break; 01014 if (p >= pmax) { 01015 kdError(7018) << "String too long: " << origs << endl; 01016 break; 01017 } 01018 if (c == '\\') { 01019 switch (c = *s++) { 01020 01021 case '\0': 01022 goto out; 01023 01024 default: 01025 *p++ = (char) c; 01026 break; 01027 01028 case 'n': 01029 *p++ = '\n'; 01030 break; 01031 01032 case 'r': 01033 *p++ = '\r'; 01034 break; 01035 01036 case 'b': 01037 *p++ = '\b'; 01038 break; 01039 01040 case 't': 01041 *p++ = '\t'; 01042 break; 01043 01044 case 'f': 01045 *p++ = '\f'; 01046 break; 01047 01048 case 'v': 01049 *p++ = '\v'; 01050 break; 01051 01052 /* \ and up to 3 octal digits */ 01053 case '0': 01054 case '1': 01055 case '2': 01056 case '3': 01057 case '4': 01058 case '5': 01059 case '6': 01060 case '7': 01061 val = c - '0'; 01062 c = *s++; /* try for 2 */ 01063 if (c >= '0' && c <= '7') { 01064 val = (val << 3) | (c - '0'); 01065 c = *s++; /* try for 3 */ 01066 if (c >= '0' && c <= '7') 01067 val = (val << 3) | (c - '0'); 01068 else 01069 --s; 01070 } else 01071 --s; 01072 *p++ = (char) val; 01073 break; 01074 01075 /* \x and up to 3 hex digits */ 01076 case 'x': 01077 val = 'x'; /* Default if no digits */ 01078 c = hextoint(*s++); /* Get next char */ 01079 if (c >= 0) { 01080 val = c; 01081 c = hextoint(*s++); 01082 if (c >= 0) { 01083 val = (val << 4) + c; 01084 c = hextoint(*s++); 01085 if (c >= 0) { 01086 val = (val << 4) + c; 01087 } else 01088 --s; 01089 } else 01090 --s; 01091 } else 01092 --s; 01093 *p++ = (char) val; 01094 break; 01095 } 01096 } else 01097 *p++ = (char) c; 01098 } 01099 out: 01100 *p = '\0'; 01101 *slen = p - origp; 01102 //for ( char* foo = origp; foo < p ; ++foo ) 01103 // kdDebug(7018) << " " << *foo << endl; 01104 return s; 01105 } 01106 01107 01108 /* Single hex char to int; -1 if not a hex char. */ 01109 static int 01110 hextoint(int c) 01111 { 01112 if (!isascii((unsigned char) c)) 01113 return -1; 01114 if (isdigit((unsigned char) c)) 01115 return c - '0'; 01116 if ((c >= 'a') && (c <= 'f')) 01117 return c + 10 - 'a'; 01118 if ((c >= 'A') && (c <= 'F')) 01119 return c + 10 - 'A'; 01120 return -1; 01121 } 01122 01123 /* 01124 * Convert the byte order of the data we are looking at 01125 */ 01126 static int 01127 mconvert(union VALUETYPE *p, struct magic *m) 01128 { 01129 switch (m->type) { 01130 case BYTE: 01131 return 1; 01132 case STRING: 01133 /* Null terminate */ 01134 p->s[sizeof(p->s) - 1] = '\0'; 01135 return 1; 01136 #ifndef WORDS_BIGENDIAN 01137 case SHORT: 01138 #endif 01139 case BESHORT: 01140 p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); 01141 return 1; 01142 #ifndef WORDS_BIGENDIAN 01143 case LONG: 01144 case DATE: 01145 #endif 01146 case BELONG: 01147 case BEDATE: 01148 p->l = (long) 01149 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); 01150 return 1; 01151 #ifdef WORDS_BIGENDIAN 01152 case SHORT: 01153 #endif 01154 case LESHORT: 01155 p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); 01156 return 1; 01157 #ifdef WORDS_BIGENDIAN 01158 case LONG: 01159 case DATE: 01160 #endif 01161 case LELONG: 01162 case LEDATE: 01163 p->l = (long) 01164 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); 01165 return 1; 01166 default: 01167 kdError(7018) << "mconvert: invalid type " << m->type << endl; 01168 return 0; 01169 } 01170 } 01171 01172 01173 static int 01174 mget(union VALUETYPE *p, unsigned char *s, struct magic *m, 01175 int nbytes) 01176 { 01177 long offset = m->offset; 01178 switch ( m->type ) 01179 { 01180 case BYTE: 01181 if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1 01182 return 0; 01183 break; 01184 case SHORT: 01185 case BESHORT: 01186 case LESHORT: 01187 if ( offset + 2 > nbytes-1 ) 01188 return 0; 01189 break; 01190 case LONG: 01191 case BELONG: 01192 case LELONG: 01193 case DATE: 01194 case BEDATE: 01195 case LEDATE: 01196 if ( offset + 4 > nbytes-1 ) 01197 return 0; 01198 break; 01199 case STRING: 01200 break; 01201 } 01202 01203 // The file length might be < sizeof(union VALUETYPE) (David) 01204 // -> pad with zeros (the 'file' command does it this way) 01205 // Thanks to Stan Covington <stan@calderasystems.com> for detailed report 01206 if (offset + (int)sizeof(union VALUETYPE) > nbytes) 01207 { 01208 int have = nbytes - offset; 01209 memset(p, 0, sizeof(union VALUETYPE)); 01210 if (have > 0) 01211 memcpy(p, s + offset, have); 01212 } else 01213 memcpy(p, s + offset, sizeof(union VALUETYPE)); 01214 01215 if (!mconvert(p, m)) 01216 return 0; 01217 01218 if (m->flag & INDIR) { 01219 01220 switch (m->in.type) { 01221 case BYTE: 01222 offset = p->b + m->in.offset; 01223 break; 01224 case SHORT: 01225 offset = p->h + m->in.offset; 01226 break; 01227 case LONG: 01228 offset = p->l + m->in.offset; 01229 break; 01230 } 01231 01232 if (offset + (int)sizeof(union VALUETYPE) > nbytes) 01233 return 0; 01234 01235 memcpy(p, s + offset, sizeof(union VALUETYPE)); 01236 01237 if (!mconvert(p, m)) 01238 return 0; 01239 } 01240 return 1; 01241 } 01242 01243 static int 01244 mcheck(union VALUETYPE *p, struct magic *m) 01245 { 01246 register unsigned long l = m->value.l; 01247 register unsigned long v; 01248 int matched; 01249 01250 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { 01251 kdError(7018) << "BOINK" << endl; 01252 return 1; 01253 } 01254 switch (m->type) { 01255 case BYTE: 01256 v = p->b; 01257 break; 01258 01259 case SHORT: 01260 case BESHORT: 01261 case LESHORT: 01262 v = p->h; 01263 break; 01264 01265 case LONG: 01266 case BELONG: 01267 case LELONG: 01268 case DATE: 01269 case BEDATE: 01270 case LEDATE: 01271 v = p->l; 01272 break; 01273 01274 case STRING: 01275 l = 0; 01276 /* 01277 * What we want here is: v = strncmp(m->value.s, p->s, 01278 * m->vallen); but ignoring any nulls. bcmp doesn't give 01279 * -/+/0 and isn't universally available anyway. 01280 */ 01281 v = 0; 01282 { 01283 register unsigned char *a = (unsigned char *) m->value.s; 01284 register unsigned char *b = (unsigned char *) p->s; 01285 register int len = m->vallen; 01286 Q_ASSERT(len); 01287 01288 while (--len >= 0) 01289 if ((v = *b++ - *a++) != 0) 01290 break; 01291 } 01292 break; 01293 default: 01294 kdError(7018) << "mcheck: invalid type " << m->type << endl; 01295 return 0; /* NOTREACHED */ 01296 } 01297 #if 0 01298 qDebug("Before signextend %08x", v); 01299 #endif 01300 v = signextend(m, v) & m->mask; 01301 #if 0 01302 qDebug("After signextend %08x", v); 01303 #endif 01304 01305 switch (m->reln) { 01306 case 'x': 01307 matched = 1; 01308 break; 01309 01310 case '!': 01311 matched = v != l; 01312 break; 01313 01314 case '=': 01315 matched = v == l; 01316 break; 01317 01318 case '>': 01319 if (m->flag & UNSIGNED) 01320 matched = v > l; 01321 else 01322 matched = (long) v > (long) l; 01323 break; 01324 01325 case '<': 01326 if (m->flag & UNSIGNED) 01327 matched = v < l; 01328 else 01329 matched = (long) v < (long) l; 01330 break; 01331 01332 case '&': 01333 matched = (v & l) == l; 01334 break; 01335 01336 case '^': 01337 matched = (v & l) != l; 01338 break; 01339 01340 default: 01341 matched = 0; 01342 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl; 01343 break; /* NOTREACHED */ 01344 } 01345 01346 return matched; 01347 } 01348 01349 /* 01350 * magic_process - process input file fn. Opens the file and reads a 01351 * fixed-size buffer to begin processing the contents. 01352 */ 01353 01354 void process(struct config_rec* conf, const QString & fn) 01355 { 01356 int fd = 0; 01357 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 01358 KDE_struct_stat sb; 01359 int nbytes = 0; /* number of bytes read from a datafile */ 01360 int tagbytes = 0; /* size of prefixed tag */ 01361 QCString fileName = QFile::encodeName( fn ); 01362 01363 /* 01364 * first try judging the file based on its filesystem status 01365 */ 01366 if (fsmagic(conf, fileName, &sb) != 0) { 01367 //resultBuf += "\n"; 01368 return; 01369 } 01370 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) { 01371 /* We can't open it, but we were able to stat it. */ 01372 /* 01373 * if (sb.st_mode & 0002) addResult("writable, "); 01374 * if (sb.st_mode & 0111) addResult("executable, "); 01375 */ 01376 //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl; 01377 conf->resultBuf = MIME_BINARY_UNREADABLE; 01378 return; 01379 } 01380 /* 01381 * try looking at the first HOWMANY bytes 01382 */ 01383 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) { 01384 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl; 01385 conf->resultBuf = MIME_BINARY_UNREADABLE; 01386 (void)close(fd); 01387 return; 01388 } 01389 if ((tagbytes = tagmagic(buf, nbytes))) { 01390 // Read buffer at new position 01391 lseek(fd, tagbytes, SEEK_SET); 01392 nbytes = read(fd, (char*)buf, HOWMANY); 01393 if (nbytes < 0) { 01394 conf->resultBuf = MIME_BINARY_UNREADABLE; 01395 (void)close(fd); 01396 return; 01397 } 01398 } 01399 if (nbytes == 0) { 01400 conf->resultBuf = MIME_BINARY_ZEROSIZE; 01401 } else { 01402 buf[nbytes++] = '\0'; /* null-terminate it */ 01403 tryit(conf, buf, nbytes); 01404 } 01405 01406 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) 01407 { 01408 /* 01409 * Try to restore access, modification times if read it. 01410 * This changes the "change" time (ctime), but we can't do anything 01411 * about that. 01412 */ 01413 struct utimbuf utbuf; 01414 utbuf.actime = sb.st_atime; 01415 utbuf.modtime = sb.st_mtime; 01416 (void) utime(fileName, &utbuf); 01417 } 01418 (void) close(fd); 01419 } 01420 01421 01422 static void tryit(struct config_rec* conf, unsigned char *buf, int nb) 01423 { 01424 /* try tests in /etc/magic (or surrogate magic file) */ 01425 if (match(conf, buf, nb)) 01426 return; 01427 01428 /* try known keywords, check for ascii-ness too. */ 01429 if (ascmagic(conf, buf, nb) == 1) 01430 return; 01431 01432 /* see if it's plain text */ 01433 if (textmagic(conf, buf, nb)) 01434 return; 01435 01436 /* abandon hope, all ye who remain here */ 01437 conf->resultBuf = MIME_BINARY_UNKNOWN; 01438 conf->accuracy = 0; 01439 } 01440 01441 static int 01442 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb) 01443 { 01444 int ret = 0; 01445 01446 /* 01447 * Fstat is cheaper but fails for files you don't have read perms on. 01448 * On 4.2BSD and similar systems, use lstat() to identify symlinks. 01449 */ 01450 ret = KDE_lstat(fn, sb); /* don't merge into if; see "ret =" above */ 01451 01452 if (ret) { 01453 return 1; 01454 01455 } 01456 /* 01457 * if (sb->st_mode & S_ISUID) resultBuf += "setuid "; 01458 * if (sb->st_mode & S_ISGID) resultBuf += "setgid "; 01459 * if (sb->st_mode & S_ISVTX) resultBuf += "sticky "; 01460 */ 01461 01462 switch (sb->st_mode & S_IFMT) { 01463 case S_IFDIR: 01464 conf->resultBuf = MIME_INODE_DIR; 01465 return 1; 01466 case S_IFCHR: 01467 conf->resultBuf = MIME_INODE_CDEV; 01468 return 1; 01469 case S_IFBLK: 01470 conf->resultBuf = MIME_INODE_BDEV; 01471 return 1; 01472 /* TODO add code to handle V7 MUX and Blit MUX files */ 01473 #ifdef S_IFIFO 01474 case S_IFIFO: 01475 conf->resultBuf = MIME_INODE_FIFO; 01476 return 1; 01477 #endif 01478 #ifdef S_IFLNK 01479 case S_IFLNK: 01480 { 01481 char buf[BUFSIZ + BUFSIZ + 4]; 01482 register int nch; 01483 KDE_struct_stat tstatbuf; 01484 01485 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) { 01486 conf->resultBuf = MIME_INODE_LINK; 01487 //conf->resultBuf += "\nunreadable"; 01488 return 1; 01489 } 01490 buf[nch] = '\0'; /* readlink(2) forgets this */ 01491 /* If broken symlink, say so and quit early. */ 01492 if (*buf == '/') { 01493 if (KDE_stat(buf, &tstatbuf) < 0) { 01494 conf->resultBuf = MIME_INODE_LINK; 01495 //conf->resultBuf += "\nbroken"; 01496 return 1; 01497 } 01498 } else { 01499 char *tmp; 01500 char buf2[BUFSIZ + BUFSIZ + 4]; 01501 01502 strncpy(buf2, fn, BUFSIZ); 01503 buf2[BUFSIZ] = 0; 01504 01505 if ((tmp = strrchr(buf2, '/')) == NULL) { 01506 tmp = buf; /* in current dir */ 01507 } else { 01508 /* dir part plus (rel.) link */ 01509 *++tmp = '\0'; 01510 strcat(buf2, buf); 01511 tmp = buf2; 01512 } 01513 if (KDE_stat(tmp, &tstatbuf) < 0) { 01514 conf->resultBuf = MIME_INODE_LINK; 01515 //conf->resultBuf += "\nbroken"; 01516 return 1; 01517 } else 01518 strcpy(buf, tmp); 01519 } 01520 if (conf->followLinks) 01521 process( conf, QFile::decodeName( buf ) ); 01522 else 01523 conf->resultBuf = MIME_INODE_LINK; 01524 return 1; 01525 } 01526 return 1; 01527 #endif 01528 #ifdef S_IFSOCK 01529 #ifndef __COHERENT__ 01530 case S_IFSOCK: 01531 conf->resultBuf = MIME_INODE_SOCK; 01532 return 1; 01533 #endif 01534 #endif 01535 case S_IFREG: 01536 break; 01537 default: 01538 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl; 01539 /* NOTREACHED */ 01540 } 01541 01542 /* 01543 * regular file, check next possibility 01544 */ 01545 if (sb->st_size == 0) { 01546 conf->resultBuf = MIME_BINARY_ZEROSIZE; 01547 return 1; 01548 } 01549 return 0; 01550 } 01551 01552 /* 01553 * Go through the whole list, stopping if you find a match. Process all the 01554 * continuations of that match before returning. 01555 * 01556 * We support multi-level continuations: 01557 * 01558 * At any time when processing a successful top-level match, there is a current 01559 * continuation level; it represents the level of the last successfully 01560 * matched continuation. 01561 * 01562 * Continuations above that level are skipped as, if we see one, it means that 01563 * the continuation that controls them - i.e, the lower-level continuation 01564 * preceding them - failed to match. 01565 * 01566 * Continuations below that level are processed as, if we see one, it means 01567 * we've finished processing or skipping higher-level continuations under the 01568 * control of a successful or unsuccessful lower-level continuation, and are 01569 * now seeing the next lower-level continuation and should process it. The 01570 * current continuation level reverts to the level of the one we're seeing. 01571 * 01572 * Continuations at the current level are processed as, if we see one, there's 01573 * no lower-level continuation that may have failed. 01574 * 01575 * If a continuation matches, we bump the current continuation level so that 01576 * higher-level continuations are processed. 01577 */ 01578 static int 01579 match(struct config_rec* conf, unsigned char *s, int nbytes) 01580 { 01581 int cont_level = 0; 01582 union VALUETYPE p; 01583 struct magic *m; 01584 01585 #ifdef DEBUG_MIMEMAGIC 01586 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 01587 for (m = conf->magic; m; m = m->next) { 01588 if (isprint((((unsigned long) m) >> 24) & 255) && 01589 isprint((((unsigned long) m) >> 16) & 255) && 01590 isprint((((unsigned long) m) >> 8) & 255) && 01591 isprint(((unsigned long) m) & 255)) { 01592 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl; 01593 break; 01594 } 01595 } 01596 #endif 01597 01598 for (m = conf->magic; m; m = m->next) { 01599 #ifdef DEBUG_MIMEMAGIC 01600 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl; 01601 #endif 01602 memset(&p, 0, sizeof(union VALUETYPE)); 01603 01604 /* check if main entry matches */ 01605 if (!mget(&p, s, m, nbytes) || 01606 !mcheck(&p, m)) { 01607 struct magic *m_cont; 01608 01609 /* 01610 * main entry didn't match, flush its continuations 01611 */ 01612 if (!m->next || (m->next->cont_level == 0)) { 01613 continue; 01614 } 01615 m_cont = m->next; 01616 while (m_cont && (m_cont->cont_level != 0)) { 01617 #ifdef DEBUG_MIMEMAGIC 01618 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl; 01619 #endif 01620 /* 01621 * this trick allows us to keep *m in sync 01622 * when the continue advances the pointer 01623 */ 01624 m = m_cont; 01625 m_cont = m_cont->next; 01626 } 01627 continue; 01628 } 01629 /* if we get here, the main entry rule was a match */ 01630 /* this will be the last run through the loop */ 01631 #ifdef DEBUG_MIMEMAGIC 01632 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; 01633 #endif 01634 01635 /* remember the match */ 01636 conf->resultBuf = m->desc; 01637 01638 cont_level++; 01639 /* 01640 * while (m && m->next && m->next->cont_level != 0 && ( m = 01641 * m->next )) 01642 */ 01643 m = m->next; 01644 while (m && (m->cont_level != 0)) { 01645 #ifdef DEBUG_MIMEMAGIC 01646 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; 01647 #endif 01648 if (cont_level >= m->cont_level) { 01649 if (cont_level > m->cont_level) { 01650 /* 01651 * We're at the end of the level 01652 * "cont_level" continuations. 01653 */ 01654 cont_level = m->cont_level; 01655 } 01656 if (mget(&p, s, m, nbytes) && 01657 mcheck(&p, m)) { 01658 /* 01659 * This continuation matched. Print 01660 * its message, with a blank before 01661 * it if the previous item printed 01662 * and this item isn't empty. 01663 */ 01664 #ifdef DEBUG_MIMEMAGIC 01665 kdDebug(7018) << "continuation matched" << endl; 01666 #endif 01667 conf->resultBuf = m->desc; 01668 cont_level++; 01669 } 01670 } 01671 /* move to next continuation record */ 01672 m = m->next; 01673 } 01674 // KDE-specific: need an actual mimetype for a real match 01675 // If we only matched a rule with continuations but no mimetype, it's not a match 01676 if ( !conf->resultBuf.isEmpty() ) 01677 { 01678 #ifdef DEBUG_MIMEMAGIC 01679 kdDebug(7018) << "match: matched" << endl; 01680 #endif 01681 return 1; /* all through */ 01682 } 01683 } 01684 #ifdef DEBUG_MIMEMAGIC 01685 kdDebug(7018) << "match: failed" << endl; 01686 #endif 01687 return 0; /* no match at all */ 01688 } 01689 01690 // Try to parse prefixed tags before matching on content 01691 // Sofar only ID3v2 tags (<=.4) are handled 01692 static int tagmagic(unsigned char *buf, int nbytes) 01693 { 01694 if(nbytes<40) return 0; 01695 if(buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3') { 01696 int size = 10; 01697 // Sanity (known version, no unknown flags) 01698 if(buf[3] > 4) return 0; 01699 if(buf[5] & 0x0F) return 0; 01700 // Tag has v4 footer 01701 if(buf[5] & 0x10) size += 10; 01702 // Calculated syncsafe size 01703 size += buf[9]; 01704 size += buf[8] << 7; 01705 size += buf[7] << 14; 01706 size += buf[6] << 21; 01707 return size; 01708 } 01709 return 0; 01710 } 01711 01712 struct Token { 01713 char *data; 01714 int length; 01715 }; 01716 01717 struct Tokenizer 01718 { 01719 Tokenizer(char* buf, int nbytes) { 01720 data = buf; 01721 length = nbytes; 01722 pos = 0; 01723 } 01724 bool isNewLine() { 01725 return newline; 01726 } 01727 Token* nextToken() { 01728 if (pos == 0) 01729 newline = true; 01730 else 01731 newline = false; 01732 token.data = data+pos; 01733 token.length = 0; 01734 while(pos<length) { 01735 switch (data[pos]) { 01736 case '\n': 01737 newline = true; 01738 case '\0': 01739 case '\t': 01740 case ' ': 01741 case '\r': 01742 case '\f': 01743 case ',': 01744 case ';': 01745 case '>': 01746 if (token.length == 0) token.data++; 01747 else 01748 return &token; 01749 break; 01750 default: 01751 token.length++; 01752 } 01753 pos++; 01754 } 01755 return &token; 01756 } 01757 01758 private: 01759 Token token; 01760 char* data; 01761 int length; 01762 int pos; 01763 bool newline; 01764 }; 01765 01766 01767 /* an optimization over plain strcmp() */ 01768 //#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) 01769 static inline bool STREQ(const Token *token, const char *b) { 01770 const char *a = token->data; 01771 int len = token->length; 01772 if (a == b) return true; 01773 while(*a && *b && len > 0) { 01774 if (*a != *b) return false; 01775 a++; b++; len--; 01776 } 01777 return (len == 0 && *b == 0); 01778 } 01779 01780 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes) 01781 { 01782 int i; 01783 double pct, maxpct, pctsum; 01784 double pcts[NTYPES]; 01785 int mostaccurate, tokencount; 01786 int typeset, jonly, conly, jconly, objconly, cpponly; 01787 int has_escapes = 0; 01788 //unsigned char *s; 01789 //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ 01790 01791 /* these are easy, do them first */ 01792 conf->accuracy = 70; 01793 01794 /* 01795 * for troff, look for . + letter + letter or .\"; this must be done 01796 * to disambiguate tar archives' ./file and other trash from real 01797 * troff input. 01798 */ 01799 if (*buf == '.') { 01800 unsigned char *tp = buf + 1; 01801 01802 while (isascii(*tp) && isspace(*tp)) 01803 ++tp; /* skip leading whitespace */ 01804 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') && 01805 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) { 01806 conf->resultBuf = MIME_APPL_TROFF; 01807 return 1; 01808 } 01809 } 01810 if ((*buf == 'c' || *buf == 'C') && 01811 isascii(*(buf + 1)) && isspace(*(buf + 1))) { 01812 /* Fortran */ 01813 conf->resultBuf = MIME_TEXT_FORTRAN; 01814 return 1; 01815 } 01816 assert(nbytes-1 < HOWMANY + 1); 01817 /* look for tokens - this is expensive! */ 01818 has_escapes = (memchr(buf, '\033', nbytes) != NULL); 01819 Tokenizer tokenizer((char*)buf, nbytes); 01820 const Token* token; 01821 bool linecomment = false, blockcomment = false; 01822 const struct names *p; 01823 int typecount[NTYPES]; 01824 /* 01825 * Fritz: 01826 * Try a little harder on C/C++/Java. 01827 */ 01828 memset(&typecount, 0, sizeof(typecount)); 01829 typeset = 0; 01830 jonly = 0; 01831 conly = 0; 01832 jconly = 0; 01833 objconly = 0; 01834 cpponly = 0; 01835 tokencount = 0; 01836 bool foundClass = false; // mandatory for java 01837 // first collect all possible types and count matches 01838 // we stop at '>' too, because of "<title>blah</title>" on HTML pages 01839 while ((token = tokenizer.nextToken())->length > 0) { 01840 #ifdef DEBUG_MIMEMAGIC 01841 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl; 01842 #endif 01843 if (linecomment && tokenizer.isNewLine()) 01844 linecomment = false; 01845 if (blockcomment && STREQ(token, "*/")) { 01846 blockcomment = false; 01847 continue; 01848 } 01849 for (p = names; p->name ; p++) { 01850 if (STREQ(token, p->name)) { 01851 #ifdef DEBUG_MIMEMAGIC 01852 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl; 01853 #endif 01854 tokencount++; 01855 typeset |= p->type; 01856 if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) { 01857 if (linecomment || blockcomment) { 01858 continue; 01859 } 01860 else { 01861 switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) 01862 { 01863 case L_JAVA: 01864 jonly++; 01865 break; 01866 case L_OBJC: 01867 objconly++; 01868 break; 01869 case L_CPP: 01870 cpponly++; 01871 break; 01872 case (L_CPP|L_JAVA): 01873 jconly++; 01874 if ( !foundClass && STREQ(token, "class") ) 01875 foundClass = true; 01876 break; 01877 case (L_C|L_CPP): 01878 conly++; 01879 break; 01880 default: 01881 if (STREQ(token, "//")) linecomment = true; 01882 if (STREQ(token, "/*")) blockcomment = true; 01883 } 01884 } 01885 } 01886 for (i = 0; i < (int)NTYPES; i++) { 01887 if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1; 01888 } 01889 } 01890 } 01891 } 01892 01893 if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) { 01894 conf->accuracy = 60; 01895 if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) { 01896 #ifdef DEBUG_MIMEMAGIC 01897 kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl; 01898 #endif 01899 if (jonly > 1 && foundClass) { 01900 // At least two java-only tokens have matched, including "class" 01901 conf->resultBuf = QString(types[P_JAVA].type); 01902 return 1; 01903 } 01904 if (jconly > 1) { 01905 // At least two non-C (only C++ or Java) token have matched. 01906 if (typecount[P_JAVA] < typecount[P_CPP]) 01907 conf->resultBuf = QString(types[P_CPP].type); 01908 else 01909 conf->resultBuf = QString(types[P_JAVA].type); 01910 return 1; 01911 } 01912 if (conly + cpponly > 1) { 01913 // Either C or C++. 01914 if (cpponly > 0) 01915 conf->resultBuf = QString(types[P_CPP].type); 01916 else 01917 conf->resultBuf = QString(types[P_C].type); 01918 return 1; 01919 } 01920 if (objconly > 0) { 01921 conf->resultBuf = QString(types[P_OBJC].type); 01922 return 1; 01923 } 01924 } 01925 } 01926 01927 /* Neither C, C++ or Java (or all of them without able to distinguish): 01928 * Simply take the token-class with the highest 01929 * matchcount > 0 01930 */ 01931 mostaccurate = -1; 01932 maxpct = pctsum = 0.0; 01933 for (i = 0; i < (int)NTYPES; i++) { 01934 if (typecount[i] > 1) { // one word is not enough, we need at least two 01935 pct = (double)typecount[i] / (double)types[i].kwords * 01936 (double)types[i].weight; 01937 pcts[i] = pct; 01938 pctsum += pct; 01939 if (pct > maxpct) { 01940 maxpct = pct; 01941 mostaccurate = i; 01942 } 01943 #ifdef DEBUG_MIMEMAGIC 01944 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl; 01945 #endif 01946 } 01947 } 01948 if (mostaccurate >= 0) { 01949 if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java 01950 { 01951 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60); 01952 #ifdef DEBUG_MIMEMAGIC 01953 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl; 01954 #endif 01955 conf->resultBuf = QString(types[mostaccurate].type); 01956 return 1; 01957 } 01958 } 01959 01960 switch (is_tar(buf, nbytes)) { 01961 case 1: 01962 /* V7 tar archive */ 01963 conf->resultBuf = MIME_APPL_TAR; 01964 conf->accuracy = 90; 01965 return 1; 01966 case 2: 01967 /* POSIX tar archive */ 01968 conf->resultBuf = MIME_APPL_TAR; 01969 conf->accuracy = 90; 01970 return 1; 01971 } 01972 01973 for (i = 0; i < nbytes; i++) { 01974 if (!isascii(*(buf + i))) 01975 return 0; /* not all ascii */ 01976 } 01977 01978 /* all else fails, but it is ascii... */ 01979 conf->accuracy = 90; 01980 if (has_escapes) { 01981 /* text with escape sequences */ 01982 /* we leave this open for further differentiation later */ 01983 conf->resultBuf = MIME_TEXT_UNKNOWN; 01984 } else { 01985 /* plain text */ 01986 conf->resultBuf = MIME_TEXT_PLAIN; 01987 } 01988 return 1; 01989 } 01990 01991 /* Maximal length of a line we consider "reasonable". */ 01992 #define TEXT_MAXLINELEN 300 01993 01994 // This code is taken from the "file" command, where it is licensed 01995 // in the "beer-ware license" :-) 01996 // Original author: <joerg@FreeBSD.ORG> 01997 // Simplified by David Faure to avoid the static array char[256]. 01998 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes) 01999 { 02000 int i; 02001 unsigned char *cp; 02002 02003 nbytes--; 02004 02005 /* First, look whether there are "unreasonable" characters. */ 02006 for (i = 0, cp = buf; i < nbytes; i++, cp++) 02007 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F)) 02008 return 0; 02009 02010 /* Now, look whether the file consists of lines of 02011 * "reasonable" length. */ 02012 02013 for (i = 0; i < nbytes;) { 02014 cp = (unsigned char *) memchr(buf, '\n', nbytes - i); 02015 if (cp == NULL) { 02016 /* Don't fail if we hit the end of buffer. */ 02017 if (i + TEXT_MAXLINELEN >= nbytes) 02018 break; 02019 else 02020 return 0; 02021 } 02022 if (cp - buf > TEXT_MAXLINELEN) 02023 return 0; 02024 i += (cp - buf + 1); 02025 buf = cp + 1; 02026 } 02027 conf->resultBuf = MIME_TEXT_PLAIN; 02028 return 1; 02029 } 02030 02031 02032 /* 02033 * is_tar() -- figure out whether file is a tar archive. 02034 * 02035 * Stolen (by author of file utility) from the public domain tar program: Public 02036 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). 02037 * 02038 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 02039 * 1997/06/24 00:41:02 ikluft Exp ikluft $ 02040 * 02041 * Comments changed and some code/comments reformatted for file command by Ian 02042 * Darwin. 02043 */ 02044 02045 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) 02046 02047 /* 02048 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for 02049 * old UNIX tar file, 2 for Unix Std (POSIX) tar file. 02050 */ 02051 02052 static int 02053 is_tar(unsigned char *buf, int nbytes) 02054 { 02055 register union record *header = (union record *) buf; 02056 register int i; 02057 register long sum, 02058 recsum; 02059 register char *p; 02060 02061 if (nbytes < (int)sizeof(union record)) 02062 return 0; 02063 02064 recsum = from_oct(8, header->header.chksum); 02065 02066 sum = 0; 02067 p = header->charptr; 02068 for (i = sizeof(union record); --i >= 0;) { 02069 /* 02070 * We can't use unsigned char here because of old compilers, 02071 * e.g. V7. 02072 */ 02073 sum += 0xFF & *p++; 02074 } 02075 02076 /* Adjust checksum to count the "chksum" field as blanks. */ 02077 for (i = sizeof(header->header.chksum); --i >= 0;) 02078 sum -= 0xFF & header->header.chksum[i]; 02079 sum += ' ' * sizeof header->header.chksum; 02080 02081 if (sum != recsum) 02082 return 0; /* Not a tar archive */ 02083 02084 if (0 == strcmp(header->header.magic, TMAGIC)) 02085 return 2; /* Unix Standard tar archive */ 02086 02087 return 1; /* Old fashioned tar archive */ 02088 } 02089 02090 02091 /* 02092 * Quick and dirty octal conversion. 02093 * 02094 * Result is -1 if the field is invalid (all blank, or nonoctal). 02095 */ 02096 static long 02097 from_oct(int digs, char *where) 02098 { 02099 register long value; 02100 02101 while (isspace(*where)) { /* Skip spaces */ 02102 where++; 02103 if (--digs <= 0) 02104 return -1; /* All blank field */ 02105 } 02106 value = 0; 02107 while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ 02108 value = (value << 3) | (*where++ - '0'); 02109 --digs; 02110 } 02111 02112 if (digs > 0 && *where && !isspace(*where)) 02113 return -1; /* Ended on non-space/nul */ 02114 02115 return value; 02116 } 02117 02118 KMimeMagic::KMimeMagic() 02119 { 02120 // Magic file detection init 02121 QString mimefile = locate( "mime", "magic" ); 02122 init( mimefile ); 02123 // Add snippets from share/config/magic/* 02124 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); 02125 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) 02126 if ( !mergeConfig( *it ) ) 02127 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; 02128 } 02129 02130 KMimeMagic::KMimeMagic(const QString & _configfile) 02131 { 02132 init( _configfile ); 02133 } 02134 02135 void KMimeMagic::init( const QString& _configfile ) 02136 { 02137 int result; 02138 conf = new config_rec; 02139 02140 /* set up the magic list (empty) */ 02141 conf->magic = conf->last = NULL; 02142 magicResult = NULL; 02143 conf->followLinks = false; 02144 02145 conf->utimeConf = 0L; // created on demand 02146 /* on the first time through we read the magic file */ 02147 result = apprentice(_configfile); 02148 if (result == -1) 02149 return; 02150 #ifdef MIME_MAGIC_DEBUG_TABLE 02151 test_table(); 02152 #endif 02153 } 02154 02155 /* 02156 * The destructor. 02157 * Free the magic-table and other resources. 02158 */ 02159 KMimeMagic::~KMimeMagic() 02160 { 02161 if (conf) { 02162 struct magic *p = conf->magic; 02163 struct magic *q; 02164 while (p) { 02165 q = p; 02166 p = p->next; 02167 free(q); 02168 } 02169 delete conf->utimeConf; 02170 delete conf; 02171 } 02172 delete magicResult; 02173 } 02174 02175 bool 02176 KMimeMagic::mergeConfig(const QString & _configfile) 02177 { 02178 kdDebug(7018) << k_funcinfo << _configfile << endl; 02179 int result; 02180 02181 if (_configfile.isEmpty()) 02182 return false; 02183 result = apprentice(_configfile); 02184 if (result == -1) { 02185 return false; 02186 } 02187 #ifdef MIME_MAGIC_DEBUG_TABLE 02188 test_table(); 02189 #endif 02190 return true; 02191 } 02192 02193 bool 02194 KMimeMagic::mergeBufConfig(char * _configbuf) 02195 { 02196 int result; 02197 02198 if (conf) { 02199 result = buff_apprentice(_configbuf); 02200 if (result == -1) 02201 return false; 02202 #ifdef MIME_MAGIC_DEBUG_TABLE 02203 test_table(); 02204 #endif 02205 return true; 02206 } 02207 return false; 02208 } 02209 02210 void 02211 KMimeMagic::setFollowLinks( bool _enable ) 02212 { 02213 conf->followLinks = _enable; 02214 } 02215 02216 KMimeMagicResult * 02217 KMimeMagic::findBufferType(const QByteArray &array) 02218 { 02219 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 02220 02221 conf->resultBuf = QString::null; 02222 if ( !magicResult ) 02223 magicResult = new KMimeMagicResult(); 02224 magicResult->setInvalid(); 02225 conf->accuracy = 100; 02226 02227 int nbytes = array.size(); 02228 02229 if (nbytes > HOWMANY) 02230 nbytes = HOWMANY; 02231 memcpy(buf, array.data(), nbytes); 02232 if (nbytes == 0) { 02233 conf->resultBuf = MIME_BINARY_ZEROSIZE; 02234 } else { 02235 buf[nbytes++] = '\0'; /* null-terminate it */ 02236 tryit(conf, buf, nbytes); 02237 } 02238 /* if we have any results, put them in the request structure */ 02239 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); 02240 magicResult->setAccuracy(conf->accuracy); 02241 return magicResult; 02242 } 02243 02244 static void 02245 refineResult(KMimeMagicResult *r, const QString & _filename) 02246 { 02247 QString tmp = r->mimeType(); 02248 if (tmp.isEmpty()) 02249 return; 02250 if ( tmp == "text/x-c" || tmp == "text/x-objc" ) 02251 { 02252 if ( _filename.right(2) == ".h" ) 02253 tmp += "hdr"; 02254 else 02255 tmp += "src"; 02256 r->setMimeType(tmp); 02257 } 02258 else 02259 if ( tmp == "text/x-c++" ) 02260 { 02261 if ( _filename.endsWith(".h") 02262 || _filename.endsWith(".hh") 02263 || _filename.endsWith(".H") 02264 || !_filename.right(4).contains('.')) 02265 tmp += "hdr"; 02266 else 02267 tmp += "src"; 02268 r->setMimeType(tmp); 02269 } 02270 else 02271 if ( tmp == "application/x-sharedlib" ) 02272 { 02273 if ( _filename.find( ".so" ) == -1 ) 02274 { 02275 tmp = "application/x-executable"; 02276 r->setMimeType( tmp ); 02277 } 02278 } 02279 } 02280 02281 KMimeMagicResult * 02282 KMimeMagic::findBufferFileType( const QByteArray &data, 02283 const QString &fn) 02284 { 02285 KMimeMagicResult * r = findBufferType( data ); 02286 refineResult(r, fn); 02287 return r; 02288 } 02289 02290 /* 02291 * Find the content-type of the given file. 02292 */ 02293 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn) 02294 { 02295 #ifdef DEBUG_MIMEMAGIC 02296 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; 02297 #endif 02298 conf->resultBuf = QString::null; 02299 02300 if ( !magicResult ) 02301 magicResult = new KMimeMagicResult(); 02302 magicResult->setInvalid(); 02303 conf->accuracy = 100; 02304 02305 if ( !conf->utimeConf ) 02306 conf->utimeConf = new KMimeMagicUtimeConf(); 02307 02308 /* process it based on the file contents */ 02309 process(conf, fn ); 02310 02311 /* if we have any results, put them in the request structure */ 02312 //finishResult(); 02313 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); 02314 magicResult->setAccuracy(conf->accuracy); 02315 refineResult(magicResult, fn); 02316 return magicResult; 02317 }