KMIME Library
kmime_util.cpp
00001 /* 00002 kmime_util.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001 the KMime authors. 00006 See file AUTHORS for details 00007 00008 This library is free software; you can redistribute it and/or 00009 modify it under the terms of the GNU Library General Public 00010 License as published by the Free Software Foundation; either 00011 version 2 of the License, or (at your option) any later version. 00012 00013 This library is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Library General Public License for more details. 00017 00018 You should have received a copy of the GNU Library General Public License 00019 along with this library; see the file COPYING.LIB. If not, write to 00020 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00021 Boston, MA 02110-1301, USA. 00022 */ 00023 00024 #include "kmime_util.h" 00025 #include "kmime_util_p.h" 00026 00027 #include "kmime_charfreq.h" 00028 #include "kmime_codecs.h" 00029 #include "kmime_header_parsing.h" 00030 #include "kmime_message.h" 00031 #include "kmime_warning.h" 00032 00033 #include <config-kmime.h> 00034 #include <kdefakes.h> // for strcasestr 00035 #include <kglobal.h> 00036 #include <klocale.h> 00037 #include <kcharsets.h> 00038 #include <kcodecs.h> 00039 #include <kdebug.h> 00040 00041 #include <QtCore/QList> 00042 #include <QtCore/QString> 00043 #include <QtCore/QTextCodec> 00044 00045 #include <ctype.h> 00046 #include <time.h> 00047 #include <stdlib.h> 00048 #include <unistd.h> 00049 #include <boost/concept_check.hpp> 00050 00051 using namespace KMime; 00052 00053 namespace KMime { 00054 00055 QList<QByteArray> c_harsetCache; 00056 QList<QByteArray> l_anguageCache; 00057 QString f_allbackCharEnc; 00058 bool u_seOutlookEncoding = false; 00059 00060 QByteArray cachedCharset( const QByteArray &name ) 00061 { 00062 foreach ( const QByteArray& charset, c_harsetCache ) { 00063 if ( qstricmp( name.data(), charset.data() ) == 0 ) { 00064 return charset; 00065 } 00066 } 00067 00068 c_harsetCache.append( name.toUpper() ); 00069 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00070 return c_harsetCache.last(); 00071 } 00072 00073 QByteArray cachedLanguage( const QByteArray &name ) 00074 { 00075 foreach ( const QByteArray& language, l_anguageCache ) { 00076 if ( qstricmp( name.data(), language.data() ) == 0 ) { 00077 return language; 00078 } 00079 } 00080 00081 l_anguageCache.append( name.toUpper() ); 00082 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00083 return l_anguageCache.last(); 00084 } 00085 00086 bool isUsAscii( const QString &s ) 00087 { 00088 uint sLength = s.length(); 00089 for ( uint i=0; i<sLength; i++ ) { 00090 if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii 00091 return false; 00092 } 00093 } 00094 return true; 00095 } 00096 00097 QString nameForEncoding( Headers::contentEncoding enc ) 00098 { 00099 switch( enc ) { 00100 case Headers::CE7Bit: return QString::fromLatin1( "7bit" ); 00101 case Headers::CE8Bit: return QString::fromLatin1( "8bit" ); 00102 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" ); 00103 case Headers::CEbase64: return QString::fromLatin1( "base64" ); 00104 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" ); 00105 case Headers::CEbinary: return QString::fromLatin1( "binary" ); 00106 default: return QString::fromLatin1( "unknown" ); 00107 } 00108 } 00109 00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data ) 00111 { 00112 QList<Headers::contentEncoding> allowed; 00113 CharFreq cf( data ); 00114 00115 switch ( cf.type() ) { 00116 case CharFreq::SevenBitText: 00117 allowed << Headers::CE7Bit; 00118 case CharFreq::EightBitText: 00119 allowed << Headers::CE8Bit; 00120 case CharFreq::SevenBitData: 00121 if ( cf.printableRatio() > 5.0/6.0 ) { 00122 // let n the length of data and p the number of printable chars. 00123 // Then base64 \approx 4n/3; qp \approx p + 3(n-p) 00124 // => qp < base64 iff p > 5n/6. 00125 allowed << Headers::CEquPr; 00126 allowed << Headers::CEbase64; 00127 } else { 00128 allowed << Headers::CEbase64; 00129 allowed << Headers::CEquPr; 00130 } 00131 break; 00132 case CharFreq::EightBitData: 00133 allowed << Headers::CEbase64; 00134 break; 00135 case CharFreq::None: 00136 default: 00137 Q_ASSERT( false ); 00138 } 00139 00140 return allowed; 00141 } 00142 00143 // "(),.:;<>@[\] 00144 const uchar specialsMap[16] = { 00145 0x00, 0x00, 0x00, 0x00, // CTLs 00146 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' 00147 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00148 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00149 }; 00150 00151 // "(),:;<>@[\]/=? 00152 const uchar tSpecialsMap[16] = { 00153 0x00, 0x00, 0x00, 0x00, // CTLs 00154 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' 00155 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00156 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00157 }; 00158 00159 // all except specials, CTLs, SPACE. 00160 const uchar aTextMap[16] = { 00161 0x00, 0x00, 0x00, 0x00, 00162 0x5F, 0x35, 0xFF, 0xC5, 00163 0x7F, 0xFF, 0xFF, 0xE3, 00164 0xFF, 0xFF, 0xFF, 0xFE 00165 }; 00166 00167 // all except tspecials, CTLs, SPACE. 00168 const uchar tTextMap[16] = { 00169 0x00, 0x00, 0x00, 0x00, 00170 0x5F, 0x36, 0xFF, 0xC0, 00171 0x7F, 0xFF, 0xFF, 0xE3, 00172 0xFF, 0xFF, 0xFF, 0xFE 00173 }; 00174 00175 // none except a-zA-Z0-9!*+-/ 00176 const uchar eTextMap[16] = { 00177 0x00, 0x00, 0x00, 0x00, 00178 0x40, 0x35, 0xFF, 0xC0, 00179 0x7F, 0xFF, 0xFF, 0xE0, 00180 0x7F, 0xFF, 0xFF, 0xE0 00181 }; 00182 00183 void setFallbackCharEncoding(const QString& fallbackCharEnc) 00184 { 00185 f_allbackCharEnc = fallbackCharEnc; 00186 } 00187 00188 QString fallbackCharEncoding() 00189 { 00190 return f_allbackCharEnc; 00191 } 00192 00193 void setUseOutlookAttachmentEncoding( bool violateStandard ) 00194 { 00195 u_seOutlookEncoding = violateStandard; 00196 } 00197 00198 bool useOutlookAttachmentEncoding() 00199 { 00200 return u_seOutlookEncoding; 00201 } 00202 00203 00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, 00205 const QByteArray &defaultCS, bool forceCS ) 00206 { 00207 QByteArray result; 00208 QByteArray spaceBuffer; 00209 const char *scursor = src.constData(); 00210 const char *send = scursor + src.length(); 00211 bool onlySpacesSinceLastWord = false; 00212 00213 while ( scursor != send ) { 00214 // space 00215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { 00216 spaceBuffer += *scursor++; 00217 continue; 00218 } 00219 00220 // possible start of an encoded word 00221 if ( *scursor == '=' ) { 00222 QByteArray language; 00223 QString decoded; 00224 ++scursor; 00225 const char *start = scursor; 00226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { 00227 result += decoded.toUtf8(); 00228 onlySpacesSinceLastWord = true; 00229 spaceBuffer.clear(); 00230 } else { 00231 if ( onlySpacesSinceLastWord ) { 00232 result += spaceBuffer; 00233 onlySpacesSinceLastWord = false; 00234 } 00235 result += '='; 00236 scursor = start; // reset cursor after parsing failure 00237 } 00238 continue; 00239 } else { 00240 // unencoded data 00241 if ( onlySpacesSinceLastWord ) { 00242 result += spaceBuffer; 00243 onlySpacesSinceLastWord = false; 00244 } 00245 result += *scursor; 00246 ++scursor; 00247 } 00248 } 00249 // If there are any chars that couldn't be decoded in UTF-8, 00250 // use the fallback charset if it exists 00251 const QString tryUtf8 = QString::fromUtf8( result ); 00252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) { 00253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc ); 00254 return codec->toUnicode( result ); 00255 } else { 00256 return tryUtf8; 00257 } 00258 } 00259 00260 QString decodeRFC2047String( const QByteArray &src ) 00261 { 00262 QByteArray usedCS; 00263 return decodeRFC2047String( src, usedCS, "utf-8", false ); 00264 } 00265 00266 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, 00267 bool addressHeader, bool allow8BitHeaders ) 00268 { 00269 QByteArray encoded8Bit, result; 00270 int start=0, end=0; 00271 bool nonAscii=false, ok=true, useQEncoding=false; 00272 00273 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 00274 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok ); 00275 00276 QByteArray usedCS; 00277 if ( !ok ) { 00278 //no codec available => try local8Bit and hope the best ;-) 00279 usedCS = KGlobal::locale()->encoding(); 00280 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok ); 00281 } 00282 else { 00283 Q_ASSERT( codec ); 00284 if ( charset.isEmpty() ) 00285 usedCS = codec->name(); 00286 else 00287 usedCS = charset; 00288 } 00289 00290 if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets 00291 useQEncoding = true; 00292 } 00293 00294 encoded8Bit = codec->fromUnicode( src ); 00295 00296 if ( allow8BitHeaders ) { 00297 return encoded8Bit; 00298 } 00299 00300 uint encoded8BitLength = encoded8Bit.length(); 00301 for ( unsigned int i=0; i<encoded8BitLength; i++ ) { 00302 if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries 00303 start = i + 1; 00304 } 00305 00306 // encode escape character, for japanese encodings... 00307 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) || 00308 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) { 00309 end = start; // non us-ascii char found, now we determine where to stop encoding 00310 nonAscii = true; 00311 break; 00312 } 00313 } 00314 00315 if ( nonAscii ) { 00316 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00317 // we encode complete words 00318 end++; 00319 } 00320 00321 for ( int x=end; x<encoded8Bit.length(); x++ ) { 00322 if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) || 00323 ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) { 00324 end = x; // we found another non-ascii word 00325 00326 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00327 // we encode complete words 00328 end++; 00329 } 00330 } 00331 } 00332 00333 result = encoded8Bit.left( start ) + "=?" + usedCS; 00334 00335 if ( useQEncoding ) { 00336 result += "?Q?"; 00337 00338 char c, hexcode;// "Q"-encoding implementation described in RFC 2047 00339 for ( int i=start; i<end; i++ ) { 00340 c = encoded8Bit[i]; 00341 if ( c == ' ' ) { // make the result readable with not MIME-capable readers 00342 result += '_'; 00343 } else { 00344 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems 00345 ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers 00346 ( ( c >= '0' ) && ( c <= '9' ) ) ) { 00347 result += c; 00348 } else { 00349 result += '='; // "stolen" from KMail ;-) 00350 hexcode = ((c & 0xF0) >> 4) + 48; 00351 if ( hexcode >= 58 ) { 00352 hexcode += 7; 00353 } 00354 result += hexcode; 00355 hexcode = (c & 0x0F) + 48; 00356 if ( hexcode >= 58 ) { 00357 hexcode += 7; 00358 } 00359 result += hexcode; 00360 } 00361 } 00362 } 00363 } else { 00364 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64(); 00365 } 00366 00367 result +="?="; 00368 result += encoded8Bit.right( encoded8Bit.length() - end ); 00369 } else { 00370 result = encoded8Bit; 00371 } 00372 00373 return result; 00374 } 00375 00376 00377 //----------------------------------------------------------------------------- 00378 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset ) 00379 { 00380 if ( str.isEmpty() ) 00381 return QByteArray(); 00382 00383 00384 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00385 QByteArray latin; 00386 if ( charset == "us-ascii" ) 00387 latin = str.toAscii(); 00388 else if ( codec ) 00389 latin = codec->fromUnicode( str ); 00390 else 00391 latin = str.toLocal8Bit(); 00392 00393 char *l; 00394 for ( l = latin.data(); *l; ++l ) { 00395 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) 00396 // *l is control character or 8-bit char 00397 break; 00398 } 00399 if ( !*l ) 00400 return latin; 00401 00402 QByteArray result = charset + "''"; 00403 for ( l = latin.data(); *l; ++l ) { 00404 bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' ); 00405 if( !needsQuoting ) { 00406 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 00407 int len = especials.length(); 00408 for ( int i = 0; i < len; i++ ) 00409 if ( *l == especials[i] ) { 00410 needsQuoting = true; 00411 break; 00412 } 00413 } 00414 if ( needsQuoting ) { 00415 result += '%'; 00416 unsigned char hexcode; 00417 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48; 00418 if ( hexcode >= 58 ) 00419 hexcode += 7; 00420 result += hexcode; 00421 hexcode = ( *l & 0x0F ) + 48; 00422 if ( hexcode >= 58 ) 00423 hexcode += 7; 00424 result += hexcode; 00425 } else { 00426 result += *l; 00427 } 00428 } 00429 return result; 00430 } 00431 00432 00433 //----------------------------------------------------------------------------- 00434 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, 00435 bool forceCS ) 00436 { 00437 int p = str.indexOf('\''); 00438 if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ))->toUnicode( str ); 00439 00440 00441 QByteArray charset = str.left(p); 00442 00443 QByteArray st = str.mid( str.lastIndexOf('\'') + 1 ); 00444 00445 char ch, ch2; 00446 p = 0; 00447 while (p < (int)st.length()) 00448 { 00449 if (st.at(p) == 37) 00450 { 00451 // Only try to decode the percent-encoded character if the percent sign 00452 // is really followed by two other characters, see testcase at bug 163024 00453 if ( p + 2 < st.length() ) { 00454 ch = st.at(p+1) - 48; 00455 if (ch > 16) 00456 ch -= 7; 00457 ch2 = st.at(p+2) - 48; 00458 if (ch2 > 16) 00459 ch2 -= 7; 00460 st[p] = ch * 16 + ch2; 00461 st.remove( p+1, 2 ); 00462 } 00463 } 00464 p++; 00465 } 00466 kDebug() << "Got pre-decoded:" << st; 00467 QString result; 00468 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00469 if ( !charsetcodec || forceCS ) 00470 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) ); 00471 00472 usedCS = charsetcodec->name(); 00473 return charsetcodec->toUnicode( st ); 00474 } 00475 00476 QString decodeRFC2231String( const QByteArray &src ) 00477 { 00478 QByteArray usedCS; 00479 return decodeRFC2231String( src, usedCS, "utf-8", false ); 00480 } 00481 00482 QByteArray uniqueString() 00483 { 00484 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 00485 time_t now; 00486 char p[11]; 00487 int pos, ran; 00488 unsigned int timeval; 00489 00490 p[10] = '\0'; 00491 now = time( 0 ); 00492 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0)); 00493 timeval = (now / ran) + getpid(); 00494 00495 for ( int i=0; i<10; i++ ) { 00496 pos = (int) (61.0*rand() / (RAND_MAX + 1.0)); 00497 //kDebug() << pos; 00498 p[i] = chars[pos]; 00499 } 00500 00501 QByteArray ret; 00502 ret.setNum( timeval ); 00503 ret += '.'; 00504 ret += p; 00505 00506 return ret; 00507 } 00508 00509 QByteArray multiPartBoundary() 00510 { 00511 return "nextPart" + uniqueString(); 00512 } 00513 00514 QByteArray unfoldHeader( const QByteArray &header ) 00515 { 00516 QByteArray result; 00517 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; 00518 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { 00519 foldBegin = foldEnd = foldMid; 00520 // find the first space before the line-break 00521 while ( foldBegin > 0 ) { 00522 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) { 00523 break; 00524 } 00525 --foldBegin; 00526 } 00527 // find the first non-space after the line-break 00528 while ( foldEnd <= header.length() - 1 ) { 00529 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) { 00530 ++foldEnd; 00531 } 00532 else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' && 00533 header[foldEnd] == '=' && foldEnd + 2 < header.length() && 00534 ( ( header[foldEnd + 1] == '0' && 00535 header[foldEnd + 2] == '9' ) || 00536 ( header[foldEnd + 1] == '2' && 00537 header[foldEnd + 2] == '0' ) ) ) { 00538 // bug #86302: malformed header continuation starting with =09/=20 00539 foldEnd += 3; 00540 } 00541 else { 00542 break; 00543 } 00544 } 00545 00546 result += header.mid( pos, foldBegin - pos ); 00547 if ( foldEnd < header.length() -1 ) 00548 result += ' '; 00549 pos = foldEnd; 00550 } 00551 result += header.mid( pos, header.length() - pos ); 00552 return result; 00553 } 00554 00555 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded ) 00556 { 00557 int end = dataBegin; 00558 int len = src.length() - 1; 00559 00560 if ( folded ) 00561 *folded = false; 00562 00563 if ( dataBegin < 0 ) { 00564 // Not found 00565 return -1; 00566 } 00567 00568 if ( dataBegin > len ) { 00569 // No data available 00570 return len + 1; 00571 } 00572 00573 // If the first line contains nothing, but the next line starts with a space 00574 // or a tab, that means a stupid mail client has made the first header field line 00575 // entirely empty, and has folded the rest to the next line(s). 00576 if ( src.at(end) == '\n' && end + 1 < len && 00577 ( src[end+1] == ' ' || src[end+1] == '\t' ) ) { 00578 00579 // Skip \n and first whitespace 00580 dataBegin += 2; 00581 end += 2; 00582 } 00583 00584 if ( src.at(end) != '\n' ) { // check if the header is not empty 00585 while ( true ) { 00586 end = src.indexOf( '\n', end + 1 ); 00587 if ( end == -1 || end == len ) { 00588 // end of string 00589 break; 00590 } 00591 else if ( src[end+1] == ' ' || src[end+1] == '\t' || 00592 ( src[end+1] == '=' && end+3 <= len && 00593 ( ( src[end+2] == '0' && src[end+3] == '9' ) || 00594 ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) { 00595 // next line is header continuation or starts with =09/=20 (bug #86302) 00596 if ( folded ) 00597 *folded = true; 00598 } else { 00599 // end of header (no header continuation) 00600 break; 00601 } 00602 } 00603 } 00604 00605 if ( end < 0 ) { 00606 end = len + 1; //take the rest of the string 00607 } 00608 return end; 00609 } 00610 00611 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) 00612 { 00613 QByteArray n = name; 00614 n.append( ':' ); 00615 int begin = -1; 00616 00617 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { 00618 begin = 0; 00619 } else { 00620 n.prepend('\n'); 00621 const char *p = strcasestr( src.constData(), n.constData() ); 00622 if ( !p ) { 00623 begin = -1; 00624 } else { 00625 begin = p - src.constData(); 00626 ++begin; 00627 } 00628 } 00629 00630 if ( begin > -1) { //there is a header with the given name 00631 dataBegin = begin + name.length() + 1; //skip the name 00632 // skip the usual space after the colon 00633 if ( src.at( dataBegin ) == ' ' ) { 00634 ++dataBegin; 00635 } 00636 end = findHeaderLineEnd( src, dataBegin, folded ); 00637 return begin; 00638 00639 } else { 00640 dataBegin = -1; 00641 return -1; //header not found 00642 } 00643 } 00644 00645 QByteArray extractHeader( const QByteArray &src, const QByteArray &name ) 00646 { 00647 int begin, end; 00648 bool folded; 00649 indexOfHeader( src, name, end, begin, &folded ); 00650 00651 if ( begin >= 0 ) { 00652 if ( !folded ) { 00653 return src.mid( begin, end - begin ); 00654 } else { 00655 QByteArray hdrValue = src.mid( begin, end - begin ); 00656 return unfoldHeader( hdrValue ); 00657 } 00658 } else { 00659 return QByteArray(); //header not found 00660 } 00661 } 00662 00663 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name ) 00664 { 00665 int begin, end; 00666 bool folded; 00667 QList<QByteArray> result; 00668 QByteArray copySrc( src ); 00669 00670 indexOfHeader( copySrc, name, end, begin, &folded ); 00671 while ( begin >= 0 ) { 00672 if ( !folded ) { 00673 result.append( copySrc.mid( begin, end - begin ) ); 00674 } else { 00675 QByteArray hdrValue = copySrc.mid( begin, end - begin ); 00676 result.append( unfoldHeader( hdrValue ) ); 00677 } 00678 00679 // get the next one, a tiny bit ugly, but we don't want the previous to be found again... 00680 copySrc = copySrc.mid( end ); 00681 indexOfHeader( copySrc, name, end, begin, &folded ); 00682 } 00683 00684 return result; 00685 } 00686 00687 void removeHeader( QByteArray &header, const QByteArray &name ) 00688 { 00689 int begin, end, dummy; 00690 begin = indexOfHeader( header, name, end, dummy ); 00691 if ( begin >= 0 ) { 00692 header.remove( begin, end - begin + 1 ); 00693 } 00694 } 00695 00696 QByteArray CRLFtoLF( const QByteArray &s ) 00697 { 00698 QByteArray ret = s; 00699 ret.replace( "\r\n", "\n" ); 00700 return ret; 00701 } 00702 00703 QByteArray CRLFtoLF( const char *s ) 00704 { 00705 QByteArray ret = s; 00706 return CRLFtoLF( ret ); 00707 } 00708 00709 QByteArray LFtoCRLF( const QByteArray &s ) 00710 { 00711 QByteArray ret = s; 00712 ret.replace( '\n', "\r\n" ); 00713 return ret; 00714 } 00715 00716 QByteArray LFtoCRLF( const char *s ) 00717 { 00718 QByteArray ret = s; 00719 return LFtoCRLF( ret ); 00720 } 00721 00722 namespace { 00723 template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str ) 00724 { 00725 bool inQuote = false; 00726 for ( int i = 0; i < str.length(); ++i ) { 00727 if ( str[i] == CharType( '"' ) ) { 00728 str.remove( i, 1 ); 00729 i--; 00730 inQuote = !inQuote; 00731 } else { 00732 if ( inQuote && ( str[i] == CharType( '\\' ) ) ) { 00733 str.remove( i, 1 ); 00734 } 00735 } 00736 } 00737 } 00738 } 00739 00740 void removeQuots( QByteArray &str ) 00741 { 00742 removeQuotesGeneric<QByteArray,char>( str ); 00743 } 00744 00745 void removeQuots( QString &str ) 00746 { 00747 removeQuotesGeneric<QString,QLatin1Char>( str ); 00748 } 00749 00750 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString> 00751 void addQuotes_impl( StringType &str, bool forceQuotes ) 00752 { 00753 bool needsQuotes=false; 00754 for ( int i=0; i < str.length(); i++ ) { 00755 const CharType cur = str.at( i ); 00756 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) { 00757 needsQuotes = true; 00758 } 00759 if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) { 00760 str.insert( i, CharConverterType( '\\' ) ); 00761 i++; 00762 } 00763 } 00764 00765 if ( needsQuotes || forceQuotes ) { 00766 str.insert( 0, CharConverterType( '\"' ) ); 00767 str.append( StringConverterType( "\"" ) ); 00768 } 00769 } 00770 00771 void addQuotes( QByteArray &str, bool forceQuotes ) 00772 { 00773 addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes ); 00774 } 00775 00776 void addQuotes( QString &str, bool forceQuotes ) 00777 { 00778 addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes ); 00779 } 00780 00781 KMIME_EXPORT QString balanceBidiState( const QString &input ) 00782 { 00783 const int LRO = 0x202D; 00784 const int RLO = 0x202E; 00785 const int LRE = 0x202A; 00786 const int RLE = 0x202B; 00787 const int PDF = 0x202C; 00788 00789 QString result = input; 00790 00791 int openDirChangers = 0; 00792 int numPDFsRemoved = 0; 00793 for ( int i = 0; i < input.length(); i++ ) { 00794 const ushort &code = input.at( i ).unicode(); 00795 if ( code == LRO || code == RLO || code == LRE || code == RLE ) { 00796 openDirChangers++; 00797 } 00798 else if ( code == PDF ) { 00799 if ( openDirChangers > 0 ) { 00800 openDirChangers--; 00801 } 00802 else { 00803 // One PDF too much, remove it 00804 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input; 00805 result.remove( i - numPDFsRemoved, 1 ); 00806 numPDFsRemoved++; 00807 } 00808 } 00809 } 00810 00811 if ( openDirChangers > 0 ) { 00812 kWarning() << "Possible Unicode spoofing detected in" << input; 00813 00814 // At PDF chars to the end until the correct state is restored. 00815 // As a special exception, when encountering quoted strings, place the PDF before 00816 // the last quote. 00817 for ( int i = openDirChangers; i > 0; i-- ) { 00818 if ( result.endsWith( QLatin1Char( '"' ) ) ) 00819 result.insert( result.length() - 1, QChar( PDF ) ); 00820 else 00821 result += QChar( PDF ); 00822 } 00823 } 00824 00825 return result; 00826 } 00827 00828 QString removeBidiControlChars( const QString &input ) 00829 { 00830 const int LRO = 0x202D; 00831 const int RLO = 0x202E; 00832 const int LRE = 0x202A; 00833 const int RLE = 0x202B; 00834 QString result = input; 00835 result.remove( LRO ); 00836 result.remove( RLO ); 00837 result.remove( LRE ); 00838 result.remove( RLE ); 00839 return result; 00840 } 00841 00842 static bool isCryptoPart( Content* content ) 00843 { 00844 if( !content->contentType( false ) ) 00845 return false; 00846 00847 if( content->contentType()->subType().toLower() == "octet-stream" && 00848 !content->contentDisposition( false ) ) 00849 return false; 00850 00851 const Headers::ContentType *contentType = content->contentType(); 00852 const QByteArray lowerSubType = contentType->subType().toLower(); 00853 return ( contentType->mediaType().toLower() == "application" && 00854 ( lowerSubType == "pgp-encrypted" || 00855 lowerSubType == "pgp-signature" || 00856 lowerSubType == "pkcs7-mime" || 00857 lowerSubType == "pkcs7-signature" || 00858 lowerSubType == "x-pkcs7-signature" || 00859 ( lowerSubType == "octet-stream" && 00860 content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) ); 00861 } 00862 00863 bool hasAttachment( Content* content ) 00864 { 00865 if( !content ) 00866 return false; 00867 00868 bool emptyFilename = true; 00869 if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() ) 00870 emptyFilename = false; 00871 00872 if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() ) 00873 emptyFilename = false; 00874 00875 // ignore crypto parts 00876 if( !emptyFilename && !isCryptoPart( content ) ) 00877 return true; 00878 00879 // Ok, content itself is not an attachment. now we deal with multiparts 00880 if( content->contentType()->isMultipart() ) { 00881 Q_FOREACH( Content* child, content->contents() ) { 00882 if( hasAttachment( child ) ) 00883 return true; 00884 } 00885 } 00886 00887 return false; 00888 } 00889 00890 bool isSigned( Message *message ) 00891 { 00892 if ( !message ) 00893 return false; 00894 00895 const KMime::Headers::ContentType* const contentType = message->contentType(); 00896 if ( contentType->isSubtype( "signed" ) || 00897 contentType->isSubtype( "pgp-signature" ) || 00898 contentType->isSubtype( "pkcs7-signature" ) || 00899 contentType->isSubtype( "x-pkcs7-signature" ) || 00900 message->mainBodyPart( "multipart/signed" ) || 00901 message->mainBodyPart( "application/pgp-signature" ) || 00902 message->mainBodyPart( "application/pkcs7-signature" ) || 00903 message->mainBodyPart( "application/x-pkcs7-signature" ) ) { 00904 return true; 00905 } 00906 00907 return false; 00908 } 00909 00910 bool isEncrypted( Message *message ) 00911 { 00912 if ( !message ) 00913 return false; 00914 00915 const KMime::Headers::ContentType* const contentType = message->contentType(); 00916 if ( contentType->isSubtype( "encrypted" ) || 00917 contentType->isSubtype( "pgp-encrypted" ) || 00918 contentType->isSubtype( "pkcs7-mime" ) || 00919 message->mainBodyPart( "multipart/encrypted" ) || 00920 message->mainBodyPart( "application/pgp-encrypted" ) || 00921 message->mainBodyPart( "application/pkcs7-mime" ) ) { 00922 return true; 00923 } 00924 00925 return false; 00926 } 00927 00928 bool isInvitation( Content *content ) 00929 { 00930 if ( !content ) 00931 return false; 00932 00933 const KMime::Headers::ContentType* const contentType = content->contentType( false ); 00934 00935 if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) 00936 return true; 00937 00938 return false; 00939 } 00940 00941 } // namespace KMime