00001 //--------------------------------------------------------------------------
00002 // Copyright (C) 2014-2017 Cisco and/or its affiliates. All rights reserved.
00003 //
00004 // This program is free software; you can redistribute it and/or modify it
00005 // under the terms of the GNU General Public License Version 2 as published
00006 // by the Free Software Foundation. You may not use, modify or distribute
00007 // this program under any other version of the GNU General Public License.
00008 //
00009 // This program is distributed in the hope that it will be useful, but
00010 // WITHOUT ANY WARRANTY; without even the implied warranty of
00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00012 // General Public License for more details.
00013 //
00014 // You should have received a copy of the GNU General Public License along
00015 // with this program; if not, write to the Free Software Foundation, Inc.,
00016 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017 //--------------------------------------------------------------------------
00018 // http_cutter.cc author Tom Peters <thopeter@cisco.com>
00019
00020 #ifdef HAVE_CONFIG_H
00021 #include "config.h"
00022 #endif
00023
00024 #include "http_cutter.h"
00025
00026 using namespace HttpEnums;
00027
00028 ScanResult HttpStartCutter::cut(const uint8_t* buffer, uint32_t length,
00029 HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
00030 {
00031 for (uint32_t k = 0; k < length; k++)
00032 {
00033 // Discard magic six white space characters CR, LF, Tab, VT, FF, and SP when they occur
00034 // before the start line.
00035 // If we have seen nothing but white space so far ...
00036 if (num_crlf == octets_seen + k)
00037 {
00038 if (is_sp_tab_cr_lf_vt_ff[buffer[k]])
00039 {
00040 if (!is_cr_lf[buffer[k]])
00041 {
00042 // tab, VT, FF, or space between messages
00043 *infractions += INF_WS_BETWEEN_MSGS;
00044 events->create_event(EVENT_WS_BETWEEN_MSGS);
00045 }
00046 if (num_crlf < MAX_LEADING_WHITESPACE)
00047 {
00048 num_crlf++;
00049 continue;
00050 }
00051 else
00052 {
00053 *infractions += INF_TOO_MUCH_LEADING_WS;
00054 events->generate_misformatted_http(buffer, length);
00055 return SCAN_ABORT;
00056 }
00057 }
00058 if (num_crlf > 0)
00059 {
00060 num_flush = k; // current octet not flushed with white space
00061 return SCAN_DISCARD;
00062 }
00063 }
00064
00065 // If we get this far then the leading white space issue is behind us and num_crlf was
00066 // reset to zero
00067 if (!validated)
00068 {
00069 // The purpose of validate() is to quickly and efficiently dispose of obviously wrong
00070 // bindings. Passing is no guarantee that the connection is really HTTP, but failing
00071 // makes it clear that it isn't.
00072 switch (validate(buffer[k], infractions, events))
00073 {
00074 case V_GOOD:
00075 validated = true;
00076 break;
00077 case V_BAD:
00078 *infractions += INF_NOT_HTTP;
00079 events->generate_misformatted_http(buffer, length);
00080 return SCAN_ABORT;
00081 case V_TBD:
00082 break;
00083 }
00084 }
00085 if (buffer[k] == '\n')
00086 {
00087 num_crlf++;
00088 if (num_crlf == 1)
00089 {
00090 // There was no CR before this
00091 *infractions += INF_LF_WITHOUT_CR;
00092 events->create_event(EVENT_LF_WITHOUT_CR);
00093 }
00094 num_flush = k+1;
00095 return SCAN_FOUND;
00096 }
00097 if (num_crlf == 1)
00098 { // CR not followed by LF
00099 *infractions += INF_CR_WITHOUT_LF;
00100 events->create_event(EVENT_CR_WITHOUT_LF);
00101 num_flush = k; // current octet not flushed
00102 return SCAN_FOUND;
00103 }
00104 if (buffer[k] == '\r')
00105 {
00106 num_crlf = 1;
00107 }
00108 }
00109 octets_seen += length;
00110 return SCAN_NOTFOUND;
00111 }
00112
00113 HttpStartCutter::ValidationResult HttpRequestCutter::validate(uint8_t octet, HttpInfractions*,
00114 HttpEventGen*)
00115 {
00116 // Request line must begin with a method. There is no list of all possible methods because
00117 // extension is allowed, so there is no absolute way to tell whether something is a method.
00118 // Instead we verify that all its characters are drawn from the RFC list of valid token
00119 // characters, that it is followed by a whitespace character, and that it is at most 80
00120 // characters long. There is nothing special or specified about 80. It is just more than any
00121 // reasonable method name would be.
00122
00123 static const int max_method_length = 80;
00124
00125 if ((octet == ' ') || (octet == '\t'))
00126 return V_GOOD;
00127 if (!token_char[octet] || ++octets_checked > max_method_length)
00128 return V_BAD;
00129 return V_TBD;
00130 }
00131
00132 HttpStartCutter::ValidationResult HttpStatusCutter::validate(uint8_t octet,
00133 HttpInfractions* infractions, HttpEventGen* events)
00134 {
00135 // Status line must begin "HTTP/"
00136 static const int match_size = 5;
00137 static const uint8_t primary_match[match_size] = { 'H', 'T', 'T', 'P', '/' };
00138 static const uint8_t secondary_match[match_size] = { 'h', 't', 't', 'p', '/' };
00139
00140 if (octet != primary_match[octets_checked])
00141 {
00142 if (octet == secondary_match[octets_checked])
00143 {
00144 // Lower case is wrong but we can still parse the message
00145 *infractions += INF_VERSION_NOT_UPPERCASE;
00146 events->create_event(EVENT_VERSION_NOT_UPPERCASE);
00147 }
00148 else
00149 return V_BAD;
00150 }
00151 if (++octets_checked >= match_size)
00152 return V_GOOD;
00153 return V_TBD;
00154 }
00155
00156 ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
00157 HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
00158 {
00159 // Header separators: leading \r\n, leading \n, nonleading \r\n\r\n, nonleading \n\r\n,
00160 // nonleading \r\n\n, and nonleading \n\n. The separator itself becomes num_excess which is
00161 // discarded during reassemble().
00162 // \r without \n can (improperly) end the start line or a header line, but not the entire
00163 // header block.
00164 for (uint32_t k = 0; k < length; k++)
00165 {
00166 switch (state)
00167 {
00168 case ZERO:
00169 if (buffer[k] == '\r')
00170 {
00171 state = HALF;
00172 num_crlf++;
00173 }
00174 else if (buffer[k] == '\n')
00175 {
00176 *infractions += INF_LF_WITHOUT_CR;
00177 events->create_event(EVENT_LF_WITHOUT_CR);
00178 state = ONE;
00179 num_crlf++;
00180 }
00181 break;
00182 case HALF:
00183 if (buffer[k] == '\r')
00184 {
00185 *infractions += INF_CR_WITHOUT_LF;
00186 events->create_event(EVENT_CR_WITHOUT_LF);
00187 state = THREEHALF;
00188 num_crlf++;
00189 }
00190 else if (buffer[k] == '\n')
00191 {
00192 state = ONE;
00193 num_crlf++;
00194 }
00195 else
00196 {
00197 *infractions += INF_CR_WITHOUT_LF;
00198 events->create_event(EVENT_CR_WITHOUT_LF);
00199 state = ZERO;
00200 num_crlf = 0;
00201 num_head_lines++;
00202 }
00203 break;
00204 case ONE:
00205 if (buffer[k] == '\r')
00206 {
00207 state = THREEHALF;
00208 num_crlf++;
00209 }
00210 else if (buffer[k] == '\n')
00211 {
00212 *infractions += INF_LF_WITHOUT_CR;
00213 events->create_event(EVENT_LF_WITHOUT_CR);
00214 num_crlf++;
00215 num_flush = k + 1;
00216 return SCAN_FOUND;
00217 }
00218 else
00219 {
00220 state = ZERO;
00221 num_crlf = 0;
00222 num_head_lines++;
00223 }
00224 break;
00225 case THREEHALF:
00226 if (buffer[k] == '\r')
00227 {
00228 *infractions += INF_CR_WITHOUT_LF;
00229 events->create_event(EVENT_CR_WITHOUT_LF);
00230 num_crlf++;
00231 }
00232 else if (buffer[k] == '\n')
00233 {
00234 num_crlf++;
00235 num_flush = k + 1;
00236 return SCAN_FOUND;
00237 }
00238 else
00239 {
00240 *infractions += INF_CR_WITHOUT_LF;
00241 events->create_event(EVENT_CR_WITHOUT_LF);
00242 state = ZERO;
00243 num_crlf = 0;
00244 num_head_lines++;
00245 }
00246 break;
00247 }
00248 }
00249 octets_seen += length;
00250 return SCAN_NOTFOUND;
00251 }
00252
00253 ScanResult HttpBodyClCutter::cut(const uint8_t*, uint32_t length, HttpInfractions*,
00254 HttpEventGen*, uint32_t flow_target, uint32_t flow_max)
00255 {
00256 assert(remaining > 0);
00257
00258 // Are we skipping to the next message?
00259 if (flow_target == 0)
00260 {
00261 if (remaining <= length)
00262 {
00263 num_flush = remaining;
00264 remaining = 0;
00265 return SCAN_DISCARD;
00266 }
00267 else
00268 {
00269 num_flush = length;
00270 remaining -= num_flush;
00271 return SCAN_DISCARD_PIECE;
00272 }
00273 }
00274
00275 // The normal body section size is flow_target. But if there are only flow_max or less
00276 // remaining we take the whole thing rather than leave a small final section.
00277 if (remaining <= flow_max)
00278 {
00279 num_flush = remaining;
00280 remaining = 0;
00281 return SCAN_FOUND;
00282 }
00283 else
00284 {
00285 num_flush = flow_target;
00286 remaining -= num_flush;
00287 return SCAN_FOUND_PIECE;
00288 }
00289 }
00290
00291 ScanResult HttpBodyOldCutter::cut(const uint8_t*, uint32_t, HttpInfractions*, HttpEventGen*,
00292 uint32_t flow_target, uint32_t)
00293 {
00294 if (flow_target == 0)
00295 {
00296 // With other types of body we could skip to the next message now. But this body will run
00297 // to connection close so we just stop.
00298 return SCAN_END;
00299 }
00300
00301 num_flush = flow_target;
00302 return SCAN_FOUND_PIECE;
00303 }
00304
// Scans a piece of a chunked message body, walking the chunk framing octet by octet with the
// state machine held in curr_state.
// flow_target is the requested section size; zero means the rest of the body is being discarded.
// Returns:
//   SCAN_FOUND / SCAN_DISCARD - the terminating zero-length chunk header was completed
//   SCAN_FOUND_PIECE          - data_seen reached flow_target
//   SCAN_DISCARD_PIECE        - the buffer was exhausted while discarding
//   SCAN_ABORT                - a broken chunk was met while discarding
//   SCAN_NOTFOUND             - the buffer was exhausted without completing a section
// num_flush is set before every return except SCAN_ABORT and SCAN_NOTFOUND.
00305 ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
00306 HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t)
00307 {
00308 // Are we skipping through the rest of this chunked body to the trailers and the next message?
00309 const bool discard_mode = (flow_target == 0);
00310
// new_section is set whenever this function returns SCAN_FOUND_PIECE, so these per-section
// counters restart on the first call after a section boundary.
00311 if (new_section)
00312 {
00313 new_section = false;
00314 octets_seen = 0;
00315 num_good_chunks = 0;
00316 }
00317
// k is signed because several states decrement it to reprocess the current octet in the state
// just entered; the loop increment then brings it back to the same position.
00318 for (int32_t k=0; k < static_cast<int32_t>(length); k++)
00319 {
00320 switch (curr_state)
00321 {
00322 case CHUNK_NEWLINES:
00323 // Looking for improper CRLFs before the chunk header
00324 if (is_cr_lf[buffer[k]])
00325 {
00326 *infractions += INF_CHUNK_BAD_SEP;
00327 events->create_event(EVENT_CHUNK_BAD_SEP);
00328 break;
00329 }
00330 curr_state = CHUNK_ZEROS;
00331 k--; // Reprocess this octet in the next state
00332 break;
00333 case CHUNK_ZEROS:
00334 // Looking for leading zeros in the chunk size.
00335 if (buffer[k] == '0')
00336 {
00337 num_zeros++;
// The infraction is raised once per chunk header, on exactly the fifth zero
00338 if (num_zeros == 5)
00339 {
00340 *infractions += INF_CHUNK_ZEROS;
00341 events->create_event(EVENT_CHUNK_ZEROS);
00342 }
00343 break;
00344 }
00345 curr_state = CHUNK_NUMBER;
// Reprocess this octet as the first digit of the chunk size
00346 k--;
00347 break;
00348 case CHUNK_NUMBER:
00349 // Reading the chunk size
00350 if (buffer[k] == '\r')
00351 {
00352 curr_state = CHUNK_HCRLF;
00353 }
00354 else if (buffer[k] == '\n')
00355 {
00356 *infractions += INF_CHUNK_BARE_LF;
00357 events->create_event(EVENT_CHUNK_BARE_LF);
00358 curr_state = CHUNK_HCRLF;
// Reprocess the LF in CHUNK_HCRLF, which is the state that expects it
00359 k--;
00360 }
00361 else if (is_sp_tab[buffer[k]])
00362 {
00363 *infractions += INF_CHUNK_WHITESPACE;
00364 events->create_event(EVENT_CHUNK_WHITESPACE);
00365 curr_state = CHUNK_WHITESPACE;
00366 }
00367 else if (buffer[k] == ';')
00368 {
00369 *infractions += INF_CHUNK_OPTIONS;
00370 events->create_event(EVENT_CHUNK_OPTIONS);
00371 curr_state = CHUNK_OPTIONS;
00372 }
00373 else if (as_hex[buffer[k]] == -1)
00374 {
00375 // illegal character present in chunk length
00376 *infractions += INF_CHUNK_BAD_CHAR;
00377 events->create_event(EVENT_BROKEN_CHUNK);
00378 curr_state = CHUNK_BAD;
00379 }
00380 else
00381 {
// Accumulate the next digit (as_hex presumably maps a hex digit octet to its value).
// expected is updated before the digit count is checked; on a ninth digit the state becomes
// CHUNK_BAD, which never reads expected again.
00382 expected = expected * 16 + as_hex[buffer[k]];
00383 if (++digits_seen > 8)
00384 {
00385 // overflow protection: must fit into 32 bits
00386 *infractions += INF_CHUNK_TOO_LARGE;
00387 events->create_event(EVENT_BROKEN_CHUNK);
00388 curr_state = CHUNK_BAD;
00389 }
00390 }
00391 break;
00392 case CHUNK_WHITESPACE:
00393 // Skipping over improper whitespace following the chunk size
00394 if (buffer[k] == '\r')
00395 {
00396 curr_state = CHUNK_HCRLF;
00397 }
00398 else if (buffer[k] == '\n')
00399 {
00400 *infractions += INF_CHUNK_BARE_LF;
00401 events->create_event(EVENT_CHUNK_BARE_LF);
00402 curr_state = CHUNK_HCRLF;
// Reprocess the LF in CHUNK_HCRLF
00403 k--;
00404 }
00405 else if (buffer[k] == ';')
00406 {
00407 *infractions += INF_CHUNK_OPTIONS;
00408 events->create_event(EVENT_CHUNK_OPTIONS);
00409 curr_state = CHUNK_OPTIONS;
00410 }
00411 else if (!is_sp_tab[buffer[k]])
00412 {
00413 // illegal character present in chunk length
00414 *infractions += INF_CHUNK_BAD_CHAR;
00415 events->create_event(EVENT_BROKEN_CHUNK);
00416 curr_state = CHUNK_BAD;
00417 }
00418 break;
00419 case CHUNK_OPTIONS:
00420 // The RFC permits options to follow the chunk size. No one normally does this.
// Every octet other than CR or LF is skipped here without inspection.
00421 if (buffer[k] == '\r')
00422 {
00423 curr_state = CHUNK_HCRLF;
00424 }
00425 else if (buffer[k] == '\n')
00426 {
00427 *infractions += INF_CHUNK_BARE_LF;
00428 events->create_event(EVENT_CHUNK_BARE_LF);
00429 curr_state = CHUNK_HCRLF;
// Reprocess the LF in CHUNK_HCRLF
00430 k--;
00431 }
00432 break;
00433 case CHUNK_HCRLF:
00434 // The chunk header should end in CRLF and this should be the LF
00435 if (buffer[k] != '\n')
00436 {
00437 // This is qualitatively different from similar bare CR issues because it doesn't
00438 // provide a transparent data channel. A recipient is much less likely to implement
00439 // tolerance for this irregularity because a chunk that begins with LF is
00440 // ambiguous.
00441 *infractions += INF_CHUNK_LONE_CR;
00442 events->create_event(EVENT_BROKEN_CHUNK);
00443 curr_state = CHUNK_BAD;
00444 break;
00445 }
// The header is complete. A nonzero size means chunk data follows.
00446 if (expected > 0)
00447 {
00448 curr_state = CHUNK_DATA;
00449 }
// A size of zero written with at least one '0' digit is the last chunk
00450 else if (num_zeros > 0)
00451 {
00452 // Terminating zero-length chunk
00453 num_good_chunks++;
00454 num_flush = k+1;
00455 return !discard_mode ? SCAN_FOUND : SCAN_DISCARD;
00456 }
// No digits at all were supplied for the chunk size
00457 else
00458 {
00459 *infractions += INF_CHUNK_NO_LENGTH;
00460 events->create_event(EVENT_BROKEN_CHUNK);
00461 curr_state = CHUNK_BAD;
00462 }
00463 break;
00464 case CHUNK_DATA:
00465 // Moving through the chunk data
00466 {
// Take whichever is smaller: what is left in this buffer or what is left in the chunk
00467 uint32_t skip_amount = (length-k <= expected) ? length-k : expected;
00468 if (!discard_mode && (skip_amount > flow_target-data_seen))
00469 { // Do not exceed requested section size
00470 skip_amount = flow_target-data_seen;
00471 }
// The -1 compensates for the k++ performed by the loop
00472 k += skip_amount - 1;
00473 if ((expected -= skip_amount) == 0)
00474 {
00475 curr_state = CHUNK_DCRLF1;
00476 }
00477 if ((data_seen += skip_amount) == flow_target)
00478 {
00479 // FIXIT-M need to randomize slice point
00480 data_seen = 0;
00481 num_flush = k+1;
00482 new_section = true;
00483 return SCAN_FOUND_PIECE;
00484 }
00485 break;
00486 }
00487 case CHUNK_DCRLF1:
00488 // The CR from the end-of-chunk CRLF should be here
00489 if (buffer[k] == '\r')
00490 {
00491 curr_state = CHUNK_DCRLF2;
00492 }
00493 else if (buffer[k] == '\n')
00494 {
00495 *infractions += INF_CHUNK_BAD_SEP;
00496 events->create_event(EVENT_CHUNK_BAD_SEP);
00497 curr_state = CHUNK_DCRLF2;
// Reprocess the LF in CHUNK_DCRLF2
00498 k--;
00499 }
00500 else
00501 {
00502 *infractions += INF_CHUNK_BAD_END;
00503 events->create_event(EVENT_BROKEN_CHUNK);
00504 curr_state = CHUNK_BAD;
00505 }
00506 break;
00507 case CHUNK_DCRLF2:
00508 // The LF from the end-of-chunk CRLF should be here
// The chunk is counted and the per-chunk state is reset before the octet is checked, so this
// happens whether or not the expected LF is present.
00509 num_good_chunks++;
00510 num_zeros = 0;
00511 expected = 0;
00512 digits_seen = 0;
00513 curr_state = CHUNK_NEWLINES;
00514 if (buffer[k] == '\n')
00515 break;
00516 *infractions += INF_CHUNK_BAD_SEP;
00517 events->create_event(EVENT_CHUNK_BAD_SEP);
// An extra CR is consumed here; any other octet is reprocessed in CHUNK_NEWLINES
00518 if (buffer[k] != '\r')
00519 k--;
00520 break;
00521 case CHUNK_BAD:
00522 // Chunk reassembly has failed. This is a terminal state but inspection of the body
00523 // must go on.
00524 // If we are skipping to the trailers and next message the broken chunk thwarts us
00525 if (discard_mode)
00526 {
00527 return SCAN_ABORT;
00528 }
// Treat everything left in the buffer as body data, limited to what the current section
// still needs.
00529 uint32_t skip_amount = length-k;
00530 skip_amount = (skip_amount <= flow_target-data_seen) ? skip_amount :
00531 flow_target-data_seen;
// The -1 compensates for the k++ performed by the loop
00532 k += skip_amount - 1;
00533 if ((data_seen += skip_amount) == flow_target)
00534 {
00535 // FIXIT-M need to randomize slice point
00536 data_seen = 0;
00537 num_flush = k+1;
00538 new_section = true;
00539 return SCAN_FOUND_PIECE;
00540 }
00541 break;
00542 }
00543 }
// The buffer is exhausted. In discard mode all of it is flushed as a discarded piece.
00544 if (discard_mode)
00545 {
00546 num_flush = length;
00547 return SCAN_DISCARD_PIECE;
00548 }
00549 octets_seen += length;
00550 return SCAN_NOTFOUND;
00551 }
00552
END OF CODE