//--------------------------------------------------------------------------
// Copyright (C) 2014-2017 Cisco and/or its affiliates. All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License Version 2 as published
// by the Free Software Foundation. You may not use, modify or distribute
// this program under any other version of the GNU General Public License.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//--------------------------------------------------------------------------
// http_stream_splitter_reassemble.cc author Tom Peters <thopeter@cisco.com>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "protocols/packet.h"

#include "http_inspect.h"
#include "http_stream_splitter.h"
#include "http_test_input.h"

using namespace HttpEnums;

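// chunk_spray() steps through one reassembled piece of a chunked message body,
// tracking the chunk framing with a small state machine and copying only the
// chunk data octets (not the framing) into the section buffer via
// decompress_copy(). The framing it expects is standard RFC 7230 chunked
// transfer coding, illustrated here (sizes and extension name are made up):
//
//     1a;name=value\r\n          chunk-size [; extensions] CRLF
//     <26 octets of data>\r\n    chunk data followed by CRLF
//     0\r\n                      terminating zero-length chunk
//     \r\n                       end of the body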
void HttpStreamSplitter::chunk_spray(HttpFlowData* session_data, uint8_t* buffer,
    const uint8_t* data, unsigned length) const
{
    ChunkState& curr_state = session_data->chunk_state[source_id];
    uint32_t& expected = session_data->chunk_expected_length[source_id];
    bool& is_broken_chunk = session_data->is_broken_chunk[source_id];
    uint32_t& num_good_chunks = session_data->num_good_chunks[source_id];

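    // is_broken_chunk means earlier parsing found damaged chunk framing
    // (presumably noted during scan()). Only the first num_good_chunks chunks
    // can still be trusted; after that everything goes through CHUNK_BAD.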
    if (is_broken_chunk && (num_good_chunks == 0))
        curr_state = CHUNK_BAD;

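    // Process one octet at a time. The k-- idiom pushes the current octet back
    // so it is reexamined by the new state on the next loop iteration.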
    for (int32_t k=0; k < static_cast<int32_t>(length); k++)
    {
        switch (curr_state)
        {
        case CHUNK_NEWLINES:
            if (!is_cr_lf[data[k]])
            {
                curr_state = CHUNK_NUMBER;
                k--;
            }
            break;
        case CHUNK_ZEROS:
        case CHUNK_NUMBER:
            // CHUNK_ZEROS is not a distinct state in reassemble(); it is
            // listed here only to avoid a compiler warning.
            if (data[k] == '\r')
                curr_state = CHUNK_HCRLF;
            else if (data[k] == '\n')
            {
                curr_state = CHUNK_HCRLF;
                k--;
            }
            else if (data[k] == ';')
                curr_state = CHUNK_OPTIONS;
            else if (is_sp_tab[data[k]])
                curr_state = CHUNK_WHITESPACE;
            else
                expected = expected * 16 + as_hex[data[k]];
            break;
        case CHUNK_OPTIONS:
        case CHUNK_WHITESPACE:
            // No practical difference between white space and options in reassemble()
            if (data[k] == '\r')
                curr_state = CHUNK_HCRLF;
            else if (data[k] == '\n')
            {
                curr_state = CHUNK_HCRLF;
                k--;
            }
            break;
        case CHUNK_HCRLF:
            if (expected > 0)
                curr_state = CHUNK_DATA;
            else
            {
                // Terminating zero-length chunk
                assert(k+1 == static_cast<int32_t>(length));
                curr_state = CHUNK_NEWLINES;
            }
            break;
        case CHUNK_DATA:
          {
            const uint32_t skip_amount = (length-k <= expected) ? length-k : expected;
            const bool at_start = (session_data->body_octets[source_id] == 0) &&
                (session_data->section_offset[source_id] == 0);
            decompress_copy(buffer, session_data->section_offset[source_id], data+k, skip_amount,
                session_data->compression[source_id], session_data->compress_stream[source_id],
                at_start, session_data->get_infractions(source_id),
                session_data->get_events(source_id));
            if ((expected -= skip_amount) == 0)
                curr_state = CHUNK_DCRLF1;
            k += skip_amount-1;
            break;
          }
        case CHUNK_DCRLF1:
            curr_state = CHUNK_DCRLF2;
            if (data[k] == '\n')
                k--;
            break;
        case CHUNK_DCRLF2:
            if (is_broken_chunk && (--num_good_chunks == 0))
                curr_state = CHUNK_BAD;
            else
            {
                curr_state = CHUNK_NEWLINES;
                expected = 0;
            }
            if (!is_cr_lf[data[k]])
                k--;
            break;
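        // Once the chunk framing is known to be bad there is no way to find
        // the chunk boundaries again. The rest of the section is forwarded
        // as-is.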
        case CHUNK_BAD:
          {
            const uint32_t skip_amount = length-k;
            const bool at_start = (session_data->body_octets[source_id] == 0) &&
                (session_data->section_offset[source_id] == 0);
            decompress_copy(buffer, session_data->section_offset[source_id], data+k, skip_amount,
                session_data->compression[source_id], session_data->compress_stream[source_id],
                at_start, session_data->get_infractions(source_id),
                session_data->get_events(source_id));
            k += skip_amount-1;
            break;
          }
        }
    }
}

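// decompress_copy() appends a piece of message body to the section buffer at
// the given offset. When gzip or deflate decompression is active it runs the
// data through zlib's inflate() instead of copying it verbatim; on any zlib
// failure it gives up on decompression for the rest of the message and falls
// back to raw copying.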
void HttpStreamSplitter::decompress_copy(uint8_t* buffer, uint32_t& offset, const uint8_t* data,
    uint32_t length, HttpEnums::CompressId& compression, z_stream*& compress_stream,
    bool at_start, HttpInfractions* infractions, HttpEventGen* events)
{
    if ((compression == CMP_GZIP) || (compression == CMP_DEFLATE))
    {
        compress_stream->next_in = (Bytef*)data;
        compress_stream->avail_in = length;
        compress_stream->next_out = buffer + offset;
        compress_stream->avail_out = MAX_OCTETS - offset;
        int ret_val = inflate(compress_stream, Z_SYNC_FLUSH);

        if ((ret_val == Z_OK) || (ret_val == Z_STREAM_END))
        {
            offset = MAX_OCTETS - compress_stream->avail_out;
            if (compress_stream->avail_in > 0)
            {
                // There are two ways not to consume all the input
                if (ret_val == Z_STREAM_END)
                {
                    // The zipped data stream ended but there is more input data
                    *infractions += INF_GZIP_EARLY_END;
                    events->create_event(EVENT_GZIP_FAILURE);
                    const uInt num_copy =
                        (compress_stream->avail_in <= compress_stream->avail_out) ?
                        compress_stream->avail_in : compress_stream->avail_out;
                    // Copy the unconsumed octets, which begin at next_in, not
                    // at the start of this piece of data
                    memcpy(buffer + offset, compress_stream->next_in, num_copy);
                    offset += num_copy;
                }
                else
                {
                    assert(compress_stream->avail_out == 0);
                    // The data expanded too much
                    *infractions += INF_GZIP_OVERRUN;
                    events->create_event(EVENT_GZIP_OVERRUN);
                }
                compression = CMP_NONE;
                inflateEnd(compress_stream);
                delete compress_stream;
                compress_stream = nullptr;
            }
            return;
        }
        else if ((compression == CMP_DEFLATE) && at_start && (ret_val == Z_DATA_ERROR))
        {
            // Some incorrect implementations of deflate don't use the expected header. Feed a
            // dummy header to zlib and retry the inflate.
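            // 0x78 0x01 is a valid zlib header: deflate with a 32K window, no
            // preset dictionary, and check bits making CMF*256+FLG a multiple
            // of 31.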
            static constexpr char zlib_header[2] = { 0x78, 0x01 };

            inflateReset(compress_stream);
            compress_stream->next_in = (Bytef*)zlib_header;
            compress_stream->avail_in = sizeof(zlib_header);
            inflate(compress_stream, Z_SYNC_FLUSH);

            // Start over at the beginning
            decompress_copy(buffer, offset, data, length, compression, compress_stream, false,
                infractions, events);
            return;
        }
        else
        {
            *infractions += INF_GZIP_FAILURE;
            events->create_event(EVENT_GZIP_FAILURE);
            compression = CMP_NONE;
            inflateEnd(compress_stream);
            delete compress_stream;
            compress_stream = nullptr;
            // Since we failed to uncompress the data, fall through
        }
    }

    // The following precaution is necessary because mixed compressed and uncompressed data can
    // cause the buffer to overrun even though we are not decompressing right now
    if (length > MAX_OCTETS - offset)
    {
        length = MAX_OCTETS - offset;
        *infractions += INF_GZIP_OVERRUN;
        events->create_event(EVENT_GZIP_OVERRUN);
    }
    memcpy(buffer + offset, data, length);
    offset += length;
}

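// reassemble() is this splitter's implementation of StreamSplitter::reassemble().
// Stream calls it with successive pieces of a flushed message section; the
// pieces are accumulated (decompressed where applicable) in the section buffer
// and the finished buffer is returned with the piece flagged PKT_PDU_TAIL.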
const StreamBuffer HttpStreamSplitter::reassemble(Flow* flow, unsigned total, unsigned,
    const uint8_t* data, unsigned len, uint32_t flags, unsigned& copied)
{
    StreamBuffer http_buf { nullptr, 0 };

    copied = len;

    HttpFlowData* session_data = (HttpFlowData*)flow->get_flow_data(HttpFlowData::inspector_id);
    assert(session_data != nullptr);

#ifdef REG_TEST
    if (HttpTestManager::use_test_output())
    {
        if (HttpTestManager::use_test_input())
        {
            if (!(flags & PKT_PDU_TAIL))
            {
                return http_buf;
            }
            bool tcp_close;
            uint8_t* test_buffer;
            HttpTestManager::get_test_input_source()->reassemble(&test_buffer, len, source_id,
                tcp_close);
            if (tcp_close)
            {
                finish(flow);
            }
            if (test_buffer == nullptr)
            {
                // Source ID does not match test data, no test data was flushed, or there is no
                // more test data
                return http_buf;
            }
            data = test_buffer;
            total = len;
        }
        else
        {
            printf("Reassemble from flow data %" PRIu64 " direction %d total %u length %u\n",
                session_data->seq_num, source_id, total, len);
            fflush(stdout);
        }
    }
#endif

    // Sometimes it is necessary to reassemble zero bytes when a connection is closing in order
    // to trigger proper clean up. But even a zero-length buffer cannot be processed with a
    // nullptr: passing nullptr to memcpy() is undefined behavior and may also upset library code.
    assert((data != nullptr) || (len == 0));
    if (data == nullptr)
        data = (const uint8_t*)"";

    // FIXIT-H Workaround for TP Bug 149662
    if (session_data->section_type[source_id] == SEC__NOT_COMPUTE)
    {
        return { nullptr, 0 };
    }

    assert(session_data->section_type[source_id] != SEC__NOT_COMPUTE);
    assert(total <= MAX_OCTETS);

    session_data->running_total[source_id] += len;
    assert(session_data->running_total[source_id] <= total);

    // FIXIT-P stream should be enhanced to do discarding for us. For now flush-then-discard here
    // is how scan() handles things we don't need to examine.
    if (session_data->section_type[source_id] == SEC_DISCARD)
    {
#ifdef REG_TEST
        if (HttpTestManager::use_test_output())
        {
            fprintf(HttpTestManager::get_output_file(), "Discarded %u octets\n\n", len);
            fflush(HttpTestManager::get_output_file());
        }
#endif
        if (flags & PKT_PDU_TAIL)
        {
            assert(session_data->running_total[source_id] == total);
            assert(
                (session_data->octets_expected[source_id] == total) ||
                (!session_data->strict_length[source_id] &&
                 (total <= session_data->octets_expected[source_id])));
            session_data->running_total[source_id] = 0;
            session_data->section_type[source_id] = SEC__NOT_COMPUTE;

            // When we are skipping through a message body beyond flow depth this is the end of
            // the line. Here we do the message section's normal job of updating the flow for the
            // next stage.
            if (session_data->cutter[source_id] == nullptr)
            {
                if (session_data->type_expected[source_id] == SEC_BODY_CL)
                {
                    session_data->half_reset(source_id);
                }
                else if (session_data->type_expected[source_id] == SEC_BODY_CHUNK)
                {
                    session_data->trailer_prep(source_id);
                }
            }
        }
        return http_buf;
    }

    HttpModule::increment_peg_counts(PEG_REASSEMBLE);

    const bool is_body = (session_data->section_type[source_id] == SEC_BODY_CHUNK) ||
        (session_data->section_type[source_id] == SEC_BODY_CL) ||
        (session_data->section_type[source_id] == SEC_BODY_OLD);
    uint8_t*& buffer = session_data->section_buffer[source_id];
    if (buffer == nullptr)
    {
        // Body sections need extra space to accommodate unzipping
        if (is_body)
            buffer = new uint8_t[MAX_OCTETS];
        else
            buffer = new uint8_t[total];
        session_data->section_total[source_id] = total;
    }
    else
        assert(session_data->section_total[source_id] == total);

    if (session_data->section_type[source_id] != SEC_BODY_CHUNK)
    {
        const bool at_start = (session_data->body_octets[source_id] == 0) &&
            (session_data->section_offset[source_id] == 0);
        decompress_copy(buffer, session_data->section_offset[source_id], data, len,
            session_data->compression[source_id], session_data->compress_stream[source_id],
            at_start, session_data->get_infractions(source_id),
            session_data->get_events(source_id));
    }
    else
    {
        chunk_spray(session_data, buffer, data, len);
    }

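    // This piece completes the message section: check the length bookkeeping,
    // hand off the accumulated buffer, and reset for the next section.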
    if (flags & PKT_PDU_TAIL)
    {
        uint32_t& running_total = session_data->running_total[source_id];
        assert(running_total == total);
        assert((session_data->octets_expected[source_id] == total) ||
            (!session_data->strict_length[source_id] &&
             (total <= session_data->octets_expected[source_id])));
        running_total = 0;
        const uint16_t buf_size =
            session_data->section_offset[source_id] - session_data->num_excess[source_id];

        // FIXIT-M kludge until we work out issues with returning an empty buffer
        http_buf.data = buffer;
        if (buf_size > 0)
        {
            http_buf.length = buf_size;
            session_data->zero_byte_workaround[source_id] = false;
        }
        else
        {
            buffer[0] = '\0';
            http_buf.length = 1;
            session_data->zero_byte_workaround[source_id] = true;
        }
        buffer = nullptr;
        session_data->section_offset[source_id] = 0;
    }
    return http_buf;
}