00001 //--------------------------------------------------------------------------
00002 // Copyright (C) 2014-2017 Cisco and/or its affiliates. All rights reserved.
00003 // Copyright (C) 2005-2013 Sourcefire, Inc.
00004 //
00005 // This program is free software; you can redistribute it and/or modify it
00006 // under the terms of the GNU General Public License Version 2 as published
00007 // by the Free Software Foundation. You may not use, modify or distribute
00008 // this program under any other version of the GNU General Public License.
00009 //
00010 // This program is distributed in the hope that it will be useful, but
00011 // WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00013 // General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU General Public License along
00016 // with this program; if not, write to the Free Software Foundation, Inc.,
00017 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 //--------------------------------------------------------------------------
00019 /*
00020 ** bnfa_search.c
00021 **
00022 ** Basic multi-pattern search engine using Aho-Corasick NFA construction.
00023 **
00024 ** Version 3.0 (based on acsmx.c and acsmx2.c)
00025 **
00026 ** author: marc norton
00027 ** date: started 12/21/05
00028 **
00029 ** General Design
00030 ** Aho-Corasick based NFA state machine.
00031 ** Compacted sparse storage mode for better performance.
00032 ** Up to 16 Million states + transitions (combined) in compacted sparse mode.
00033 **
00034 ** ** Compacted sparse array storage **
00035 **
00036 ** The primary data is held in one array.
00037 ** The patterns themselves are stored separately.
00038 ** The matching lists of patterns for each state are stored separately as well.
00039 ** The compacted sparse format improves caching/performance.
00040 **
00041 ** word 1 : state ( only low 24 bits are used )
00042 ** word 2 : control word = cb << 24 | fs
00043 ** cb: control byte
00044 ** cb = mb | fb | nt
00045 ** mb : 8th bit - if set state has matching patterns bit
00046 ** fb : 7th bit - if set full storage array bit (256 entries used),
00047 else sparse
00048 ** nt : 0-63= number of transitions (more than 63 requires full storage)
00049 ** fs: 24 bits for failure state transition index.
00050 ** word 3+ : transition word = input<<24 | next-state-index
00051 ** input : 8 bit character, input to state machine from search text
00052 ** next-state-index: 24 bits for index of next state
00053 ** (if we really need 16M states, we can add a state->index lookup array)
00054 ** ...repeat for each state ...
00055 **
00056 ** * if a state is empty it has words 1 and 2, but no transition words.
00057 **
00058 ** Construction:
00059 **
00060 ** Patterns are added to a list based trie.
00061 ** The list based trie is compiled into a list based NFA with failure states.
00062 ** The list based NFA is converted to full or sparse format NFA.
00063 ** The Zero'th state sparse transitions may be stored in full format for
00064 ** performance.
00065 ** Sparse transition arrays are searched using linear and binary search
00066 ** strategies depending on the number of entries to search through in
00067 ** each state.
00068 ** The state machine in sparse mode is compacted into a single vector for
00069 ** better performance.
00070 **
00071 ** Notes:
00072 **
00073 ** The NFA can require twice the state transitions that a DFA uses. However,
00074 ** the construction of a DFA generates many additional transitions in each
00075 ** state which consumes significant additional memory. This particular
00076 ** implementation is best suited to environments where the very large memory
00077 ** requirements of a full state table implementation is not possible and/or
00078 ** the speed trade off is warranted to maintain a small memory footprint.
00079 **
00080 ** Each state of an NFA usually has very few transitions but can have up to
00081 ** 256. It is important to not degenerate into a linear search so we utilize
00082 ** a binary search if there are more than 5 elements in the state to test for
00083 ** a match. This allows us to use a simple sparse memory design with an
00084 ** acceptable worst case search scenario. The binary search over 256 elements
00085 ** is limited to a max of 8 tests. The zero'th state may use a full 256 state
00086 ** array, so a quick index lookup provides the next state transition. The
00087 ** zero'th state is generally visited much more than other states.
00088 **
00089 ** Compiling : gcc, Intel C/C++, Microsoft C/C++, each optimize differently.
00090 ** My studies have shown Intel C/C++ 9,8,7 to be the fastest, Microsoft 8,7,6
00091 ** is next fastest, and gcc 4.x,3.x,2.x is the slowest of the three. My
00092 ** testing has been mainly on x86. In general gcc does a poor job with
00093 ** optimizing this state machine for performance, compared to other less cache
00094 ** and prefetch sensitive algorithms. I've documented this behavior in a
00095 ** paper 'Optimizing Pattern Matching for IDS' (www.sourcefire.com,
00096 ** www.idsresearch.org).
00097 **
00098 ** The code is sensitive to cache optimization and prefetching, as well as
00099 ** instruction pipelining. Aren't we all. To this end, the number of
00100 ** patterns, length of search text, and cpu cache L1,L2,L3 all affect
00101 ** performance. The relative performance of the sparse and full format NFA and
00102 ** DFA varies as you vary the pattern characteristics, and search text length,
00103 ** but strong performance trends are present and stable.
00104 **
00105 **
00106 ** BNFA API SUMMARY
00107 **
00108 ** bnfa=bnfaNew(); create a state machine
00109 ** bnfaAddPattern(bnfa,..); add a pattern to the state machine
00110 ** bnfaCompile (bnfa,..) compile the state machine
00111 ** bnfaPrintInfo(bnfa); print memory usage and state info
00112 ** bnfaPrint(bnfa); print the state machine in total
00113 ** state=bnfaSearch(bnfa, ...,state); search a data buffer for a pattern match
00114 ** bnfaFree (bnfa); free the bnfa
00115 **
00116 **
00117 ** Reference - Efficient String matching: An Aid to Bibliographic Search
00118 ** Alfred V Aho and Margaret J Corasick
00119 ** Bell Laboratories
00120 ** Copyright (C) 1975 Association for Computing Machinery,Inc
00121 **
00122 ** 12/4/06 - man - modified summary
00123 ** 6/26/07 - man - Added last_match tracking, and accounted for nocase/case by
00124 ** presetting the last match state, and reverting if we fail the
00125 ** case memcmp test for any rule in the states matching rule
00126 ** list. The states in the default matcher represent either
00127 ** case or nocase states, so they are dual mode, that makes
00128 ** this a bit tricky. When we sue the pure exact match, or
00129 ** pure don't care matching routines, we just track the last
00130 ** state, and never need to revert. This only tracks the
00131 ** single repeated states and repeated data.
00132 ** 01/2008 - man - added 2 phase pattern matcher using a pattern match queue.
00133 ** Text is scanned and matching states are queued, duplicate
00134 ** matches are dropped, and after the complete buffer scan the
00135 ** queued matches are processed. This improves caching
00136 ** performance, and reduces duplicate rule processing. The
00137 ** queue is limited in size and is flushed if it becomes full
00138 ** during the scan. This allows simple insertions. Tracking
00139 ** queue ops is optional, as this can impose a modest
00140 ** performance hit of a few percent.
00141 */
00142
00143 #ifdef HAVE_CONFIG_H
00144 #include "config.h"
00145 #endif
00146
00147 #include "bnfa_search.h"
00148
00149 #include <list>
00150
00151 #include "log/messages.h"
00152 #include "utils/stats.h"
00153 #include "utils/util.h"
00154
00155 /*
00156 * Used to initialize last state, states are limited to 0-16M
00157 * so this will not conflict.
00158 */
00159 #define LAST_STATE_INIT 0xffffffff
00160
00161 #define printf LogMessage
00162
00163 /*
00164 * Case Translation Table - this guarantees we use
00165 * indexed lookups for case conversion
00166 */
00167 static unsigned xlatinit = 1;
00168 static uint8_t xlatcase[BNFA_MAX_ALPHABET_SIZE];
00169
00170 void bnfa_init_xlatcase()
00171 {
00172 if ( !xlatinit )
00173 return;
00174
00175 for (int i=0; i<BNFA_MAX_ALPHABET_SIZE; i++)
00176 {
00177 xlatcase[i] = (uint8_t)toupper(i);
00178 }
00179 xlatinit = 0;
00180 }
00181
00182 /*
00183 * Custom memory allocator
00184 */
00185 static void* bnfa_alloc(int n, int* m)
00186 {
00187 if ( !n )
00188 return nullptr;
00189
00190 void* p = snort_calloc(n);
00191
00192 if (m)
00193 m[0] += n;
00194
00195 return p;
00196 }
00197
00198 static void bnfa_free(void* p, int n, int* m)
00199 {
00200 if ( p )
00201 {
00202 snort_free(p);
00203 if (m)
00204 {
00205 m[0] -= n;
00206 }
00207 }
00208 }
00209
00210 #define BNFA_MALLOC(n,memory) (bnfa_state_t*)bnfa_alloc(n,&(memory))
00211 #define BNFA_FREE(p,n,memory) bnfa_free(p,n,&(memory))
00212
00213 /*
00214 * Get next state from transition list
00215 */
00216 static int _bnfa_list_get_next_state(bnfa_struct_t* bnfa, int state, int input)
00217 {
00218 if ( state == 0 ) /* Full set of states always */
00219 {
00220 bnfa_state_t* p = (bnfa_state_t*)bnfa->bnfaTransTable[0];
00221 if (!p)
00222 {
00223 return 0;
00224 }
00225 return p[input];
00226 }
00227 else
00228 {
00229 bnfa_trans_node_t* t = bnfa->bnfaTransTable[state];
00230 while ( t )
00231 {
00232 if ( t->key == (unsigned)input )
00233 {
00234 return t->next_state;
00235 }
00236 t=t->next;
00237 }
00238 return BNFA_FAIL_STATE; /* Fail state */
00239 }
00240 }
00241
00242 /*
00243 * Put next state - head insertion, and transition updates
00244 */
00245 static int _bnfa_list_put_next_state(bnfa_struct_t* bnfa, int state, int input, int next_state)
00246 {
00247 if ( state >= bnfa->bnfaMaxStates )
00248 {
00249 return -1;
00250 }
00251
00252 if ( input >= bnfa->bnfaAlphabetSize )
00253 {
00254 return -1;
00255 }
00256
00257 if ( state == 0 )
00258 {
00259 bnfa_state_t* p;
00260
00261 p = (bnfa_state_t*)bnfa->bnfaTransTable[0];
00262 if ( !p )
00263 {
00264 p = (bnfa_state_t*)BNFA_MALLOC(sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize,
00265 bnfa->list_memory);
00266 if ( !p )
00267 {
00268 return -1;
00269 }
00270
00271 bnfa->bnfaTransTable[0] = (bnfa_trans_node_t*)p;
00272 }
00273 if ( p[input] )
00274 {
00275 p[input] = next_state;
00276 return 0;
00277 }
00278 p[input] = next_state;
00279 }
00280 else
00281 {
00282 bnfa_trans_node_t* p;
00283 bnfa_trans_node_t* tnew;
00284
00285 /* Check if the transition already exists, if so just update the next_state */
00286 p = bnfa->bnfaTransTable[state];
00287 while ( p )
00288 {
00289 if ( p->key == (unsigned)input ) /* transition already exists- reset the next state
00290 */
00291 {
00292 p->next_state = next_state;
00293 return 0;
00294 }
00295 p=p->next;
00296 }
00297
00298 /* Definitely not an existing transition - add it */
00299 tnew = (bnfa_trans_node_t*)BNFA_MALLOC(sizeof(bnfa_trans_node_t),bnfa->list_memory);
00300 if ( !tnew )
00301 {
00302 return -1;
00303 }
00304
00305 tnew->key = input;
00306 tnew->next_state = next_state;
00307 tnew->next = bnfa->bnfaTransTable[state];
00308
00309 bnfa->bnfaTransTable[state] = tnew;
00310 }
00311
00312 bnfa->bnfaNumTrans++;
00313
00314 return 0;
00315 }
00316
00317 /*
00318 * Free the entire transition list table
00319 */
00320 static int _bnfa_list_free_table(bnfa_struct_t* bnfa)
00321 {
00322 int i;
00323 bnfa_trans_node_t* t, * p;
00324
00325 if ( !bnfa->bnfaTransTable )
00326 return 0;
00327
00328 if ( bnfa->bnfaTransTable[0] )
00329 {
00330 BNFA_FREE(bnfa->bnfaTransTable[0],sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize,
00331 bnfa->list_memory);
00332 }
00333
00334 for (i=1; i<bnfa->bnfaMaxStates; i++)
00335 {
00336 t = bnfa->bnfaTransTable[i];
00337
00338 while ( t )
00339 {
00340 p = t;
00341 t = t->next;
00342 BNFA_FREE(p,sizeof(bnfa_trans_node_t),bnfa->list_memory);
00343 }
00344 }
00345
00346 if ( bnfa->bnfaTransTable )
00347 {
00348 BNFA_FREE(bnfa->bnfaTransTable,sizeof(bnfa_trans_node_t*)*bnfa->bnfaMaxStates,
00349 bnfa->list_memory);
00350 bnfa->bnfaTransTable = nullptr;
00351 }
00352
00353 return 0;
00354 }
00355
00356 static void bnfaBuildMatchStateTrees(SnortConfig* sc, bnfa_struct_t* bnfa)
00357 {
00358 bnfa_match_node_t* mn;
00359 bnfa_match_node_t** MatchList = bnfa->bnfaMatchList;
00360 bnfa_pattern_t* patrn;
00361
00362 for (int i=0; i<bnfa->bnfaNumStates; i++)
00363 {
00364 for (mn = MatchList[i]; mn!= nullptr; mn = mn->next )
00365 {
00366 patrn = (bnfa_pattern_t*)mn->data;
00367
00368 if (patrn->userdata)
00369 {
00370 if (patrn->negative)
00371 {
00372 bnfa->agent->negate_list(patrn->userdata, &MatchList[i]->neg_list);
00373 }
00374 else
00375 {
00376 bnfa->agent->build_tree(sc, patrn->userdata, &MatchList[i]->rule_option_tree);
00377 }
00378 }
00379 }
00380
00381 /* Last call to finalize the tree */
00382 if (MatchList[i])
00383 {
00384 bnfa->agent->build_tree(sc, nullptr, &MatchList[i]->rule_option_tree);
00385 }
00386 }
00387 }
00388
00389 #ifdef ALLOW_LIST_PRINT
00390 /*
00391 * Print the transition list table to stdout
00392 */
00393 static int _bnfa_list_print_table(bnfa_struct_t* bnfa)
00394 {
00395 int i;
00396 bnfa_trans_node_t* t;
00397 bnfa_match_node_t* mn;
00398 bnfa_pattern_t* patrn;
00399
00400 if ( !bnfa->bnfaTransTable )
00401 {
00402 return 0;
00403 }
00404
00405 printf("Print Transition Table- %d active states\n",bnfa->bnfaNumStates);
00406
00407 for (i=0; i< bnfa->bnfaNumStates; i++)
00408 {
00409 printf("state %3d: ",i);
00410
00411 if ( i == 0 )
00412 {
00413 int k;
00414 bnfa_state_t* p = (bnfa_state_t*)bnfa->bnfaTransTable[0];
00415 if (!p)
00416 continue;
00417
00418 for (k=0; k<bnfa->bnfaAlphabetSize; k++)
00419 {
00420 if ( p[k] == 0 )
00421 continue;
00422
00423 if ( isascii((int)p[k]) && isprint((int)p[k]) )
00424 printf("%3c->%-5d\t",k,p[k]);
00425 else
00426 printf("%3d->%-5d\t",k,p[k]);
00427 }
00428 }
00429 else
00430 {
00431 t = bnfa->bnfaTransTable[i];
00432 while ( t )
00433 {
00434 if ( isascii((int)t->key) && isprint((int)t->key) )
00435 printf("%3c->%-5d\t",t->key,t->next_state);
00436 else
00437 printf("%3d->%-5d\t",t->key,t->next_state);
00438 t = t->next;
00439 }
00440 }
00441 mn =bnfa->bnfaMatchList[i];
00442 while ( mn )
00443 {
00444 patrn =(bnfa_pattern_t*)mn->data;
00445 printf("%.*s ",patrn->n,patrn->casepatrn);
00446 mn = mn->next;
00447 }
00448 printf("\n");
00449 }
00450 return 0;
00451 }
00452
00453 #endif
00454 /*
00455 * Converts a single row of states from list format to a full format
00456 */
00457 static int _bnfa_list_conv_row_to_full(bnfa_struct_t* bnfa, bnfa_state_t state, bnfa_state_t* full)
00458 {
00459 if ( (int)state >= bnfa->bnfaMaxStates ) /* protects 'full' against overflow */
00460 {
00461 return -1;
00462 }
00463
00464 if ( state == 0 )
00465 {
00466 if ( bnfa->bnfaTransTable[0] )
00467 memcpy(full,bnfa->bnfaTransTable[0],sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize);
00468 else
00469 memset(full,0,sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize);
00470
00471 return bnfa->bnfaAlphabetSize;
00472 }
00473 else
00474 {
00475 int tcnt = 0;
00476
00477 bnfa_trans_node_t* t = bnfa->bnfaTransTable[ state ];
00478
00479 memset(full,0,sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize);
00480
00481 if ( !t )
00482 {
00483 return 0;
00484 }
00485
00486 while (t && (t->key < BNFA_MAX_ALPHABET_SIZE ) )
00487 {
00488 full[ t->key ] = t->next_state;
00489 tcnt++;
00490 t = t->next;
00491 }
00492 return tcnt;
00493 }
00494 }
00495
00496 /*
00497 * Add pattern characters to the initial upper case trie
00498 * unless Exact has been specified, in which case all patterns
00499 * are assumed to be case specific.
00500 */
00501 static int _bnfa_add_pattern_states(bnfa_struct_t* bnfa, bnfa_pattern_t* p)
00502 {
00503 int state, next, n;
00504 uint8_t* pattern;
00505 bnfa_match_node_t* pmn;
00506
00507 n = p->n;
00508 pattern = p->casepatrn;
00509 state = 0;
00510
00511 /*
00512 * Match up pattern with existing states
00513 */
00514 for (; n > 0; pattern++, n--)
00515 {
00516 if ( bnfa->bnfaCaseMode == BNFA_CASE )
00517 next = _bnfa_list_get_next_state(bnfa,state,*pattern);
00518 else
00519 next = _bnfa_list_get_next_state(bnfa,state,xlatcase[*pattern]);
00520
00521 if ( next == (int)BNFA_FAIL_STATE || next == 0 )
00522 {
00523 break;
00524 }
00525 state = next;
00526 }
00527
00528 /*
00529 * Add new states for the rest of the pattern bytes, 1 state per byte, uppercase
00530 */
00531 for (; n > 0; pattern++, n--)
00532 {
00533 bnfa->bnfaNumStates++;
00534
00535 if ( bnfa->bnfaCaseMode == BNFA_CASE )
00536 {
00537 if ( _bnfa_list_put_next_state(bnfa,state,*pattern,bnfa->bnfaNumStates) < 0 )
00538 return -1;
00539 }
00540 else
00541 {
00542 if ( _bnfa_list_put_next_state(bnfa,state,xlatcase[*pattern],bnfa->bnfaNumStates) <
00543 0 )
00544 return -1;
00545 }
00546 state = bnfa->bnfaNumStates;
00547
00548 if ( bnfa->bnfaNumStates >= bnfa->bnfaMaxStates )
00549 {
00550 return -1;
00551 }
00552 }
00553
00554 /* Add a pattern to the list of patterns terminated at this state */
00555 pmn = (bnfa_match_node_t*)BNFA_MALLOC(sizeof(bnfa_match_node_t),bnfa->matchlist_memory);
00556 if ( !pmn )
00557 {
00558 return -1;
00559 }
00560
00561 pmn->data = p;
00562 pmn->next = bnfa->bnfaMatchList[state];
00563
00564 bnfa->bnfaMatchList[state] = pmn;
00565
00566 return 0;
00567 }
00568
00569 #ifdef XXXXX
00570 int _bnfa_list_get_next_state(bnfa_struct_t* bnfa, int state, int input)
00571 {
00572 if ( state == 0 ) /* Full set of states always */
00573 {
00574 bnfa_state_t* p = (bnfa_state_t*)bnfa->bnfaTransTable[0];
00575 if (!p)
00576 {
00577 return 0;
00578 }
00579 return p[input];
00580 }
00581 else
00582 {
00583 bnfa_trans_node_t* t = bnfa->bnfaTransTable[state];
00584 while ( t )
00585 {
00586 if ( t->key == (unsigned)input )
00587 {
00588 return t->next_state;
00589 }
00590 t=t->next;
00591 }
00592 return BNFA_FAIL_STATE; /* Fail state */
00593 }
00594 }
00595
00596 #endif
00597 /* used only by KcontainsJ() */
00598 static int _bnfa_conv_node_to_full(bnfa_trans_node_t* t, bnfa_state_t* full)
00599 {
00600 int tcnt = 0;
00601
00602 memset(full,0,sizeof(bnfa_state_t)*BNFA_MAX_ALPHABET_SIZE);
00603
00604 if ( !t )
00605 {
00606 return 0;
00607 }
00608
00609 while (t && (t->key < BNFA_MAX_ALPHABET_SIZE ) )
00610 {
00611 full[ t->key ] = t->next_state;
00612 tcnt++;
00613 t = t->next;
00614 }
00615 return tcnt;
00616 }
00617
00618 /*
00619 * containment test -
00620 * test if all of tj transitions are in tk
00621 */
00622 #ifdef XXXX
00623 static int KcontainsJx(bnfa_trans_node_t* tk, bnfa_trans_node_t* tj)
00624 {
00625 bnfa_trans_node_t* t;
00626 int found;
00627
00628 while ( tj )
00629 {
00630 found=0;
00631 for ( t=tk; t; t=t->next )
00632 {
00633 if ( tj->key == t->key )
00634 {
00635 found=1;
00636 break;
00637 }
00638 }
00639 if ( !found )
00640 return 0;
00641
00642 tj=tj->next; /* get next tj key */
00643 }
00644 return 1;
00645 }
00646
00647 #endif
00648 static int KcontainsJ(bnfa_trans_node_t* tk, bnfa_trans_node_t* tj)
00649 {
00650 bnfa_state_t full[BNFA_MAX_ALPHABET_SIZE];
00651
00652 if ( !_bnfa_conv_node_to_full(tk,full) )
00653 return 1; /* empty state */
00654
00655 while ( tj )
00656 {
00657 if ( !full[tj->key] )
00658 return 0;
00659
00660 tj=tj->next; /* get next tj key */
00661 }
00662 return 1;
00663 }
00664
00665 /*
00666 * 1st optimization - eliminate duplicate fail states
00667 *
00668 * check if a fail state is a subset of the current state,
00669 * if so recurse to the next fail state, and so on.
00670 */
00671 static int _bnfa_opt_nfa(bnfa_struct_t* bnfa)
00672 {
00673 #if 0
00674 int cnt=0;
00675 #endif
00676 int k, fs, fr;
00677 bnfa_state_t* FailState = bnfa->bnfaFailState;
00678
00679 for (k=2; k<bnfa->bnfaNumStates; k++)
00680 {
00681 fr = fs = FailState[k];
00682 while ( fs && KcontainsJ(bnfa->bnfaTransTable[k],bnfa->bnfaTransTable[fs]) )
00683 {
00684 fs = FailState[fs];
00685 }
00686 if ( fr != fs )
00687 {
00688 #if 0
00689 cnt++;
00690 #endif
00691 FailState[ k ] = fs;
00692 }
00693 }
00694 #if 0
00695 if ( cnt)
00696 LogMessage("ac-bnfa: %d nfa optimizations found in %d states\n",cnt,bnfa->bnfaNumStates);
00697 #endif
00698 return 0;
00699 }
00700
00701 /*
00702 * Build a non-deterministic finite automata using Aho-Corasick construction
00703 * The keyword trie must already be built via _bnfa_add_pattern_states()
00704 */
00705 static int _bnfa_build_nfa(bnfa_struct_t* bnfa)
00706 {
00707 bnfa_state_t* FailState = bnfa->bnfaFailState;
00708 bnfa_match_node_t** MatchList = bnfa->bnfaMatchList;
00709 bnfa_match_node_t* mlist;
00710 bnfa_match_node_t* px;
00711
00712 std::list<int> queue;
00713
00714 /* Add the state 0 transitions 1st,
00715 * the states at depth 1, fail to state 0
00716 */
00717 for (int i = 0; i < bnfa->bnfaAlphabetSize; i++)
00718 {
00719 /* note that state zero deos not fail,
00720 * it just returns 0..nstates-1
00721 */
00722 int s = _bnfa_list_get_next_state(bnfa,0,i);
00723 if ( s ) /* don't bother adding state zero */
00724 {
00725 queue.push_back(s);
00726 FailState[s] = 0;
00727 }
00728 }
00729
00730 /* Build the fail state successive layer of transitions */
00731 for ( auto r : queue )
00732 {
00733 /* Find Final States for any Failure */
00734 for (int i = 0; i<bnfa->bnfaAlphabetSize; i++)
00735 {
00736 int fs, next;
00737
00738 int s = _bnfa_list_get_next_state(bnfa,r,i);
00739
00740 if ( s == (int)BNFA_FAIL_STATE )
00741 continue;
00742
00743 queue.push_back(s);
00744
00745 fs = FailState[r];
00746
00747 /*
00748 * Locate the next valid state for 'i' starting at fs
00749 */
00750 while ( (next=_bnfa_list_get_next_state(bnfa,fs,i)) == (int)BNFA_FAIL_STATE )
00751 {
00752 fs = FailState[fs];
00753 }
00754
00755 /*
00756 * Update 's' state failure state to point to the next valid state
00757 */
00758 FailState[s] = next;
00759
00760 /*
00761 * Copy 'next' states MatchList into 's' states MatchList,
00762 * we just create a new list nodes, the patterns are not copied.
00763 */
00764 for ( mlist = MatchList[next]; mlist; mlist = mlist->next)
00765 {
00766 /* Dup the node, don't copy the data */
00767 px = (bnfa_match_node_t*)BNFA_MALLOC(sizeof(bnfa_match_node_t),
00768 bnfa->matchlist_memory);
00769 if ( !px )
00770 {
00771 return 0;
00772 }
00773
00774 px->data = mlist->data;
00775
00776 px->next = MatchList[s]; /* insert at head */
00777
00778 MatchList[s] = px;
00779 }
00780 }
00781 }
00782
00783 /* optimize the failure states */
00784 if ( bnfa->bnfaOpt )
00785 _bnfa_opt_nfa(bnfa);
00786
00787 return 0;
00788 }
00789
00790 #ifdef ALLOW_NFA_FULL
00791 /*
00792 * Convert state machine to full format
00793 */
00794 static int _bnfa_conv_list_to_full(bnfa_struct_t* bnfa)
00795 {
00796 int k;
00797 bnfa_state_t* p;
00798 bnfa_state_t** NextState = bnfa->bnfaNextState;
00799
00800 for (k=0; k<bnfa->bnfaNumStates; k++)
00801 {
00802 p = BNFA_MALLOC(sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize,bnfa->nextstate_memory);
00803 if (!p)
00804 {
00805 return -1;
00806 }
00807 _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, p);
00808
00809 NextState[k] = p; /* now we have a full format row vector */
00810 }
00811
00812 return 0;
00813 }
00814
00815 #endif
00816
00817 /*
00818 * Convert state machine to csparse format
00819 *
00820 * Merges state/transition/failure arrays into one.
00821 *
00822 * For each state we use a state-word followed by the transition list for
00823 * the state sw(state 0 )...tl(state 0) sw(state 1)...tl(state1) sw(state2)...
00824 * tl(state2) ....
00825 *
00826 * The transition and failure states are replaced with the start index of
00827 * transition state, this eliminates the NextState[] lookup....
00828 *
00829 * The compaction of multiple arrays into a single array reduces the total
00830 * number of states that can be handled since the max index is 2^24-1,
00831 * whereas without compaction we had 2^24-1 states.
00832 */
00833 static int _bnfa_conv_list_to_csparse_array(bnfa_struct_t* bnfa)
00834 {
00835 int m, k, i, nc;
00836 bnfa_state_t state;
00837 bnfa_state_t* FailState = (bnfa_state_t*)bnfa->bnfaFailState;
00838 bnfa_state_t* ps; /* transition list */
00839 bnfa_state_t* pi; /* state indexes into ps */
00840 bnfa_state_t ps_index=0;
00841 unsigned nps;
00842 bnfa_state_t full[BNFA_MAX_ALPHABET_SIZE];
00843
00844 /* count total state transitions, account for state and control words */
00845 nps = 0;
00846 for (k=0; k<bnfa->bnfaNumStates; k++)
00847 {
00848 nps++; /* state word */
00849 nps++; /* control word */
00850
00851 /* count transitions */
00852 nc = 0;
00853 _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, full);
00854 for ( i=0; i<bnfa->bnfaAlphabetSize; i++ )
00855 {
00856 state = full[i] & BNFA_SPARSE_MAX_STATE;
00857 if ( state != 0 )
00858 {
00859 nc++;
00860 }
00861 }
00862
00863 /* add in transition count */
00864 if ( (k == 0 && bnfa->bnfaForceFullZeroState) || nc > BNFA_SPARSE_MAX_ROW_TRANSITIONS )
00865 {
00866 nps += BNFA_MAX_ALPHABET_SIZE;
00867 }
00868 else
00869 {
00870 for ( i=0; i<bnfa->bnfaAlphabetSize; i++ )
00871 {
00872 state = full[i] & BNFA_SPARSE_MAX_STATE;
00873 if ( state != 0 )
00874 {
00875 nps++;
00876 }
00877 }
00878 }
00879 }
00880
00881 /* check if we have too many states + transitions */
00882 if ( nps > BNFA_SPARSE_MAX_STATE )
00883 {
00884 /* Fatal */
00885 return -1;
00886 }
00887
00888 /*
00889 Alloc The Transition List - we need an array of bnfa_state_t items of size 'nps'
00890 */
00891 ps = BNFA_MALLOC(nps*sizeof(bnfa_state_t),bnfa->nextstate_memory);
00892 if ( !ps )
00893 {
00894 /* Fatal */
00895 return -1;
00896 }
00897 bnfa->bnfaTransList = ps;
00898
00899 /*
00900 State Index list for pi - we need an array of bnfa_state_t items of size 'NumStates'
00901 */
00902 pi = BNFA_MALLOC(bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory);
00903 if ( !pi )
00904 {
00905 /* Fatal */
00906 return -1;
00907 }
00908
00909 /*
00910 Build the Transition List Array
00911 */
00912 for (k=0; k<bnfa->bnfaNumStates; k++)
00913 {
00914 pi[k] = ps_index; /* save index of start of state 'k' */
00915
00916 ps[ ps_index ] = k; /* save the state were in as the 1st word */
00917
00918 ps_index++; /* skip past state word */
00919
00920 /* convert state 'k' to full format */
00921 _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, full);
00922
00923 /* count transitions */
00924 nc = 0;
00925 for ( i=0; i<bnfa->bnfaAlphabetSize; i++ )
00926 {
00927 state = full[i] & BNFA_SPARSE_MAX_STATE;
00928 if ( state != 0 )
00929 {
00930 nc++;
00931 }
00932 }
00933
00934 /* add a full state or a sparse state */
00935 if ( (k == 0 && bnfa->bnfaForceFullZeroState) ||
00936 nc > BNFA_SPARSE_MAX_ROW_TRANSITIONS )
00937 {
00938 /* set the control word */
00939 ps[ps_index] = BNFA_SPARSE_FULL_BIT;
00940 ps[ps_index] |= FailState[k] & BNFA_SPARSE_MAX_STATE;
00941 if ( bnfa->bnfaMatchList[k] )
00942 {
00943 ps[ps_index] |= BNFA_SPARSE_MATCH_BIT;
00944 }
00945 ps_index++;
00946
00947 /* copy the transitions */
00948 _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, &ps[ps_index]);
00949
00950 ps_index += BNFA_MAX_ALPHABET_SIZE; /* add in 256 transitions */
00951 }
00952 else
00953 {
00954 /* set the control word */
00955 ps[ps_index] = nc<<BNFA_SPARSE_COUNT_SHIFT;
00956 ps[ps_index] |= FailState[k]&BNFA_SPARSE_MAX_STATE;
00957 if ( bnfa->bnfaMatchList[k] )
00958 {
00959 ps[ps_index] |= BNFA_SPARSE_MATCH_BIT;
00960 }
00961 ps_index++;
00962
00963 /* add in the transitions */
00964 for ( m=0, i=0; i<bnfa->bnfaAlphabetSize && m<nc; i++ )
00965 {
00966 state = full[i] & BNFA_SPARSE_MAX_STATE;
00967 if ( state != 0 )
00968 {
00969 ps[ps_index++] = (i<<BNFA_SPARSE_VALUE_SHIFT) | state;
00970 m++;
00971 }
00972 }
00973 }
00974 }
00975
00976 /* sanity check we have not overflowed our buffer */
00977 if ( ps_index > nps )
00978 {
00979 /* Fatal */
00980 BNFA_FREE(pi,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory);
00981 return -1;
00982 }
00983
00984 /*
00985 Replace Transition states with Transition Indices.
00986 This allows us to skip using NextState[] to locate the next state
00987 This limits us to <16M transitions due to 24 bit state sizes, and the fact
00988 we have now converted next-state fields to next-index fields in this array,
00989 and we have merged the next-state and state arrays.
00990 */
00991 ps_index=0;
00992 for (k=0; k< bnfa->bnfaNumStates; k++ )
00993 {
00994 if ( pi[k] >= nps )
00995 {
00996 /* Fatal */
00997 return -1;
00998 }
00999
01000 //ps_index = pi[k]; /* get index of next state */
01001 ps_index++; /* skip state id */
01002
01003 /* Full Format */
01004 if ( ps[ps_index] & BNFA_SPARSE_FULL_BIT )
01005 {
01006 /* Do the fail-state */
01007 ps[ps_index] = ( ps[ps_index] & 0xff000000 ) |
01008 ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] );
01009 ps_index++;
01010
01011 /* Do the transition-states */
01012 for (i=0; i<BNFA_MAX_ALPHABET_SIZE; i++)
01013 {
01014 ps[ps_index] = ( ps[ps_index] & 0xff000000 ) |
01015 ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] );
01016 ps_index++;
01017 }
01018 }
01019 /* Sparse Format */
01020 else
01021 {
01022 nc = (ps[ps_index] & BNFA_SPARSE_COUNT_BITS)>>BNFA_SPARSE_COUNT_SHIFT;
01023
01024 /* Do the cw = [cb | fail-state] */
01025 ps[ps_index] = ( ps[ps_index] & 0xff000000 ) |
01026 ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] );
01027 ps_index++;
01028
01029 /* Do the transition-states */
01030 for (i=0; i<nc; i++)
01031 {
01032 ps[ps_index] = ( ps[ps_index] & 0xff000000 ) |
01033 ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] );
01034 ps_index++;
01035 }
01036 }
01037
01038 /* check for buffer overflow again */
01039 if ( ps_index > nps )
01040 {
01041 /* Fatal */
01042 BNFA_FREE(pi,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory);
01043 return -1;
01044 }
01045 }
01046
01047 BNFA_FREE(pi,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory);
01048
01049 return 0;
01050 }
01051
01052 /*
01053 * Print the state machine - rather verbose
01054 */
01055 void bnfaPrint(bnfa_struct_t* bnfa)
01056 {
01057 int k;
01058 bnfa_match_node_t** MatchList;
01059 bnfa_match_node_t* mlist;
01060 int ps_index=0;
01061 bnfa_state_t* ps=nullptr;
01062
01063 if ( !bnfa )
01064 return;
01065
01066 MatchList = bnfa->bnfaMatchList;
01067
01068 if ( !bnfa->bnfaNumStates )
01069 return;
01070
01071 if ( bnfa->bnfaFormat ==BNFA_SPARSE )
01072 {
01073 printf("Print NFA-SPARSE state machine : %d active states\n", bnfa->bnfaNumStates);
01074 ps = bnfa->bnfaTransList;
01075 if ( !ps )
01076 return;
01077 }
01078
01079 #ifdef ALLOW_NFA_FULL
01080 else if ( bnfa->bnfaFormat ==BNFA_FULL )
01081 {
01082 printf("Print NFA-FULL state machine : %d active states\n", bnfa->bnfaNumStates);
01083 }
01084 #endif
01085
01086 for (k=0; k<bnfa->bnfaNumStates; k++)
01087 {
01088 printf(" state %-4d fmt=%d ",k,bnfa->bnfaFormat);
01089
01090 if ( bnfa->bnfaFormat == BNFA_SPARSE )
01091 {
01092 unsigned i,cw,fs,nt,fb,mb;
01093
01094 ps_index++; /* skip state number */
01095
01096 cw = ps[ps_index]; /* control word */
01097 fb = (cw & BNFA_SPARSE_FULL_BIT)>>BNFA_SPARSE_VALUE_SHIFT; /* full storage bit */
01098 mb = (cw & BNFA_SPARSE_MATCH_BIT)>>BNFA_SPARSE_VALUE_SHIFT; /* matching state bit */
01099 nt = (cw & BNFA_SPARSE_COUNT_BITS)>>BNFA_SPARSE_VALUE_SHIFT; /* number of transitions
01100 0-63 */
01101 fs = (cw & BNFA_SPARSE_MAX_STATE)>>BNFA_SPARSE_VALUE_SHIFT; /* fail state */
01102
01103 ps_index++; /* skip control word */
01104
01105 printf("mb=%3u fb=%3u fs=%-4u ",mb,fb,fs);
01106
01107 if ( fb )
01108 {
01109 printf(" nt=%-3d : ",bnfa->bnfaAlphabetSize);
01110
01111 for ( i=0; i<(unsigned)bnfa->bnfaAlphabetSize; i++, ps_index++ )
01112 {
01113 if ( ps[ps_index] == 0 )
01114 continue;
01115
01116 if ( isascii((int)i) && isprint((int)i) )
01117 printf("%3c->%-6d\t",i,ps[ps_index]);
01118 else
01119 printf("%3d->%-6d\t",i,ps[ps_index]);
01120 }
01121 }
01122 else
01123 {
01124 printf(" nt=%-3d : ",nt);
01125
01126 for ( i=0; i<nt; i++, ps_index++ )
01127 {
01128 if ( isascii(ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT) &&
01129 isprint(ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT) )
01130 printf("%3c->%-6d\t",ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT,ps[ps_index] &
01131 BNFA_SPARSE_MAX_STATE);
01132 else
01133 printf("%3d->%-6d\t",ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT,ps[ps_index] &
01134 BNFA_SPARSE_MAX_STATE);
01135 }
01136 }
01137 }
01138 #ifdef ALLOW_NFA_FULL
01139 else if ( bnfa->bnfaFormat == BNFA_FULL )
01140 {
01141 int i;
01142 bnfa_state_t state;
01143 bnfa_state_t* p;
01144 bnfa_state_t** NextState;
01145
01146 NextState = (bnfa_state_t**)bnfa->bnfaNextState;
01147 if ( !NextState )
01148 continue;
01149
01150 p = NextState[k];
01151
01152 printf("fs=%-4d nc=256 ",bnfa->bnfaFailState[k]);
01153
01154 for ( i=0; i<bnfa->bnfaAlphabetSize; i++ )
01155 {
01156 state = p[i];
01157
01158 if ( state != 0 && state != BNFA_FAIL_STATE )
01159 {
01160 if ( isascii(i) && isprint(i) )
01161 printf("%3c->%-5d\t",i,state);
01162 else
01163 printf("%3d->%-5d\t",i,state);
01164 }
01165 }
01166 }
01167 #endif
01168
01169 printf("\n");
01170
01171 if ( MatchList[k] )
01172 printf("---MatchList For State %d\n",k);
01173
01174 for ( mlist = MatchList[k];
01175 mlist!= nullptr;
01176 mlist = mlist->next )
01177 {
01178 bnfa_pattern_t* pat;
01179 pat = (bnfa_pattern_t*)mlist->data;
01180 printf("---pattern : %.*s\n",pat->n,pat->casepatrn);
01181 }
01182 }
01183 }
01184
01185 /*
01186 * Create a new AC state machine
01187 */
01188 bnfa_struct_t* bnfaNew(const MpseAgent* agent)
01189 {
01190 int bnfa_memory=0;
01191 bnfa_struct_t* p = (bnfa_struct_t*)BNFA_MALLOC(sizeof(bnfa_struct_t),bnfa_memory);
01192 if (!p)
01193 return nullptr;
01194
01195 if ( p )
01196 {
01197 p->bnfaOpt = 0;
01198 p->bnfaCaseMode = BNFA_PER_PAT_CASE;
01199 p->bnfaFormat = BNFA_SPARSE;
01200 p->bnfaAlphabetSize = BNFA_MAX_ALPHABET_SIZE;
01201 p->bnfaForceFullZeroState = 1;
01202 p->bnfa_memory = sizeof(bnfa_struct_t);
01203 p->agent = agent;
01204 }
01205
01206 return p;
01207 }
01208
01209 void bnfaSetOpt(bnfa_struct_t* p, int flag)
01210 {
01211 p->bnfaOpt=flag;
01212 }
01213
01214 void bnfaSetCase(bnfa_struct_t* p, int flag)
01215 {
01216 if ( flag == BNFA_PER_PAT_CASE )
01217 p->bnfaCaseMode = flag;
01218 if ( flag == BNFA_CASE )
01219 p->bnfaCaseMode = flag;
01220 if ( flag == BNFA_NOCASE )
01221 p->bnfaCaseMode = flag;
01222 }
01223
01224 /*
01225 * Fee all memory
01226 */
01227 void bnfaFree(bnfa_struct_t* bnfa)
01228 {
01229 int i;
01230 bnfa_pattern_t* patrn, * ipatrn;
01231 bnfa_match_node_t* mlist, * ilist;
01232
01233 for (i = 0; i < bnfa->bnfaNumStates; i++)
01234 {
01235 /* free match list entries */
01236 mlist = bnfa->bnfaMatchList[i];
01237
01238 while (mlist)
01239 {
01240 ilist = mlist;
01241 mlist = mlist->next;
01242 if (ilist->rule_option_tree && bnfa->agent)
01243 {
01244 bnfa->agent->tree_free(&(ilist->rule_option_tree));
01245 }
01246
01247 if (ilist->neg_list && bnfa->agent)
01248 {
01249 bnfa->agent->list_free(&(ilist->neg_list));
01250 }
01251
01252 BNFA_FREE(ilist,sizeof(bnfa_match_node_t),bnfa->matchlist_memory);
01253 }
01254 bnfa->bnfaMatchList[i] = nullptr;
01255
01256 #ifdef ALLOW_NFA_FULL
01257 /* free next state entries */
01258 if ( bnfa->bnfaFormat==BNFA_FULL ) /* Full format */
01259 {
01260 if ( bnfa->bnfaNextState[i] )
01261 {
01262 BNFA_FREE(bnfa->bnfaNextState[i],bnfa->bnfaAlphabetSize*sizeof(bnfa_state_t),
01263 bnfa->nextstate_memory);
01264 }
01265 }
01266 #endif
01267 }
01268
01269 /* Free patterns */
01270 patrn = bnfa->bnfaPatterns;
01271 while (patrn)
01272 {
01273 ipatrn=patrn;
01274 patrn=patrn->next;
01275 BNFA_FREE(ipatrn->casepatrn,ipatrn->n,bnfa->pat_memory);
01276 if (bnfa->agent && ipatrn->userdata)
01277 bnfa->agent->user_free(ipatrn->userdata);
01278 BNFA_FREE(ipatrn,sizeof(bnfa_pattern_t),bnfa->pat_memory);
01279 }
01280
01281 /* Free arrays */
01282 BNFA_FREE(bnfa->bnfaFailState,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->failstate_memory);
01283 BNFA_FREE(bnfa->bnfaMatchList,bnfa->bnfaNumStates*sizeof(bnfa_pattern_t*),
01284 bnfa->matchlist_memory);
01285 BNFA_FREE(bnfa->bnfaNextState,bnfa->bnfaNumStates*sizeof(bnfa_state_t*),
01286 bnfa->nextstate_memory);
01287 BNFA_FREE(bnfa->bnfaTransList,(2*bnfa->bnfaNumStates+bnfa->bnfaNumTrans)*sizeof(bnfa_state_t*),
01288 bnfa->nextstate_memory);
01289 snort_free(bnfa); /* cannot update memory tracker when deleting bnfa so just 'free' it !*/
01290 }
01291
01292 /*
01293 * Add a pattern to the pattern list
01294 */
01295 int bnfaAddPattern(
01296 bnfa_struct_t* p,
01297 const uint8_t* pat,
01298 unsigned n,
01299 bool nocase,
01300 bool negative,
01301 void* userdata)
01302 {
01303 bnfa_pattern_t* plist;
01304
01305 plist = (bnfa_pattern_t*)BNFA_MALLOC(sizeof(bnfa_pattern_t),p->pat_memory);
01306 if (!plist)
01307 return -1;
01308
01309 plist->casepatrn = (uint8_t*)BNFA_MALLOC(n,p->pat_memory);
01310 if (!plist->casepatrn)
01311 {
01312 BNFA_FREE(plist,sizeof(bnfa_pattern_t),p->pat_memory);
01313 return -1;
01314 }
01315
01316 memcpy (plist->casepatrn, pat, n);
01317
01318 plist->n = n;
01319 plist->nocase = nocase;
01320 plist->negative = negative;
01321 plist->userdata = userdata;
01322
01323 plist->next = p->bnfaPatterns; /* insert at front of list */
01324 p->bnfaPatterns = plist;
01325
01326 p->bnfaPatternCnt++;
01327
01328 return 0;
01329 }
01330
01331 /*
01332 * Compile the patterns into an nfa state machine
01333 */
01334 static inline int _bnfaCompile(bnfa_struct_t* bnfa)
01335 {
01336 bnfa_pattern_t* plist;
01337 bnfa_match_node_t** tmpMatchList;
01338 unsigned cntMatchStates;
01339 int i;
01340
01341 /* Count number of states */
01342 for (plist = bnfa->bnfaPatterns; plist != nullptr; plist = plist->next)
01343 {
01344 bnfa->bnfaMaxStates += plist->n;
01345 }
01346 bnfa->bnfaMaxStates++; /* one extra */
01347
01348 /* Alloc a List based State Transition table */
01349 bnfa->bnfaTransTable =(bnfa_trans_node_t**)BNFA_MALLOC(sizeof(bnfa_trans_node_t*) *
01350 bnfa->bnfaMaxStates,bnfa->list_memory);
01351 if (!bnfa->bnfaTransTable)
01352 {
01353 return -1;
01354 }
01355
01356 /* Alloc a MatchList table - this has a list of pattern matches for each state */
01357 bnfa->bnfaMatchList=(bnfa_match_node_t**)BNFA_MALLOC(sizeof(void*)*bnfa->bnfaMaxStates,
01358 bnfa->matchlist_memory);
01359 if (!bnfa->bnfaMatchList)
01360 {
01361 return -1;
01362 }
01363
01364 /* Add each Pattern to the State Table - This forms a keyword trie using lists */
01365 bnfa->bnfaNumStates = 0;
01366 for (plist = bnfa->bnfaPatterns; plist != nullptr; plist = plist->next)
01367 {
01368 _bnfa_add_pattern_states (bnfa, plist);
01369 }
01370 bnfa->bnfaNumStates++;
01371
01372 if ( bnfa->bnfaNumStates > BNFA_SPARSE_MAX_STATE )
01373 {
01374 return -1; /* Call bnfaFree to clean up */
01375 }
01376
01377 /* ReAlloc a smaller MatchList table - only need NumStates */
01378 tmpMatchList=bnfa->bnfaMatchList;
01379
01380 bnfa->bnfaMatchList=(bnfa_match_node_t**)BNFA_MALLOC(sizeof(void*) * bnfa->bnfaNumStates,
01381 bnfa->matchlist_memory);
01382 if (!bnfa->bnfaMatchList)
01383 {
01384 return -1;
01385 }
01386
01387 memcpy(bnfa->bnfaMatchList,tmpMatchList,sizeof(void*) * bnfa->bnfaNumStates);
01388
01389 BNFA_FREE(tmpMatchList,sizeof(void*) * bnfa->bnfaMaxStates,bnfa->matchlist_memory);
01390
01391 /* Alloc a failure state table - only need NumStates */
01392 bnfa->bnfaFailState =(bnfa_state_t*)BNFA_MALLOC(sizeof(bnfa_state_t) * bnfa->bnfaNumStates,
01393 bnfa->failstate_memory);
01394 if (!bnfa->bnfaFailState)
01395 {
01396 return -1;
01397 }
01398
01399 #ifdef ALLOW_NFA_FULL
01400 if ( bnfa->bnfaFormat == BNFA_FULL )
01401 {
01402 /* Alloc a state transition table - only need NumStates */
01403 bnfa->bnfaNextState=(bnfa_state_t**)BNFA_MALLOC(sizeof(bnfa_state_t*) *
01404 bnfa->bnfaNumStates,bnfa->nextstate_memory);
01405 if (!bnfa->bnfaNextState)
01406 {
01407 return -1;
01408 }
01409 }
01410 #endif
01411
01412 /* Build the nfa w/failure states - time the nfa construction */
01413 if ( _bnfa_build_nfa (bnfa) )
01414 {
01415 return -1;
01416 }
01417
01418 /* Convert nfa storage format from list to full or sparse */
01419 if ( bnfa->bnfaFormat == BNFA_SPARSE )
01420 {
01421 if ( _bnfa_conv_list_to_csparse_array(bnfa) )
01422 {
01423 return -1;
01424 }
01425 BNFA_FREE(bnfa->bnfaFailState,sizeof(bnfa_state_t)*bnfa->bnfaNumStates,
01426 bnfa->failstate_memory);
01427 bnfa->bnfaFailState=nullptr;
01428 }
01429 #ifdef ALLOW_NFA_FULL
01430 else if ( bnfa->bnfaFormat == BNFA_FULL )
01431 {
01432 if ( _bnfa_conv_list_to_full(bnfa) )
01433 {
01434 return -1;
01435 }
01436 }
01437 #endif
01438 else
01439 {
01440 return -1;
01441 }
01442
01443 /* Free up the Table Of Transition Lists */
01444 _bnfa_list_free_table(bnfa);
01445
01446 /* Count states with Pattern Matches */
01447 cntMatchStates=0;
01448 for (i=0; i<bnfa->bnfaNumStates; i++)
01449 {
01450 if ( bnfa->bnfaMatchList[i] )
01451 cntMatchStates++;
01452 }
01453
01454 bnfa->bnfaMatchStates = cntMatchStates;
01455
01456 bnfaAccumInfo(bnfa);
01457
01458 return 0;
01459 }
01460
01461 int bnfaCompile(
01462 SnortConfig* sc, bnfa_struct_t* bnfa)
01463 {
01464 if ( int rval = _bnfaCompile (bnfa) )
01465 return rval;
01466
01467 if ( bnfa->agent )
01468 bnfaBuildMatchStateTrees(sc, bnfa);
01469
01470 return 0;
01471 }
01472
01473 #ifdef ALLOW_NFA_FULL
01474
01475 /*
01476 * Full Matrix Format Search
01477 */
01478 static inline unsigned _bnfa_search_full_nfa(
01479 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
01480 void* context, bnfa_state_t state, int* current_state)
01481 {
01482 uint8_t* Tend;
01483 uint8_t* T;
01484 uint8_t Tchar;
01485 unsigned index;
01486 bnfa_state_t** NextState= bnfa->bnfaNextState;
01487 bnfa_state_t* FailState= bnfa->bnfaFailState;
01488 bnfa_match_node_t** MatchList= bnfa->bnfaMatchList;
01489 bnfa_state_t* pcs;
01490 bnfa_match_node_t* mlist;
01491 bnfa_pattern_t* patrn;
01492 unsigned nfound = 0;
01493 int res;
01494 unsigned last_match=LAST_STATE_INIT;
01495 unsigned last_match_saved=LAST_STATE_INIT;
01496
01497 T = Tx;
01498 Tend = T + n;
01499
01500 for (; T < Tend; T++ )
01501 {
01502 Tchar = xlatcase[ *T ];
01503
01504 for (;; )
01505 {
01506 pcs = NextState[state];
01507 if ( pcs[Tchar] == 0 && state > 0 )
01508 {
01509 state = FailState[state];
01510 }
01511 else
01512 {
01513 state = pcs[Tchar];
01514 break;
01515 }
01516 }
01517
01518 if ( state )
01519 {
01520 if ( state == last_match )
01521 continue;
01522
01523 last_match_saved=last_match;
01524 last_match = state;
01525
01526 {
01527 mlist = MatchList[state];
01528 if (!mlist)
01529 {
01530 continue;
01531 }
01532 patrn = (bnfa_pattern_t*)mlist->data;
01533 index = T - Tx + 1;
01534 nfound++;
01535 /* Don't do anything specific for case sensitive patterns and not,
01536 * since that will be covered by the rule tree itself. Each tree
01537 * might have both case sensitive & case insensitive patterns.
01538 */
01539 res = match(patrn->userdata, mlist->rule_option_tree, index, context,
01540 mlist->neg_list);
01541 if ( res > 0 )
01542 {
01543 *current_state = state;
01544 return nfound;
01545 }
01546 else if ( res < 0 )
01547 {
01548 last_match = last_match_saved;
01549 }
01550 }
01551 }
01552 }
01553 *current_state = state;
01554 return nfound;
01555 }
01556
01557 /*
01558 * Full Matrix Format Search - Exact matching patterns only
01559 */
01560 static inline unsigned _bnfa_search_full_nfa_case(
01561 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
01562 void* context, bnfa_state_t state, int* current_state)
01563 {
01564 uint8_t* Tend;
01565 uint8_t* T;
01566 uint8_t Tchar;
01567 unsigned index;
01568 bnfa_state_t** NextState= bnfa->bnfaNextState;
01569 bnfa_state_t* FailState= bnfa->bnfaFailState;
01570 bnfa_match_node_t** MatchList= bnfa->bnfaMatchList;
01571 bnfa_state_t* pcs;
01572 bnfa_match_node_t* mlist;
01573 bnfa_pattern_t* patrn;
01574 unsigned nfound = 0;
01575 unsigned last_match=LAST_STATE_INIT;
01576 unsigned last_match_saved=LAST_STATE_INIT;
01577 int res;
01578
01579 T = Tx;
01580 Tend = T + n;
01581
01582 for (; T < Tend; T++ )
01583 {
01584 Tchar = *T;
01585
01586 for (;; )
01587 {
01588 pcs = NextState[state];
01589 if ( pcs[Tchar] == 0 && state > 0 )
01590 {
01591 state = FailState[state];
01592 }
01593 else
01594 {
01595 state = pcs[Tchar];
01596 break;
01597 }
01598 }
01599
01600 if ( state )
01601 {
01602 if ( state == last_match )
01603 continue;
01604
01605 last_match_saved=last_match;
01606 last_match = state;
01607
01608 {
01609 mlist = MatchList[state];
01610 if (!mlist)
01611 {
01612 continue;
01613 }
01614 patrn = (bnfa_pattern_t*)mlist->data;
01615 index = T - Tx + 1;
01616 nfound++;
01617 /* Don't do anything specific for case (in)sensitive patterns
01618 * since that will be covered by the rule tree itself. Each
01619 * tree might have both case sensitive & case insensitive patterns.
01620 */
01621 res = match(patrn->userdata, mlist->rule_option_tree, index, context,
01622 mlist->neg_list);
01623 if ( res > 0 )
01624 {
01625 *current_state = state;
01626 return nfound;
01627 }
01628 else if ( res < 0 )
01629 {
01630 last_match = last_match_saved;
01631 }
01632 }
01633 }
01634 }
01635 *current_state = state;
01636 return nfound;
01637 }
01638
01639 /*
01640 * Full Matrix Format Search - no case
01641 */
01642 static inline unsigned _bnfa_search_full_nfa_nocase(
01643 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
01644 void* context, bnfa_state_t state, int* current_state)
01645 {
01646 uint8_t* Tend;
01647 uint8_t* T;
01648 uint8_t Tchar;
01649 unsigned index;
01650 bnfa_state_t** NextState= bnfa->bnfaNextState;
01651 bnfa_state_t* FailState= bnfa->bnfaFailState;
01652 bnfa_match_node_t** MatchList= bnfa->bnfaMatchList;
01653 bnfa_state_t* pcs;
01654 bnfa_match_node_t* mlist;
01655 bnfa_pattern_t* patrn;
01656 unsigned nfound = 0;
01657 unsigned last_match=LAST_STATE_INIT;
01658 unsigned last_match_saved=LAST_STATE_INIT;
01659 int res;
01660
01661 T = Tx;
01662 Tend = T + n;
01663
01664 for (; T < Tend; T++ )
01665 {
01666 Tchar = xlatcase[ *T ];
01667
01668 for (;; )
01669 {
01670 pcs = NextState[state];
01671 if ( pcs[Tchar] == 0 && state > 0 )
01672 {
01673 state = FailState[state];
01674 }
01675 else
01676 {
01677 state = pcs[Tchar];
01678 break;
01679 }
01680 }
01681
01682 if ( state )
01683 {
01684 if ( state == last_match )
01685 continue;
01686
01687 last_match_saved=last_match;
01688 last_match = state;
01689
01690 {
01691 mlist = MatchList[state];
01692 if (!mlist)
01693 {
01694 continue;
01695 }
01696 patrn = (bnfa_pattern_t*)mlist->data;
01697 index = T - Tx + 1;
01698 /* Don't do anything specific for case sensitive patterns and not,
01699 * since that will be covered by the rule tree itself. Each tree
01700 * might have both case sensitive & case insensitive patterns.
01701 */
01702 res = match(patrn->userdata, mlist->rule_option_tree, index, context,
01703 mlist->neg_list);
01704 if ( res > 0 )
01705 {
01706 *current_state = state;
01707 return nfound;
01708 }
01709 else if ( res < 0 )
01710 {
01711 last_match = last_match_saved;
01712 }
01713 }
01714 }
01715 }
01716 *current_state = state;
01717 return nfound;
01718 }
01719
01720 #endif
01721
01722 /*
01723 binary array search on sparse transition array
01724
01725 O(logN) search times..same as a binary tree.
01726 data must be in sorted order in the array.
01727
01728 return: = -1 => not found
01729 >= 0 => index of element 'val'
01730
01731 notes:
01732 val is tested against the high 8 bits of the 'a' array entry,
01733 this is particular to the storage format we are using.
01734 */
01735 static inline int _bnfa_binearch(const bnfa_state_t* a, int a_len, int val)
01736 {
01737 int m, l, r;
01738 int c;
01739
01740 l = 0;
01741 r = a_len - 1;
01742
01743 while ( r >= l )
01744 {
01745 m = ( r + l ) >> 1;
01746
01747 c = a[m] >> BNFA_SPARSE_VALUE_SHIFT;
01748
01749 if ( val == c )
01750 {
01751 return m;
01752 }
01753 else if ( val < c )
01754 {
01755 r = m - 1;
01756 }
01757 else /* val > c */
01758 {
01759 l = m + 1;
01760 }
01761 }
01762 return -1;
01763 }
01764
01765 /*
01766 * Sparse format for state table using single array storage
01767 *
01768 * word 1: state
01769 * word 2: control-word = cb<<24| fs
01770 * cb : control-byte
01771 * : mb | fb | nt
01772 * mb : bit 8 set if match state, zero otherwise
01773 * fb : bit 7 set if using full format, zero otherwise
01774 * nt : number of transitions 0..63 (more than 63 requires full format)
01775 * fs: failure-transition-state
01776 * word 3+: byte-value(0-255) << 24 | transition-state
01777 */
01778 static inline unsigned _bnfa_get_next_state_csparse_nfa(
01779 bnfa_state_t* pcx, unsigned sindex, unsigned input)
01780 {
01781 int k;
01782 int nc;
01783 int index;
01784 bnfa_state_t* pcs;
01785
01786 for (;; )
01787 {
01788 pcs = pcx + sindex + 1; /* skip state-id == 1st word */
01789
01790 if ( pcs[0] & BNFA_SPARSE_FULL_BIT )
01791 {
01792 if ( sindex == 0 )
01793 {
01794 return pcs[1+input] & BNFA_SPARSE_MAX_STATE;
01795 }
01796 else
01797 {
01798 if ( pcs[1+input] & BNFA_SPARSE_MAX_STATE )
01799 return pcs[1+input] & BNFA_SPARSE_MAX_STATE;
01800 }
01801 }
01802 else
01803 {
01804 nc = (pcs[0]>>BNFA_SPARSE_COUNT_SHIFT) & BNFA_SPARSE_MAX_ROW_TRANSITIONS;
01805 if ( nc > BNFA_SPARSE_LINEAR_SEARCH_LIMIT )
01806 {
01807 /* binary search... */
01808 index = _bnfa_binearch(pcs+1, nc, input);
01809 if ( index >= 0 )
01810 {
01811 return pcs[index+1] & BNFA_SPARSE_MAX_STATE;
01812 }
01813 }
01814 else
01815 {
01816 /* linear search... */
01817 for ( k=0; k<nc; k++ )
01818 {
01819 if ( (pcs[k+1]>>BNFA_SPARSE_VALUE_SHIFT) == input )
01820 {
01821 return pcs[k+1] & BNFA_SPARSE_MAX_STATE;
01822 }
01823 }
01824 }
01825 }
01826
01827 /* no transition found ... get the failure state and try again */
01828 sindex = pcs[0] & BNFA_SPARSE_MAX_STATE;
01829 }
01830 }
01831
01832 /*
01833 * Per Pattern case search, case is on per pattern basis standard snort
01834 * search note: index is not used by snort, so it's commented
01835 */
01836
01837 /*
01838 * Per Pattern case search, case is on per pattern basis
01839 * standard snort search
01840 *
01841 */
01842 unsigned _bnfa_search_csparse_nfa(
01843 bnfa_struct_t* bnfa, const uint8_t* Tx, int n, MpseMatch match,
01844 void* context, unsigned sindex, int* current_state)
01845 {
01846 bnfa_match_node_t* mlist;
01847 const uint8_t* Tend;
01848 const uint8_t* T;
01849 uint8_t Tchar;
01850 unsigned index;
01851 bnfa_match_node_t** MatchList = bnfa->bnfaMatchList;
01852 bnfa_pattern_t* patrn;
01853 bnfa_state_t* transList = bnfa->bnfaTransList;
01854 unsigned nfound = 0;
01855 unsigned last_match=LAST_STATE_INIT;
01856 unsigned last_match_saved=LAST_STATE_INIT;
01857 int res;
01858
01859 T = Tx;
01860 Tend = T + n;
01861
01862 for (; T<Tend; T++)
01863 {
01864 Tchar = xlatcase[ *T ];
01865
01866 /* Transition to next state index */
01867 sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,Tchar);
01868
01869 /* Log matches in this state - if any */
01870 if ( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) )
01871 {
01872 if ( sindex == last_match )
01873 continue;
01874
01875 last_match_saved = last_match;
01876 last_match = sindex;
01877
01878 {
01879 mlist = MatchList[ transList[sindex] ];
01880 if ( !mlist )
01881 return nfound;
01882
01883 patrn = (bnfa_pattern_t*)mlist->data;
01884 index = T - Tx + 1;
01885 nfound++;
01886 /* Don't do anything specific for case sensitive patterns and not,
01887 * since that will be covered by the rule tree itself. Each tree
01888 * might have both case sensitive & case insensitive patterns.
01889 */
01890 res = match(patrn->userdata, mlist->rule_option_tree, index,
01891 context, mlist->neg_list);
01892 if ( res > 0 )
01893 {
01894 *current_state = sindex;
01895 return nfound;
01896 }
01897 else if ( res < 0 )
01898 {
01899 last_match = last_match_saved;
01900 }
01901 }
01902 }
01903 }
01904 *current_state = sindex;
01905 return nfound;
01906 }
01907
01908 #ifdef BNFA_MAIN
01909 /*
01910 * Case specific search, global to all patterns
01911 *
01912 * note: index is not used by snort, so it's commented
01913 */
01914 static inline unsigned _bnfa_search_csparse_nfa_case(
01915 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
01916 void* context, unsigned sindex, int* current_state)
01917 {
01918 bnfa_match_node_t* mlist;
01919 uint8_t* Tend;
01920 uint8_t* T;
01921 unsigned index;
01922 bnfa_match_node_t** MatchList = bnfa->bnfaMatchList;
01923 bnfa_pattern_t* patrn;
01924 bnfa_state_t* transList = bnfa->bnfaTransList;
01925 unsigned nfound = 0;
01926 unsigned last_match=LAST_STATE_INIT;
01927 unsigned last_match_saved=LAST_STATE_INIT;
01928 int res;
01929
01930 T = Tx;
01931 Tend = T + n;
01932
01933 for (; T<Tend; T++)
01934 {
01935 /* Transition to next state index */
01936 sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,*T);
01937
01938 /* Log matches in this state - if any */
01939 if ( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) )
01940 {
01941 if ( sindex == last_match )
01942 continue;
01943
01944 last_match_saved = last_match;
01945 last_match = sindex;
01946
01947 {
01948 mlist = MatchList[ transList[sindex] ];
01949 patrn = (bnfa_pattern_t*)mlist->data;
01950 index = T - Tx + 1;
01951 nfound++;
01952 /* Don't do anything specific for case sensitive patterns and not,
01953 * since that will be covered by the rule tree itself. Each tree
01954 * might have both case sensitive & case insensitive patterns.
01955 */
01956 res = match(patrn->userdata, mlist->rule_option_tree, index,
01957 context, mlist->neg_list);
01958 if ( res > 0 )
01959 {
01960 *current_state = sindex;
01961 return nfound;
01962 }
01963 else if ( res < 0 )
01964 {
01965 last_match = last_match_saved;
01966 }
01967 }
01968 }
01969 }
01970 *current_state = sindex;
01971 return nfound;
01972 }
01973
01974 /*
01975 * NoCase search - global to all patterns
01976 *
01977 * note: index is not used by snort, so it's commented
01978 */
01979 static inline unsigned _bnfa_search_csparse_nfa_nocase(
01980 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
01981 void* context, unsigned sindex, int* current_state)
01982 {
01983 bnfa_match_node_t* mlist;
01984 uint8_t* Tend;
01985 uint8_t* T;
01986 uint8_t Tchar;
01987 unsigned index;
01988 bnfa_match_node_t** MatchList = bnfa->bnfaMatchList;
01989 bnfa_pattern_t* patrn;
01990 bnfa_state_t* transList = bnfa->bnfaTransList;
01991 unsigned nfound = 0;
01992 unsigned last_match=LAST_STATE_INIT;
01993 unsigned last_match_saved=LAST_STATE_INIT;
01994 int res;
01995
01996 T = Tx;
01997 Tend = T + n;
01998
01999 for (; T<Tend; T++)
02000 {
02001 Tchar = xlatcase[ *T ];
02002
02003 /* Transition to next state index */
02004 sindex = _bnfa_get_next_state_csparse_nfa(transList,sindex,Tchar);
02005
02006 /* Log matches in this state - if any */
02007 if ( sindex && (transList[sindex+1] & BNFA_SPARSE_MATCH_BIT) )
02008 {
02009 if ( sindex == last_match )
02010 continue;
02011
02012 last_match_saved = last_match;
02013 last_match = sindex;
02014
02015 {
02016 mlist = MatchList[ transList[sindex] ];
02017 patrn = (bnfa_pattern_t*)mlist->data;
02018 index = T - Tx + 1;
02019 nfound++;
02020 /* Don't do anything specific for case sensitive patterns and not,
02021 * since that will be covered by the rule tree itself. Each tree
02022 * might have both case sensitive & case insensitive patterns.
02023 */
02024 res = match(patrn->userdata, mlist->rule_option_tree, index,
02025 context, mlist->neg_list);
02026 if ( res > 0 )
02027 {
02028 *current_state = sindex;
02029 return nfound;
02030 }
02031 else if ( res < 0 )
02032 {
02033 last_match = last_match_saved;
02034 }
02035 }
02036 }
02037 }
02038 *current_state = sindex;
02039 return nfound;
02040 }
02041 #endif
02042
02043 int bnfaPatternCount(bnfa_struct_t* p)
02044 {
02045 return p->bnfaPatternCnt;
02046 }
02047
02048 /*
02049 * Summary Info Data
02050 */
02051 static bnfa_struct_t summary;
02052 static int summary_cnt = 0;
02053
02054 static void bnfaPrintInfoEx(bnfa_struct_t* p)
02055 {
02056 unsigned max_memory;
02057
02058 if ( !p->bnfaNumStates )
02059 {
02060 return;
02061 }
02062 max_memory = p->bnfa_memory + p->pat_memory + p->list_memory +
02063 p->matchlist_memory + p->failstate_memory + p->nextstate_memory;
02064
02065 LogCount("instances", summary_cnt);
02066 LogCount("patterns", p->bnfaPatternCnt);
02067 LogCount("pattern chars", p->bnfaMaxStates);
02068 LogCount("num states", p->bnfaNumStates);
02069 LogCount("num match states", p->bnfaMatchStates);
02070
02071 double scale;
02072
02073 if ( max_memory < 1024*1024 )
02074 {
02075 scale = 1024;
02076 LogValue("memory scale", "KB");
02077 }
02078 else
02079 {
02080 scale = 1024 * 1024;
02081 LogValue("memory scale", "MB");
02082 }
02083 LogStat("total memory", max_memory/scale);
02084 LogStat("pattern memory", p->pat_memory/scale);
02085 LogStat("match list memory", p->matchlist_memory/scale);
02086 LogStat("transition memory", p->nextstate_memory/scale);
02087 }
02088
02089 void bnfaPrintInfo(bnfa_struct_t* p)
02090 {
02091 bnfaPrint(p);
02092 }
02093
02094 void bnfaPrintSummary()
02095 {
02096 bnfaPrintInfoEx(&summary);
02097 }
02098
02099 void bnfaInitSummary()
02100 {
02101 summary_cnt=0;
02102 memset(&summary,0,sizeof(bnfa_struct_t));
02103 }
02104
02105 void bnfaAccumInfo(bnfa_struct_t* p)
02106 {
02107 bnfa_struct_t* px = &summary;
02108
02109 summary_cnt++;
02110
02111 px->bnfaAlphabetSize = p->bnfaAlphabetSize;
02112 px->bnfaPatternCnt += p->bnfaPatternCnt;
02113 px->bnfaMaxStates += p->bnfaMaxStates;
02114 px->bnfaNumStates += p->bnfaNumStates;
02115 px->bnfaNumTrans += p->bnfaNumTrans;
02116 px->bnfaMatchStates += p->bnfaMatchStates;
02117 px->bnfa_memory += p->bnfa_memory;
02118 px->pat_memory += p->pat_memory;
02119 px->list_memory += p->list_memory;
02120 px->matchlist_memory += p->matchlist_memory;
02121 px->nextstate_memory += p->nextstate_memory;
02122 px->failstate_memory += p->failstate_memory;
02123 }
02124
02125 #ifdef BNFA_MAIN
02126 #include <stdarg.h>
02127 /*
02128 * BNFA Search Function
02129 *
02130 * bnfa - state machine
02131 * Tx - text buffer to search
02132 * n - number of bytes in Tx
02133 * Match - function to call when a match is found
02134 * data - user supplied data that is passed to the Match function
02135 * sindex - state tracker, set value to zero to reset the state machine,
02136 * zero should be the value passed in on the 1st buffer or each buffer
02137 * that is to be analyzed on its own, the state machine updates this
02138 * during searches. This allows for sequential buffer searches without
02139 * resetting the state machine. Save this value as returned from the
02140 * previous search for the next search.
02141 *
02142 * returns
02143 * The state or sindex of the state machine. This can than be passed back
02144 * in on the next search, if desired.
02145 */
02146
02147 static unsigned bnfaSearch(
02148 bnfa_struct_t* bnfa, uint8_t* Tx, int n, MpseMatch match,
02149 void* context, unsigned sindex, int* current_state)
02150 {
02151 assert(current_state);
02152 int ret = 0;
02153
02154 if (current_state)
02155 {
02156 sindex = (unsigned)*current_state;
02157 }
02158
02159 #ifdef ALLOW_NFA_FULL
02160 if ( bnfa->bnfaFormat == BNFA_SPARSE )
02161 {
02162 if ( bnfa->bnfaCaseMode == BNFA_PER_PAT_CASE )
02163 {
02164 if (bnfa->bnfaMethod)
02165 {
02166 ret = _bnfa_search_csparse_nfa(
02167 bnfa, Tx, n, match, context, sindex, current_state);
02168 }
02169 else
02170 {
02171 ret = _bnfa_search_csparse_nfa_q(
02172 bnfa, Tx, n, match, context, sindex, current_state);
02173 }
02174 }
02175 else if ( bnfa->bnfaCaseMode == BNFA_CASE )
02176 {
02177 ret = _bnfa_search_csparse_nfa_case(
02178 bnfa, Tx, n, match, context, sindex, current_state);
02179 }
02180 else /* NOCASE */
02181 {
02182 ret = _bnfa_search_csparse_nfa_nocase(
02183 bnfa, Tx, n, match, context, sindex, current_state);
02184 }
02185 }
02186 else if ( bnfa->bnfaFormat == BNFA_FULL )
02187 {
02188 if ( bnfa->bnfaCaseMode == BNFA_PER_PAT_CASE )
02189 {
02190 ret = _bnfa_search_full_nfa(
02191 bnfa, Tx, n, match, context, (bnfa_state_t)sindex, current_state);
02192 }
02193 else if ( bnfa->bnfaCaseMode == BNFA_CASE )
02194 {
02195 ret = _bnfa_search_full_nfa_case(
02196 bnfa, Tx, n, match, context, (bnfa_state_t)sindex, current_state);
02197 }
02198 else
02199 {
02200 ret = _bnfa_search_full_nfa_nocase(
02201 bnfa, Tx, n, match, context, (bnfa_state_t)sindex, current_state);
02202 }
02203 }
02204 #else
02205 if ( bnfa->bnfaCaseMode == BNFA_PER_PAT_CASE )
02206 {
02207 if (bnfa->bnfaMethod)
02208 {
02209 ret = _bnfa_search_csparse_nfa(
02210 bnfa, Tx, n, match, context, sindex, current_state);
02211 }
02212 else
02213 {
02214 ret = _bnfa_search_csparse_nfa_q(
02215 bnfa, Tx, n, match, context, sindex, current_state);
02216 }
02217 }
02218 else if ( bnfa->bnfaCaseMode == BNFA_CASE )
02219 {
02220 ret = _bnfa_search_csparse_nfa_case(
02221 bnfa, Tx, n, match, context, sindex, current_state);
02222 }
02223 else /* NOCASE */
02224 {
02225 ret = _bnfa_search_csparse_nfa_nocase(
02226 bnfa, Tx, n, match, context, sindex, current_state);
02227 }
02228 #endif
02229 return ret;
02230 }
02231
02232 /*
02233 * Text Data Buffer
02234 */
02235 static uint8_t text[512];
02236 static uint8_t text2[512];
02237 static int s_verbose=0;
02238
02239 /*
02240 * A Match is found
02241 */
02242 static int MatchFound(void* id, void* tree, int index, void* data, void* neglist)
02243 {
02244 fprintf (stdout, "%s\n", (char*)data);
02245 return 0;
02246 }
02247
02248 static void objfree(void** obj)
02249 {
02250 }
02251
02252 static int buildtree(void* id, void** existing)
02253 {
02254 return 1;
02255 }
02256
02257 static int neglist(void* id, void** list)
02258 {
02259 return 1;
02260 }
02261
02262 static void LogMessage(const char* format,...)
02263 {
02264 va_list ap;
02265 va_start(ap, format);
02266 vfprintf(stderr, format, ap);
02267 va_end(ap);
02268 }
02269
02270 static SnortConfig sc;
02271 static SnortConfig* snort_conf;
02272
02273 /*
02274 *
02275 */
02276 int main(int argc, char** argv)
02277 {
02278 int i, nc, nocase = 0;
02279 bnfa_struct_t* bnfa;
02280 int current_state = 0;
02281 bool split_search = false;
02282 char* p;
02283
02284 if (argc < 3)
02285 {
02286 fprintf (stderr,"Usage: %s search-text pattern +pattern... [flags]\n",argv[0]);
02287 fprintf (stderr," flags: -q -nocase -splitsearch -v\n");
02288 exit (0);
02289 }
02290
02291 memset(&sc, 0, sizeof(SnortConfig));
02292 snort_conf = ≻
02293
02294 bnfa = bnfaNew(free, objfree, objfree);
02295 if ( !bnfa )
02296 {
02297 printf("bnfa-no memory\n");
02298 exit(0);
02299 }
02300
02301 strncpy (text, argv[1], sizeof(text) - 1);
02302 text[sizeof(text) - 1] = '\0';
02303
02304 bnfa->bnfaMethod = 1;
02305
02306 for (i = 1; i < argc; i++)
02307 {
02308 if (strcmp (argv[i], "-nocase") == 0)
02309 {
02310 nocase = 1;
02311 }
02312 if (strcmp (argv[i], "-v") == 0)
02313 {
02314 s_verbose=1;
02315 }
02316 if (strcmp (argv[i], "-splitsearch") == 0)
02317 {
02318 int len2 = strlen(text)/2;
02319 split_search =true;
02320 strncpy(text2, &text[len2], sizeof(text2) -1);
02321 text[len2] = '\0';
02322 text2[len2] = '\0';
02323 }
02324
02325 if (strcmp (argv[i], "-q") == 0)
02326 {
02327 bnfa->bnfaMethod = 0;
02328 }
02329 }
02330
02331 for (i = 2; i < argc; i++)
02332 {
02333 if (argv[i][0] == '-')
02334 continue;
02335
02336 p = argv[i];
02337
02338 if ( *p == '+')
02339 {
02340 nc=1;
02341 p++;
02342 }
02343 else
02344 {
02345 nc = nocase;
02346 }
02347
02348 bnfaAddPattern (bnfa, (uint8_t*)p, strlen(p), nc, 0, (void*)NULL);
02349 }
02350
02351 if (s_verbose)
02352 printf("Patterns added\n");
02353
02354 //Print_DFA (acsm);
02355
02356 bnfaCompile (bnfa, buildtree, neglist);
02357
02358 //Write_DFA(acsm, "bnfa-snort.dfa") ;
02359
02360 if (s_verbose)
02361 printf("Patterns compiled--written to file.\n");
02362
02363 bnfaPrintInfo (bnfa);
02364 bnfaPrintSummary ( );
02365
02366 bnfaSearch (bnfa, text, strlen (text), MatchFound, NULL, current_state, ¤t_state);
02367
02368 if (split_search)
02369 bnfaSearch (bnfa, text2, strlen (text2), MatchFound, NULL, current_state, ¤t_state);
02370
02371 bnfaFree (bnfa);
02372
02373 printf ("normal pgm end\n");
02374
02375 return (0);
02376 }
02377 #endif
02378
END OF CODE