Generated on Thu Mar 22 10:39:38 2012 for Gecode by doxygen 1.6.3

dfa.cpp

Go to the documentation of this file.
00001 /* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
00002 /*
00003  *  Main authors:
00004  *     Christian Schulte <schulte@gecode.org>
00005  *
00006  *  Copyright:
00007  *     Christian Schulte, 2004
00008  *
00009  *  Last modified:
00010  *     $Date: 2009-09-29 20:18:44 +0200 (Tue, 29 Sep 2009) $ by $Author: schulte $
00011  *     $Revision: 9773 $
00012  *
00013  *  This file is part of Gecode, the generic constraint
00014  *  development environment:
00015  *     http://www.gecode.org
00016  *
00017  *  Permission is hereby granted, free of charge, to any person obtaining
00018  *  a copy of this software and associated documentation files (the
00019  *  "Software"), to deal in the Software without restriction, including
00020  *  without limitation the rights to use, copy, modify, merge, publish,
00021  *  distribute, sublicense, and/or sell copies of the Software, and to
00022  *  permit persons to whom the Software is furnished to do so, subject to
00023  *  the following conditions:
00024  *
00025  *  The above copyright notice and this permission notice shall be
00026  *  included in all copies or substantial portions of the Software.
00027  *
00028  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00029  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00030  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00031  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00032  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00033  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00034  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00035  *
00036  */
00037 
00038 #include <gecode/int.hh>
00039 
00040 namespace Gecode { namespace Int { namespace Extensional {
00041 
00045   class TransByI_State {
00046   public:
00047     forceinline bool
00048     operator ()(const DFA::Transition& x, const DFA::Transition& y) {
00049       return x.i_state < y.i_state;
00050     }
00051     forceinline static void
00052     sort(DFA::Transition t[], int n) {
00053       TransByI_State tbis;
00054       Support::quicksort<DFA::Transition,TransByI_State>(t,n,tbis);
00055     }
00056   };
00057 
00061   class TransBySymbol {
00062   public:
00063     forceinline bool
00064     operator ()(const DFA::Transition& x, const DFA::Transition& y) {
00065       return x.symbol < y.symbol;
00066     }
00067     forceinline static void
00068     sort(DFA::Transition t[], int n) {
00069       TransBySymbol tbs;
00070       Support::quicksort<DFA::Transition,TransBySymbol>(t,n,tbs);
00071     }
00072   };
00073 
00077   class TransBySymbolI_State {
00078   public:
00079     forceinline bool
00080     operator ()(const DFA::Transition& x, const DFA::Transition& y) {
00081       return ((x.symbol < y.symbol) ||
00082               ((x.symbol == y.symbol) && (x.i_state < y.i_state)));
00083     }
00084     forceinline static void
00085     sort(DFA::Transition t[], int n) {
00086       TransBySymbolI_State tbsi;
00087       Support::quicksort<DFA::Transition,TransBySymbolI_State>(t,n,tbsi);
00088     }
00089   };
00090 
00094   class TransByO_State {
00095   public:
00096     forceinline bool
00097     operator ()(const DFA::Transition& x, const DFA::Transition& y) {
00098       return x.o_state < y.o_state;
00099     }
00100     forceinline static void
00101     sort(DFA::Transition t[], int n) {
00102       TransByO_State tbos;
00103       Support::quicksort<DFA::Transition,TransByO_State>(t,n,tbos);
00104     }
00105   };
00106 
00107 
00111   class StateGroup {
00112   public:
00113     int state;
00114     int group;
00115   };
00116 
00120   class StateGroupByGroup {
00121   public:
00122     forceinline bool
00123     operator ()(const StateGroup& x, const StateGroup& y) {
00124       return ((x.group < y.group) ||
00125               ((x.group == y.group) && (x.state < y.state)));
00126     }
00127     static void
00128     sort(StateGroup sg[], int n) {
00129       StateGroupByGroup o;
00130       Support::quicksort<StateGroup,StateGroupByGroup>(sg,n,o);
00131     }
00132   };
00133 
00137   class GroupStates {
00138   public:
00139     StateGroup* fst;
00140     StateGroup* lst;
00141   };
00142 
00144   enum StateInfo {
00145     SI_NONE       = 0, 
00146     SI_FROM_START = 1, 
00147     SI_TO_FINAL   = 2, 
00148     SI_FINAL      = 4  
00149   };
00150 
00151 }}}
00152 
00153 namespace Gecode {
00154 
00155   DFA::DFA(int start, Transition t_spec[], int f_spec[], bool minimize) {
00156     using namespace Int;
00157     using namespace Extensional;
00158     // Compute number of states and transitions
00159     int n_states = start;
00160     int n_trans  = 0;
00161     for (Transition* t = &t_spec[0]; t->i_state >= 0; t++) {
00162       n_states = std::max(n_states,t->i_state);
00163       n_states = std::max(n_states,t->o_state);
00164       n_trans++;
00165     }
00166     for (int* f = &f_spec[0]; *f >= 0; f++)
00167       n_states = std::max(n_states,*f);
00168     n_states++;
00169 
00170     // Temporary structure for transitions
00171     Transition* trans = heap.alloc<Transition>(n_trans);
00172     for (int i = n_trans; i--; )
00173       trans[i] = t_spec[i];
00174     // Temporary structures for finals
00175     int* final = heap.alloc<int>(n_states+1);
00176     bool* is_final = heap.alloc<bool>(n_states+1);
00177     int n_finals = 0;
00178     for (int i = n_states+1; i--; )
00179       is_final[i] = false;
00180     for (int* f = &f_spec[0]; *f != -1; f++) {
00181       is_final[*f]      = true;
00182       final[n_finals++] = *f;
00183     }
00184 
00185     if (minimize) {
00186       // Sort transitions by symbol and i_state
00187       TransBySymbolI_State::sort(trans, n_trans);
00188       Transition** idx = heap.alloc<Transition*>(n_trans+1);
00189       //  idx[i]...idx[i+1]-1 gives where transitions for symbol i start
00190       int n_symbols = 0;
00191       {
00192         int j = 0;
00193         while (j < n_trans) {
00194           idx[n_symbols++] = &trans[j];
00195           int s = trans[j].symbol;
00196           while ((j < n_trans) && (s == trans[j].symbol))
00197             j++;
00198         }
00199         idx[n_symbols] = &trans[j];
00200         assert(j == n_trans);
00201       }
00202       // Map states to groups
00203       int* s2g = heap.alloc<int>(n_states+1);
00204       StateGroup* part = heap.alloc<StateGroup>(n_states+1);
00205       GroupStates* g2s = heap.alloc<GroupStates>(n_states+1);
00206       // Initialize: final states is group one, all other group zero
00207       for (int i = n_states+1; i--; ) {
00208         part[i].state = i;
00209         part[i].group = is_final[i] ? 1 : 0;
00210         s2g[i]        = part[i].group;
00211       }
00212       // Important: the state n_state is the dead state, conceptually
00213       // if there is no transition for a symbol and input state, it is
00214       // assumed that there is an implicit transition to n_state
00215 
00216       // Set up the indexing data structure, sort by group
00217       StateGroupByGroup::sort(part,n_states+1);
00218       int n_groups;
00219       if (part[0].group == part[n_states].group) {
00220         // No final states, just one group
00221         g2s[0].fst = &part[0]; g2s[0].lst = &part[n_states+1];
00222         n_groups = 1;
00223       } else  {
00224         int i = 0;
00225         assert(part[0].group == 0);
00226         do i++; while (part[i].group == 0);
00227         g2s[0].fst = &part[0]; g2s[0].lst = &part[i];
00228         g2s[1].fst = &part[i]; g2s[1].lst = &part[n_states+1];
00229         n_groups = 2;
00230       }
00231 
00232       // Do the refinement
00233       {
00234         int m_groups;
00235         do {
00236           m_groups = n_groups;
00237           // Iterate over symbols
00238           for (int sidx = n_symbols; sidx--; ) {
00239             // Iterate over groups
00240             for (int g = n_groups; g--; ) {
00241               // Ignore singleton groups
00242               if (g2s[g].lst-g2s[g].fst > 1) {
00243                 // Apply transitions to group states
00244                 // This exploits that both transitions as well as
00245                 // stategroups are sorted by (input) state
00246                 Transition* t     = idx[sidx];
00247                 Transition* t_lst = idx[sidx+1];
00248                 for (StateGroup* sg = g2s[g].fst; sg<g2s[g].lst; sg++) {
00249                   while ((t < t_lst) && (t->i_state < sg->state))
00250                     t++;
00251                   // Compute group resulting from transition
00252                   if ((t < t_lst) && (t->i_state == sg->state))
00253                     sg->group = s2g[t->o_state];
00254                   else
00255                     sg->group = s2g[n_states]; // Go to dead state
00256                 }
00257                 // Sort group by groups from transitions
00258                 StateGroupByGroup::sort(g2s[g].fst,
00259                                         static_cast<int>(g2s[g].lst-g2s[g].fst));
00260                 // Group must be split?
00261                 if (g2s[g].fst->group != (g2s[g].lst-1)->group) {
00262                   // Skip first group
00263                   StateGroup* sg = g2s[g].fst+1;
00264                   while ((sg-1)->group == sg->group)
00265                     sg++;
00266                   // Start splitting
00267                   StateGroup* lst = g2s[g].lst;
00268                   g2s[g].lst = sg;
00269                   while (sg < lst) {
00270                     s2g[sg->state] = n_groups;
00271                     g2s[n_groups].fst  = sg++;
00272                     while ((sg < lst) && ((sg-1)->group == sg->group)) {
00273                       s2g[sg->state] = n_groups; sg++;
00274                     }
00275                     g2s[n_groups++].lst = sg;
00276                   }
00277                 }
00278               }
00279             }
00280           }
00281         } while (n_groups != m_groups);
00282         // New start state
00283         start = s2g[start];
00284         // Compute new final states
00285         n_finals = 0;
00286         for (int g = n_groups; g--; )
00287           for (StateGroup* sg = g2s[g].fst; sg < g2s[g].lst; sg++)
00288             if (is_final[sg->state]) {
00289               final[n_finals++] = g;
00290               break;
00291             }
00292         // Compute representatives
00293         int* s2r = heap.alloc<int>(n_states+1);
00294         for (int i = n_states+1; i--; )
00295           s2r[i] = -1;
00296         for (int g = n_groups; g--; )
00297           s2r[g2s[g].fst->state] = g;
00298         // Clean transitions
00299         int j = 0;
00300         for (int i = 0; i<n_trans; i++)
00301           if (s2r[trans[i].i_state] != -1) {
00302             trans[j].i_state = s2g[trans[i].i_state];
00303             trans[j].symbol  = trans[i].symbol;
00304             trans[j].o_state = s2g[trans[i].o_state];
00305             j++;
00306           }
00307         n_trans  = j;
00308         n_states = n_groups;
00309         heap.free<int>(s2r,n_states+1);
00310       }
00311       heap.free<GroupStates>(g2s,n_states+1);
00312       heap.free<StateGroup>(part,n_states+1);
00313       heap.free<int>(s2g,n_states+1);
00314       heap.free<Transition*>(idx,n_trans+1);
00315     }
00316 
00317     // Do a reachability analysis for all states starting from start state
00318     Gecode::Support::StaticStack<int,Heap> visit(heap,n_states);
00319     int* state = heap.alloc<int>(n_states);
00320     for (int i=n_states; i--; )
00321       state[i] = SI_NONE;
00322 
00323     Transition** idx = heap.alloc<Transition*>(n_states+1);
00324     {
00325       // Sort all transitions according to i_state and create index structure
00326       //  idx[i]...idx[i+1]-1 gives where transitions for state i start
00327       TransByI_State::sort(trans, n_trans);
00328       {
00329         int j = 0;
00330         for (int i=0; i<n_states; i++) {
00331           idx[i] = &trans[j];
00332           while ((j < n_trans) && (i == trans[j].i_state))
00333             j++;
00334         }
00335         idx[n_states] = &trans[j];
00336         assert(j == n_trans);
00337       }
00338 
00339       state[start] = SI_FROM_START;
00340       visit.push(start);
00341       while (!visit.empty()) {
00342         int s = visit.pop();
00343         for (Transition* t = idx[s]; t < idx[s+1]; t++)
00344           if (!(state[t->o_state] & SI_FROM_START)) {
00345             state[t->o_state] |= SI_FROM_START;
00346             visit.push(t->o_state);
00347           }
00348       }
00349     }
00350 
00351     // Do a reachability analysis for all states to a final state
00352     {
00353       // Sort all transitions according to o_state and create index structure
00354       //  idx[i]...idx[i+1]-1 gives where transitions for state i start
00355       TransByO_State::sort(trans, n_trans);
00356       {
00357         int j = 0;
00358         for (int i=0; i<n_states; i++) {
00359           idx[i] = &trans[j];
00360           while ((j < n_trans) && (i == trans[j].o_state))
00361             j++;
00362         }
00363         idx[n_states] = &trans[j];
00364         assert(j == n_trans);
00365       }
00366 
00367       for (int i = n_finals; i--; ) {
00368         state[final[i]] |= (SI_TO_FINAL | SI_FINAL);
00369         visit.push(final[i]);
00370       }
00371       while (!visit.empty()) {
00372         int s = visit.pop();
00373         for (Transition* t = idx[s]; t < idx[s+1]; t++)
00374           if (!(state[t->i_state] & SI_TO_FINAL)) {
00375             state[t->i_state] |= SI_TO_FINAL;
00376             visit.push(t->i_state);
00377           }
00378       }
00379     }
00380     heap.free<Transition*>(idx,n_states+1);
00381     heap.free<int>(final,n_states+1);
00382     heap.free<bool>(is_final,n_states+1);
00383 
00384     // Now all reachable states are known (also the final ones)
00385     int* re = heap.alloc<int>(n_states);
00386     for (int i = n_states; i--; )
00387       re[i] = -1;
00388 
00389     // Renumber states
00390     int m_states = 0;
00391     // Start state gets zero
00392     re[start] = m_states++;
00393 
00394     // Renumber final states
00395     for (int i = n_states; i--; )
00396       if ((state[i] == (SI_FINAL | SI_FROM_START | SI_TO_FINAL)) && (re[i] < 0))
00397         re[i] = m_states++;
00398     // If start state is final, final states start from zero, otherwise from one
00399     int final_fst = (state[start] & SI_FINAL) ? 0 : 1;
00400     int final_lst = m_states;
00401     // final_fst...final_lst-1 are the final states
00402 
00403     // Renumber remaining states
00404     for (int i = n_states; i--; )
00405       if ((state[i] == (SI_FROM_START | SI_TO_FINAL)) && (re[i] < 0))
00406         re[i] = m_states++;
00407 
00408     // Count number of remaining transitions
00409     int m_trans = 0;
00410     for (int i = n_trans; i--; )
00411       if ((re[trans[i].i_state] >= 0) && (re[trans[i].o_state] >= 0))
00412         m_trans++;
00413 
00414     // All done... Construct the automaton
00415     DFAI* d = new DFAI(m_trans);
00416     d->n_states  = m_states;
00417     d->n_trans   = m_trans;
00418     d->final_fst = final_fst;
00419     d->final_lst = final_lst;
00420     {
00421       int j = 0;
00422       Transition* r = &d->trans[0];
00423       for (int i = 0; i<n_trans; i++)
00424         if ((re[trans[i].i_state] >= 0) && (re[trans[i].o_state] >= 0)) {
00425           r[j].symbol  = trans[i].symbol;
00426           r[j].i_state = re[trans[i].i_state];
00427           r[j].o_state = re[trans[i].o_state];
00428           j++;
00429         }
00430       TransBySymbol::sort(r,m_trans);
00431     }
00432     {
00433       // Count number of symbols
00434       unsigned int n_symbols = 0;
00435       for (int i = 0; i<m_trans; ) {
00436         int s = d->trans[i++].symbol;
00437         n_symbols++;
00438         while ((i<m_trans) && (d->trans[i].symbol == s))
00439           i++;
00440       }
00441       d->n_symbols = n_symbols;
00442     }
00443     {
00444       // Compute maximal degree
00445       unsigned int max_degree = 0;
00446       unsigned int* deg = heap.alloc<unsigned int>(m_states);
00447 
00448       // Compute in-degree per state
00449       for (int i = m_states; i--; )
00450         deg[i] = 0;
00451       for (int i = m_trans; i--; )
00452         deg[d->trans[i].o_state]++;
00453       for (int i = m_states; i--; )
00454         max_degree = std::max(max_degree,deg[i]);
00455 
00456       // Compute out-degree per state
00457       for (int i = m_states; i--; )
00458         deg[i] = 0;
00459       for (int i = m_trans; i--; )
00460         deg[d->trans[i].i_state]++;
00461       for (int i = m_states; i--; )
00462         max_degree = std::max(max_degree,deg[i]);
00463 
00464       heap.free<unsigned int>(deg,m_states);
00465 
00466       // Compute transitions per symbol
00467       {
00468         int i=0;
00469         while (i < m_trans) {
00470           int j=i++;
00471           while ((i < m_trans) && 
00472                  (d->trans[j].symbol == d->trans[i].symbol))
00473             i++;
00474           max_degree = std::max(max_degree,static_cast<unsigned int>(i-j));
00475         }
00476       }
00477 
00478       d->max_degree = max_degree;
00479     }
00480 
00481     d->fill();
00482     object(d);
00483     heap.free<int>(re,n_states);
00484     heap.free<int>(state,n_states);
00485     heap.free<Transition>(trans,n_trans);
00486   }
00487 
00488   SharedHandle::Object*
00489   DFA::DFAI::copy(void) const {
00490     DFAI* d = new DFAI(n_trans);
00491     d->n_states   = n_states;
00492     d->n_symbols  = n_symbols;
00493     d->n_trans    = n_trans;
00494     d->max_degree = max_degree;
00495     d->final_fst  = final_fst;
00496     d->final_lst  = final_lst;
00497     heap.copy<Transition>(&d->trans[0], &trans[0], n_trans);
00498     d->fill();
00499     return d;
00500   }
00501 
00502   void
00503   DFA::DFAI::fill(void) {
00504     // Compute smallest logarithm larger than n_symbols
00505     n_log = 1;
00506     while (n_symbols >= static_cast<unsigned int>(1<<n_log))
00507       n_log++;
00508     // Allocate memory
00509     table = heap.alloc<HashEntry>(1<<n_log);
00510     // Initialize table
00511     for (int i=(1<<n_log); i--; )
00512       table[i].fst = table[i].lst = NULL;
00513     int mask = (1 << n_log) - 1;
00514     // Enter transitions to table
00515     for (int i = 0; i<n_trans; ) {
00516       int s = trans[i].symbol;
00517       Transition* fst = &trans[i];
00518       i++;
00519       while ((i<n_trans) && (trans[i].symbol == s))
00520         i++;
00521       Transition* lst = &trans[i];
00522       // Enter with linear collision resolution
00523       int p = s & mask;
00524       while (table[p].fst != NULL)
00525         p = (p+1) & mask;
00526       table[p].symbol = s;
00527       table[p].fst    = fst;
00528       table[p].lst    = lst;
00529     }
00530   }
00531 
00532 }
00533 
00534 // STATISTICS: int-prop
00535