C++一个不错的正则表达式引擎(三)

简介:
//
// String Elx
//
template <class CHART> class CStringElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase);
public:
    CBufferT <CHART> m_szPattern;
    int m_brightleft;
    int m_bignorecase;
};
//
// Implementation
//
template <class CHART> CStringElxT <CHART> :: CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength)
...{
    m_brightleft  = brightleft;
    m_bignorecase = bignorecase;
}
template <class CHART> int CStringElxT <CHART> :: Match(CContext * pContext) const
...{
    const CHART * pcsz  = (const CHART *)pContext->m_pMatchString;
    int npos = pContext->m_nCurrentPos;
    int tlen = pContext->m_pMatchStringLength;
    int slen = m_szPattern.GetSize();
    int bsucc;
    if(m_brightleft)
    ...{
        if(npos < slen)
            return 0;
        if(m_bignorecase)
            bsucc = ! m_szPattern.nCompareNoCase(pcsz + (npos - slen));
        else
            bsucc = ! m_szPattern.nCompare      (pcsz + (npos - slen));
        if( bsucc )
            pContext->m_nCurrentPos -= slen;
    }
    else
    ...{
        if(npos + slen > tlen)
            return 0;
        if(m_bignorecase)
            bsucc = ! m_szPattern.nCompareNoCase(pcsz + npos);
        else
            bsucc = ! m_szPattern.nCompare      (pcsz + npos);
        if( bsucc )
            pContext->m_nCurrentPos += slen;
    }
    return bsucc;
}
template <class CHART> int CStringElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    int slen = m_szPattern.GetSize();
    if(m_brightleft)
        pContext->m_nCurrentPos += slen;
    else
        pContext->m_nCurrentPos -= slen;
    return 0;
}
//
// CConditionElx
//
template <class CHART> class CConditionElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CConditionElxT();
public:
    // backref condition
    int m_nnumber;
    CBufferT <CHART> m_szNamed;
    // elx condition
    ElxInterface * m_pelxask;
    // selection
    ElxInterface * m_pelxyes, * m_pelxno;
};
template <class CHART> CConditionElxT <CHART> :: CConditionElxT()
...{
    m_nnumber = -1;
}
template <class CHART> int CConditionElxT <CHART> :: Match(CContext * pContext) const
...{
    // status
    int nbegin = pContext->m_nCurrentPos;
    int nsize  = pContext->m_stack.GetSize();
    int ncsize = pContext->m_capturestack.GetSize();
    // condition result
    int condition_yes = 0;
    // backref type
    if( m_nnumber >= 0 )
    ...{
        do
        ...{
            if(m_nnumber >= pContext->m_captureindex.GetSize()) break;
            int index = pContext->m_captureindex[m_nnumber];
            if( index < 0) break;
            // else valid
            condition_yes = 1;
        }
        while(0);
    }
    else
    ...{
        if( m_pelxask == 0 )
            condition_yes = 1;
        else
            condition_yes = m_pelxask->Match(pContext);
        pContext->m_stack.Restore(nsize);
        pContext->m_nCurrentPos = nbegin;
    }
    // elx result
    int bsucc;
    if( condition_yes )
        bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext);
    else
        bsucc = m_pelxno  == 0 ? 1 : m_pelxno ->Match(pContext);
    if( bsucc )
    ...{
        pContext->m_stack.Push(ncsize);
        pContext->m_stack.Push(condition_yes);
    }
    else
    ...{
        pContext->m_capturestack.Restore(ncsize);
    }
    return bsucc;
}
template <class CHART> int CConditionElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    // pop
    int ncsize, condition_yes;
    pContext->m_stack.Pop(condition_yes);
    pContext->m_stack.Pop(ncsize);
    // elx result
    int bsucc;
    if( condition_yes )
        bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext);
    else
        bsucc = m_pelxno  == 0 ? 0 : m_pelxno ->MatchNext(pContext);
    if( bsucc )
    ...{
        pContext->m_stack.Push(ncsize);
        pContext->m_stack.Push(condition_yes);
    }
    else
    ...{
        pContext->m_capturestack.Restore(ncsize);
    }
    return bsucc;
}
//
// MatchResult
//
template <int x> class MatchResultT
...{
public:
    int IsMatched() const;
public:
    int GetStart() const;
    int GetEnd  () const;
public:
    int MaxGroupNumber() const;
    int GetGroupStart(int nGroupNumber) const;
    int GetGroupEnd  (int nGroupNumber) const;
public:
    MatchResultT(CContext * pContext, int nMaxNumber = -1);
    MatchResultT <x> & operator = (const MatchResultT <x> &);
    inline operator int() const ...{ return IsMatched(); }
public:
    CBufferT <int> m_result;
};
typedef MatchResultT <0> MatchResult;
// Stocked Elx IDs
enum STOCKELX_ID_DEFINES
...{
    STOCKELX_EMPTY = 0,
    /**////////////////////////
    STOCKELX_DOT_ALL,
    STOCKELX_DOT_NOT_ALL,
    STOCKELX_WORD,
    STOCKELX_WORD_NOT,
    STOCKELX_SPACE,
    STOCKELX_SPACE_NOT,
    STOCKELX_DIGITAL,
    STOCKELX_DIGITAL_NOT,
    /**///////////////////////
    STOCKELX_DOT_ALL_RIGHTLEFT,
    STOCKELX_DOT_NOT_ALL_RIGHTLEFT,
    STOCKELX_WORD_RIGHTLEFT,
    STOCKELX_WORD_RIGHTLEFT_NOT,
    STOCKELX_SPACE_RIGHTLEFT,
    STOCKELX_SPACE_RIGHTLEFT_NOT,
    STOCKELX_DIGITAL_RIGHTLEFT,
    STOCKELX_DIGITAL_RIGHTLEFT_NOT,
    /**//////////////////////
    STOCKELX_COUNT
};
// REGEX_FLAGS
#ifndef _REGEX_FLAGS_DEFINED
    enum REGEX_FLAGS
    ...{
        NO_FLAG        = 0,
        SINGLELINE     = 0x01,
        MULTILINE      = 0x02,
        GLOBAL         = 0x04,
        IGNORECASE     = 0x08,
        RIGHTTOLEFT    = 0x10,
        EXTENDED       = 0x20,
    };
    #define _REGEX_FLAGS_DEFINED
#endif
//
// Builder T
//
template <class CHART> class CBuilderT
...{
public:
    typedef CDelegateElxT  <CHART> CDelegateElx;
    typedef CBracketElxT   <CHART> CBracketElx;
    typedef CBackrefElxT   <CHART> CBackrefElx;
    typedef CConditionElxT <CHART> CConditionElx;
// Methods
public:
    ElxInterface * Build(const CBufferRefT <CHART> & pattern, int flags);
    int GetNamedNumber(const CBufferRefT <CHART> & named) const;
    void Clear();
public:
     CBuilderT();
    ~CBuilderT();
// Public Attributes
public:
    ElxInterface * m_pTopElx;
    int            m_nFlags;
    int            m_nMaxNumber;
    int            m_nNextNamed;
    int            m_nGroupCount;
    CBufferT <ElxInterface  *> m_objlist;
    CBufferT <ElxInterface  *> m_grouplist;
    CBufferT <CDelegateElx  *> m_recursivelist;
    CBufferT <CListElx      *> m_namedlist;
    CBufferT <CBackrefElx   *> m_namedbackreflist;
    CBufferT <CConditionElx *> m_namedconditionlist;
// CHART_INFO
protected:
    struct CHART_INFO
    ...{
    public:
        CHART ch;
        int   type;
        int   pos;
        int   len;
    public:
        CHART_INFO(CHART c, int t, int p = 0, int l = 0) ...{ ch = c; type = t; pos = p; len = l;    }
        inline int operator == (const CHART_INFO & ci)   ...{ return ch == ci.ch && type == ci.type; }
        inline int operator != (const CHART_INFO & ci)   ...{ return ! operator == (ci);             }
    };
protected:
    static unsigned int Hex2Int(const CHART * pcsz, int length, int & used);
    static int ReadDec(char * & str, unsigned int & dec);
    void MoveNext();
    int  GetNext2();
    ElxInterface * BuildAlternative(int vaflags);
    ElxInterface * BuildList       (int & flags);
    ElxInterface * BuildRepeat     (int & flags);
    ElxInterface * BuildSimple     (int & flags);
    ElxInterface * BuildCharset    (int & flags);
    ElxInterface * BuildRecursive  (int & flags);
    ElxInterface * BuildBoundary   (int & flags);
    ElxInterface * BuildBackref    (int & flags);
    ElxInterface * GetStockElx     (int nStockId);
    ElxInterface * Keep(ElxInterface * pElx);
// Private Attributes
protected:
    CBufferRefT <CHART> m_pattern;
    CHART_INFO prev, curr, next, nex2;
    int m_nNextPos;
    int m_nCharsetDepth;
    int m_bQuoted;
    int (*m_quote_fun)(int);
    ElxInterface * m_pStockElxs[STOCKELX_COUNT];
};
//
// Implementation
//
template <class CHART> CBuilderT <CHART> :: CBuilderT() : m_pattern(0, 0), prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0)
...{
    Clear();
}
template <class CHART> CBuilderT <CHART> :: ~CBuilderT()
...{
    Clear();
}
template <class CHART> int CBuilderT <CHART> :: GetNamedNumber(const CBufferRefT <CHART> & named) const
...{
    for(int i=0; i<m_namedlist.GetSize(); i++)
    ...{
        if( ! ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_szNamed.CompareNoCase(named) )
            return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber;
    }
    return -3;
}
template <class CHART> ElxInterface * CBuilderT <CHART> :: Build(const CBufferRefT <CHART> & pattern, int flags)
...{
    // init
    m_pattern       = pattern;
    m_nNextPos      = 0;
    m_nCharsetDepth = 0;
    m_nMaxNumber    = 0;
    m_nNextNamed    = 0;
    m_nFlags        = flags;
    m_bQuoted       = 0;
    m_quote_fun     = 0;
    m_grouplist         .Restore(0);
    m_recursivelist     .Restore(0);
    m_namedlist         .Restore(0);
    m_namedbackreflist  .Restore(0);
    m_namedconditionlist.Restore(0);
    int i;
    for(i=0; i<3; i++) MoveNext();
    // build
    m_pTopElx = BuildAlternative(flags);
    // group 0
    m_grouplist.Prepare(0);
    m_grouplist[0] = m_pTopElx;
    // append named to unnamed
    m_nGroupCount = m_grouplist.GetSize();
    m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize());
    for(i=0; i<m_namedlist.GetSize(); i++)
    ...{
        CBracketElx * pleft  = (CBracketElx *)m_namedlist[i]->m_elxlist[0];
        CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2];
        // append
        m_grouplist[m_nGroupCount ++] = m_namedlist[i];
        if( pleft->m_nnumber > 0 )
            continue;
        // same name
        int find_same_name = GetNamedNumber(pleft->m_szNamed);
        if( find_same_name >= 0 )
        ...{
            pleft ->m_nnumber = find_same_name;
            pright->m_nnumber = find_same_name;
        }
        else
        ...{
            m_nMaxNumber ++;
            pleft ->m_nnumber = m_nMaxNumber;
            pright->m_nnumber = m_nMaxNumber;
        }
    }
    for(i=1; i<m_nGroupCount; i++)
    ...{
        CBracketElx * pleft = (CBracketElx *)((CListElx*)m_grouplist[i])->m_elxlist[0];
        if( pleft->m_nnumber > m_nMaxNumber )
            m_nMaxNumber = pleft->m_nnumber;
    }
    // connect recursive
    for(i=0; i<m_recursivelist.GetSize(); i++)
    ...{
        if( m_recursivelist[i]->m_ndata == -3 )
            m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed);
        if( m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata < m_grouplist.GetSize() )
            m_recursivelist[i]->m_pelx = m_grouplist[m_recursivelist[i]->m_ndata];
    }
    // named backref
    for(i=0; i<m_namedbackreflist.GetSize(); i++)
    ...{
        m_namedbackreflist[i]->m_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed);
    }
    // named condition
    for(i=0; i<m_namedconditionlist.GetSize(); i++)
    ...{
        int nn = GetNamedNumber(m_namedconditionlist[i]->m_szNamed);
        if( nn >= 0 )
        ...{
            m_namedconditionlist[i]->m_nnumber = nn;
            m_namedconditionlist[i]->m_pelxask = 0;
        }
    }
    return m_pTopElx;
}
template <class CHART> void CBuilderT <CHART> :: Clear()
...{
    for(int i=0; i<m_objlist.GetSize(); i++)
    ...{
        delete m_objlist[i];
    }
    m_objlist.Restore(0);
    m_pTopElx = 0;
    memset(m_pStockElxs, 0, sizeof(m_pStockElxs));
}
//
// hex to int
//
template <class CHART> unsigned int CBuilderT <CHART> :: Hex2Int(const CHART * pcsz, int length, int & used)
...{
    unsigned int result = 0;
    int & i = used;
    for(i=0; i<length; i++)
    ...{
        if(pcsz[i] >= RCHART('0') && pcsz[i] <= RCHART('9'))
            result = (result << 4) + (pcsz[i] - RCHART('0'));
        else if(pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F'))
            result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A')));
        else if(pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f'))
            result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a')));
        else
            break;
    }
    return result;
}
template <class CHART> inline ElxInterface * CBuilderT <CHART> :: Keep(ElxInterface * pelx)
...{
    m_objlist.Push(pelx);
    return pelx;
}
template <class CHART> void CBuilderT <CHART> :: MoveNext()
...{
    // forwards
    prev = curr;
    curr = next;
    next = nex2;
    // get nex2
    while( ! GetNext2() ) ...{};
}
template <class CHART> int CBuilderT <CHART> :: GetNext2()
...{
    // check length
    if(m_nNextPos >= m_pattern.GetSize())
    ...{
        nex2 = CHART_INFO(0, 1, m_nNextPos, 0);
        return 1;
    }
    int   delta = 1;
    CHART ch    = m_pattern[m_nNextPos];
    // if quoted
    if(m_bQuoted)
    ...{
        if(ch == RCHART('\'))
        ...{
            if(m_pattern[m_nNextPos + 1] == RCHART('E'))
            ...{
                m_quote_fun = 0;
                m_bQuoted   = 0;
                m_nNextPos += 2;
                return 0;
            }
        }
        if(m_quote_fun != 0)
            nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta);
        else
            nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
        m_nNextPos += delta;
        return 1;
    }
    // common
    switch(ch)
    ...{
    case RCHART('\'):
        ...{
            CHART ch1 = m_pattern[m_nNextPos+1];
            // backref
            if(ch1 >= RCHART('0') && ch1 <= RCHART('9'))
            ...{
                nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
                break;
            }
            // escape
            delta     = 2;
            switch(ch1)
            ...{
            case RCHART('A'):
            case RCHART('Z'):
            case RCHART('w'):
            case RCHART('W'):
            case RCHART('s'):
            case RCHART('S'):
            case RCHART('B'):
            case RCHART('d'):
            case RCHART('D'):
            case RCHART('k'):
            case RCHART('g'):
                nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
                break;
            case RCHART('b'):
                if(m_nCharsetDepth > 0)
                    nex2 = CHART_INFO('', 0, m_nNextPos, delta);
                else
                    nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
                break;
            /**//*
            case RCHART('<'):
            case RCHART('>'):
                if(m_nCharsetDepth > 0)
                    nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
                else
                    nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
                break;
            */
            case RCHART('x'):
                if(m_pattern[m_nNextPos+2] != '{')
                ...{
                    int red = 0;
                    unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red);
                    delta += red;
                    if(red > 0)
                        nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
                    else
                        nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
                break;
                }
            case RCHART('u'):
                if(m_pattern[m_nNextPos+2] != '{')
                ...{
                    int red = 0;
                    unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 4, red);
                    delta += red;
                    if(red > 0)
                        nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
                    else
                        nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
                }
                else
                ...{
                    int red = 0;
                    unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red);
                    delta += red;
                    while(m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}'))
                        delta ++;
                    delta ++; // skip '}'
                    nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
                }
                break;
            case RCHART('a'): nex2 = CHART_INFO(RCHART('a'), 0, m_nNextPos, delta); break;
            case RCHART('f'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
            case RCHART('n'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
            case RCHART('r'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
            case RCHART('t'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
            case RCHART('v'): nex2 = CHART_INFO(RCHART('v'), 0, m_nNextPos, delta); break;
            case RCHART('e'): nex2 = CHART_INFO(RCHART( 27 ), 0, m_nNextPos, delta); break;
            case RCHART('G'):  // skip 'G'
                if(m_nCharsetDepth > 0)
                ...{
                    m_nNextPos += 2;
                    return 0;
                }
                else
                ...{
                    nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
                    break;
                }
            case RCHART('L'):
                if( ! m_quote_fun ) m_quote_fun = ::tolower;
            case RCHART('U'):
                if( ! m_quote_fun ) m_quote_fun = ::toupper;
            case RCHART('Q'):
                ...{
                    m_bQuoted   = 1;
                    m_nNextPos += 2;
                    return 0;
                }
            case RCHART('E'):
                ...{
                    m_quote_fun = 0;
                    m_bQuoted   = 0;
                    m_nNextPos += 2;
                    return 0;
                }
            case 0:
                if(m_nNextPos+1 >= m_pattern.GetSize())
                ...{
                    delta = 1;
                    nex2 = CHART_INFO(ch , 0, m_nNextPos, delta);
                }
                else
                    nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); // common '


本文转自jazka 51CTO博客,原文链接:http://blog.51cto.com/jazka/228019,如需转载请自行联系原作者
相关文章
|
6月前
|
算法 测试技术 C#
【动态规划】【字符串】C++算法:正则表达式匹配
【动态规划】【字符串】C++算法:正则表达式匹配
|
4月前
|
存储 C++ 容器
C++一分钟之-正则表达式库(regex)
【7月更文挑战第7天】C++从C++11开始支持正则表达式,通过`&lt;regex&gt;`库提供功能。本文涵盖基本概念如`std::regex`、`std::smatch`,以及`regex_search`和`regex_match`的使用。常见问题包括大小写敏感性、特殊字符转义、贪婪与非贪婪匹配和捕获组。提供的代码示例展示了如何进行匹配、不区分大小写的匹配、特殊字符匹配、贪婪与非贪婪匹配和捕获组的使用。理解并练习正则表达式能提升文本处理效率。
75 0
|
6月前
|
存储 JavaScript API
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
150 0
|
算法 Java
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (1)
重复匹配(正则表达式中的 ? + *) 正则表达式里有4种表示重复的方式,分别是:
71 1
|
算法 测试技术
从0到1打造正则表达式执行引擎(二) NFA转DFA
然后对DFA的节点0执行步骤1,找到NFA中所有a可达的NFA节点(1#2#4#6#8#9)构成NFA中的节点1,如下图。
122 0
|
设计模式 算法
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (2)
看完上文之后相信你一直知道如果将一个正则表达式转化为状态机的方法了,这里我们要将理论转化为代码。首先我们要将图转化为代码标识,我用State表示一个节点,其中用了Map<MatchStrategy, List> next表示其后继节点,next中有个key-value就是一条边,MatchStrategy用来描述边的信息。
72 0
|
算法 C++
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
|
C++ Windows Perl
[笔记]c++基础实践《二》regex正则表达式
[笔记]c++基础实践《二》regex正则表达式