C++一个不错的正则表达式引擎(二)

简介:
//
// Implementation
//
template <class CHART> CBoundaryElxT <CHART> :: CBoundaryElxT(int ntype, int byes)
...{
    m_ntype = ntype;
    m_byes  = byes;
}
template <class CHART> int CBoundaryElxT <CHART> :: Match(CContext * pContext) const
...{
    const CHART * pcsz  = (const CHART *)pContext->m_pMatchString;
    int npos = pContext->m_nCurrentPos;
    int tlen = pContext->m_pMatchStringLength;
    CHART chL = npos > 0    ? pcsz[npos - 1] : 0;
    CHART chR = npos < tlen ? pcsz[npos    ] : 0;
    int bsucc = 0;
    switch(m_ntype)
    ...{
    case BOUNDARY_FILE_BEGIN:
        bsucc = (npos <= 0);
        break;
    case BOUNDARY_FILE_END:
        bsucc = (npos >= tlen);
        break;
    case BOUNDARY_LINE_BEGIN:
        bsucc = (npos <= 0   ) || (chL == RCHART(' ')) || ((chL == RCHART(' ')) && (chR != RCHART(' ')));
        break;
    case BOUNDARY_LINE_END:
        bsucc = (npos >= tlen) || (chR == RCHART(' ')) || ((chR == RCHART(' ')) && (chL != RCHART(' ')));
        break;
    case BOUNDARY_WORD_BEGIN:
        bsucc = ! IsWordChar(chL) &&   IsWordChar(chR);
        break;
    case BOUNDARY_WORD_END:
        bsucc =   IsWordChar(chL) && ! IsWordChar(chR);
        break;
    case BOUNDARY_WORD_EDGE:
        bsucc =   IsWordChar(chL) ?  ! IsWordChar(chR) : IsWordChar(chR);
        break;
    }
    return bsucc;
}
template <class CHART> int CBoundaryElxT <CHART> :: MatchNext(CContext *) const
...{
    return 0;
}
template <class CHART> inline int CBoundaryElxT <CHART> :: IsWordChar(CHART ch)
...{
    return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_'));
}
//
// Bracket
//
template <class CHART> class CBracketElxT : public ElxInterface  
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CBracketElxT(int nnumber, int bright);
public:
    int m_nnumber;
    int m_bright;
    CBufferT <CHART> m_szNamed;
};
template <class CHART> CBracketElxT <CHART> :: CBracketElxT(int nnumber, int bright)
...{
    m_nnumber = nnumber;
    m_bright  = bright;
}
template <class CHART> int CBracketElxT <CHART> :: Match(CContext * pContext) const
...{
    // check, for named
    if(m_nnumber < 0) return 0;
    if( ! m_bright )
    ...{
        pContext->m_captureindex.Prepare(m_nnumber, -1);
        int index = pContext->m_captureindex[m_nnumber];
        // check
        if(index > 0 && index < pContext->m_capturestack.GetSize() && pContext->m_capturestack[index+2] < 0)
        ...{
            pContext->m_capturestack[index+3] --;
            return 1;
        }
        // save
        pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize();
        pContext->m_capturestack.Push(m_nnumber);
        pContext->m_capturestack.Push(pContext->m_nCurrentPos);
        pContext->m_capturestack.Push(-1);
        pContext->m_capturestack.Push( 0); // z-index
    }
    else
    ...{
        // check
        int index = pContext->m_captureindex[m_nnumber];
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] ++;
            return 1;
        }
        // save
        pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos;
        pContext->m_capturestack[index + 3] = pContext->m_nParenZindex ++;
    }
    return 1;
}
template <class CHART> int CBracketElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    int index = pContext->m_captureindex[m_nnumber];
    if( ! m_bright )
    ...{
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] ++;
            return 0;
        }
        pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4);
        // to find
        index = pContext->m_capturestack.GetSize() - 4;
        while(index >= 0 && pContext->m_capturestack[index] != m_nnumber) index -= 4;
        // new index
        pContext->m_captureindex[m_nnumber] = index;
    }
    else
    ...{
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] --;
            return 0;
        }
        pContext->m_capturestack[index + 2] = -1;
        pContext->m_capturestack[index + 3] =  0;
    }
    return 0;
}
//
// Deletage
//
template <class CHART> class CDelegateElxT : public ElxInterface  
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CDelegateElxT(int ndata = 0);
public:
    ElxInterface * m_pelx;
    int m_ndata; // +0 : recursive to
                 // -3 : named recursive
    CBufferT <CHART> m_szNamed;
};
template <class CHART> CDelegateElxT <CHART> :: CDelegateElxT(int ndata)
...{
    m_pelx  = 0;
    m_ndata = ndata;
}
template <class CHART> int CDelegateElxT <CHART> :: Match(CContext * pContext) const
...{
    if(m_pelx != 0)
        return m_pelx->Match(pContext);
    else
        return 1;
}
template <class CHART> int CDelegateElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    if(m_pelx != 0)
        return m_pelx->MatchNext(pContext);
    else
        return 0;
}
//
// Empty
//
template <int x> class CEmptyElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CEmptyElxT();
};
typedef CEmptyElxT <0> CEmptyElx;
//
// Global
//
template <int x> class CGlobalElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CGlobalElxT();
};
typedef CGlobalElxT <0> CGlobalElx;
//
// Repeat
//
template <int x> class CRepeatElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CRepeatElxT(ElxInterface * pelx, int ntimes);
protected:
    int MatchFixed    (CContext * pContext) const;
    int MatchNextFixed(CContext * pContext) const;
public:
    ElxInterface * m_pelx;
    int m_nfixed;
};
typedef CRepeatElxT <0> CRepeatElx;
//
// Greedy
//
template <int x> class CGreedyElxT : public CRepeatElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
protected:
    int MatchVart    (CContext * pContext) const;
    int MatchNextVart(CContext * pContext) const;
public:
    int m_nvart;
};
typedef CGreedyElxT <0> CGreedyElx;
//
// Independent
//
template <int x> class CIndependentElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CIndependentElxT(ElxInterface * pelx);
public:
    ElxInterface * m_pelx;
};
typedef CIndependentElxT <0> CIndependentElx;
//
// List
//
template <int x> class CListElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CListElxT(int brightleft);
public:
    CBufferT <ElxInterface *> m_elxlist;
    int m_brightleft;
};
typedef CListElxT <0> CListElx;
//
// Posix Elx
//
template <class CHART> class CPosixElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CPosixElxT(const char * posix, int brightleft);
protected:
    static int m_isblank(int c);
public:
    int (*m_posixfun)(int);
    int m_brightleft;
    int m_byes;
};
//
// Implementation
//
template <class CHART> CPosixElxT <CHART> :: CPosixElxT(const char * posix, int brightleft)
...{
    m_brightleft = brightleft;
    if(posix[1] == '^')
    ...{
        m_byes = 0;
        posix += 2;
    }
    else
    ...{
        m_byes = 1;
        posix += 1;
    }
    if     (!strncmp(posix, "alnum:", 6)) m_posixfun = ::isalnum ;
    else if(!strncmp(posix, "alpha:", 6)) m_posixfun = ::isalpha ;
    else if(!strncmp(posix, "ascii:", 6)) m_posixfun = ::isascii ;
    else if(!strncmp(posix, "cntrl:", 6)) m_posixfun = ::iscntrl ;
    else if(!strncmp(posix, "digit:", 6)) m_posixfun = ::isdigit ;
    else if(!strncmp(posix, "graph:", 6)) m_posixfun = ::isgraph ;
    else if(!strncmp(posix, "lower:", 6)) m_posixfun = ::islower ;
    else if(!strncmp(posix, "print:", 6)) m_posixfun = ::isprint ;
    else if(!strncmp(posix, "punct:", 6)) m_posixfun = ::ispunct ;
    else if(!strncmp(posix, "space:", 6)) m_posixfun = ::isspace ;
    else if(!strncmp(posix, "upper:", 6)) m_posixfun = ::isupper ;
    else if(!strncmp(posix, "xdigit:",7)) m_posixfun = ::isxdigit;
    else if(!strncmp(posix, "blank:", 6)) m_posixfun = m_isblank ;
    else                                  m_posixfun = 0         ;
}
template <class CHART> int CPosixElxT <CHART> :: m_isblank(int c)
...{
    return c == 0x20 || c == ' ';
}
template <class CHART> int CPosixElxT <CHART> :: Match(CContext * pContext) const
...{
    if(m_posixfun == 0) return 0;
    int tlen = pContext->m_pMatchStringLength;
    int npos = pContext->m_nCurrentPos;
    // check
    int at   = m_brightleft ? npos - 1 : npos;
    if( at < 0 || at >= tlen )
        return 0;
    CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
    int bsucc = (*m_posixfun)(ch);
    if( ! m_byes )
        bsucc = ! bsucc;
    if( bsucc )
        pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
    return bsucc;
}
template <class CHART> int CPosixElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
    return 0;
}
//
// Possessive
//
template <int x> class CPossessiveElxT : public CGreedyElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
};
typedef CPossessiveElxT <0> CPossessiveElx;
//
// Range Elx
//
template <class CHART> class CRangeElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CRangeElxT(int brightleft, int byes);
public:
    CBufferT <CHART> m_ranges;
    CBufferT <CHART> m_chars;
    CBufferT <ElxInterface *> m_embeds;
public:
    int m_brightleft;
    int m_byes;
};
//
// Implementation
//
template <class CHART> CRangeElxT <CHART> :: CRangeElxT(int brightleft, int byes)
...{
    m_brightleft = brightleft;
    m_byes       = byes;
}
template <class CHART> int CRangeElxT <CHART> :: Match(CContext * pContext) const
...{
    int tlen = pContext->m_pMatchStringLength;
    int npos = pContext->m_nCurrentPos;
    // check
    int at   = m_brightleft ? npos - 1 : npos;
    if( at < 0 || at >= tlen )
        return 0;
    CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
    int bsucc = 0, i;
    // compare
    for(i=0; !bsucc && i<m_ranges.GetSize(); i+=2)
    ...{
        if(m_ranges[i] <= ch && ch <= m_ranges[i+1]) bsucc = 1;
    }
    for(i=0; !bsucc && i<m_chars.GetSize(); i++)
    ...{
        if(m_chars[i] == ch) bsucc = 1;
    }
    for(i=0; !bsucc && i<m_embeds.GetSize(); i++)
    ...{
        if(m_embeds[i]->Match(pContext))
        ...{
            pContext->m_nCurrentPos = npos;
            bsucc = 1;
        }
    }
    if( ! m_byes )
        bsucc = ! bsucc;
    if( bsucc )
        pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
    return bsucc;
}
template <class CHART> int CRangeElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
    return 0;
}
//
// Reluctant
//
template <int x> class CReluctantElxT : public CRepeatElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
protected:
    int MatchVart    (CContext * pContext) const;
    int MatchNextVart(CContext * pContext) const;
public:
    int m_nvart;
};
typedef CReluctantElxT <0> CReluctantElx;


本文转自jazka 51CTO博客,原文链接:http://blog.51cto.com/jazka/228017,如需转载请自行联系原作者
相关文章
|
6月前
|
算法 测试技术 C#
【动态规划】【字符串】C++算法:正则表达式匹配
【动态规划】【字符串】C++算法:正则表达式匹配
|
4月前
|
存储 C++ 容器
C++一分钟之-正则表达式库(regex)
【7月更文挑战第7天】C++从C++11开始支持正则表达式,通过`&lt;regex&gt;`库提供功能。本文涵盖基本概念如`std::regex`、`std::smatch`,以及`regex_search`和`regex_match`的使用。常见问题包括大小写敏感性、特殊字符转义、贪婪与非贪婪匹配和捕获组。提供的代码示例展示了如何进行匹配、不区分大小写的匹配、特殊字符匹配、贪婪与非贪婪匹配和捕获组的使用。理解并练习正则表达式能提升文本处理效率。
75 0
|
6月前
|
存储 JavaScript API
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
150 0
|
算法 Java
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (1)
重复匹配(正则表达式中的 ? + *) 正则表达式里有4种表示重复的方式,分别是:
71 1
|
算法 测试技术
从0到1打造正则表达式执行引擎(二) NFA转DFA
然后对DFA的节点0执行步骤1,找到NFA中所有a可达的NFA节点(1#2#4#6#8#9)构成NFA中的节点1,如下图。
122 0
|
设计模式 算法
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (2)
看完上文之后相信你一直知道如果将一个正则表达式转化为状态机的方法了,这里我们要将理论转化为代码。首先我们要将图转化为代码标识,我用State表示一个节点,其中用了Map<MatchStrategy, List> next表示其后继节点,next中有个key-value就是一条边,MatchStrategy用来描述边的信息。
72 0
|
算法 C++
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
|
C++ Windows Perl
[笔记]c++基础实践《二》regex正则表达式
[笔记]c++基础实践《二》regex正则表达式