[C++] [正则表达式] 一个不错的正则表达式引擎4C++

简介:
// RegEx.cpp : 定义控制台应用程序的入口点。
//
// 本文所使用的“正则表达式解析引擎”来自
//  http://www.regexlab.com/deelx/
// 
#include "stdafx.h"
#include "Regex.h"
/**//// ===========================================================================
/// 合法的IP
/// A.B.C.D
/// A/B/C/D 为[0-255]间的整数
/// 本规则忽略数字前导0,即 00192 == 192
/// ---------------------------------------------------------------------------
int is_ip( const char* str )
...{
    static CRegexpT< char > regexp( "0*(1\d{0,2}|2([0-4]\d?|5[0-5]?|\d?)|[3-9]\d?|0)(.0*(1\d{0,2}|2([0-4]\d?|5[0-5]?|\d?)|[3-9]\d?|0)){3}" );
    MatchResult mrRet = regexp.MatchExact( str );
    return mrRet.IsMatched();
}
int is_email( const char* str )
...{
    static CRegexpT< char > regexp( "^\w+(\.\w+)*@\w+(\.\w+)+$" );
    MatchResult mrRet = regexp.MatchExact( str );
    return mrRet.IsMatched();
}
int _tmain(int argc, _TCHAR* argv[])
...{
    char* ip1 = "192.168.58.251";
    char* ip2 = "256.168.58.251";
    printf( "%s ==> %s " , ip1 , (is_ip(ip1)?"yes":"no") );
    printf( "%s ==> %s " , ip2 , (is_ip(ip2)?"yes":"no") );
    char* m1 = " hjbcn@126.com";
    char* m2 = "hjbcn#126.com";
    printf( "%s ==> %s " , m1 , (is_email(m1)?"yes":"no") );
    printf( "%s ==> %s " , m2 , (is_email(m2)?"yes":"no") );
    getchar();
    return 0;
}
// RegEx.h
//
// DEELX Regular Expression Engine (v1.2)
//
// Copyright 2006 (c) RegExLab.com
// All Rights Reserved.
//
//  http://www.regexlab.com/deelx/
//
// Author: 史寿伟 (sswater shi)
//  sswater@gmail.com
//
// $Revision: 1.1.2.27 $
//
#ifndef __DEELX_REGEXP__H__
#define __DEELX_REGEXP__H__
#include <memory.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
//
// Data Reference
//
// Non-owning view over a run of ELT elements: stores a pointer and a length
// and never frees the pointed-to data (the destructor is empty).
template <class ELT> class CBufferRefT
{
public:
    // View over exactly `length` elements starting at pcsz.
    CBufferRefT(const ELT * pcsz, int length);
    // View over a zero-terminated sequence; a null pcsz yields an empty view.
    CBufferRefT(const ELT * pcsz);
public:
    // Prefix comparisons: only the first GetSize() elements of pcsz are read.
    int nCompare      (const ELT * pcsz) const;
    int nCompareNoCase(const ELT * pcsz) const;
    // Whole-string comparisons: 0 means equal, non-zero means different.
    int  Compare      (const ELT * pcsz) const;
    int  CompareNoCase(const ELT * pcsz) const;
    int  Compare      (const CBufferRefT <ELT> &) const;
    int  CompareNoCase(const CBufferRefT <ELT> &) const;
    // Bounds-checked element access; out-of-range yields def (At) or 0 ([]).
    ELT At          (int nIndex, ELT def = 0) const;
    ELT operator [] (int nIndex) const;
    const ELT * GetBuffer() const;
    int GetSize() const;
public:
    virtual ~CBufferRefT();
// Content
protected:
    const ELT * m_pRef;   // first element of the referenced data (may be null)
    int         m_nSize;  // number of elements in the view
};
//
// Implementation
//
template <class ELT> CBufferRefT <ELT> :: CBufferRefT(const ELT * pcsz, int length)
{
    m_pRef  = pcsz;
    m_nSize = length;
}
template <class ELT> CBufferRefT <ELT> :: CBufferRefT(const ELT * pcsz)
{
    m_pRef  = pcsz;
    m_nSize = 0;
    // Count elements up to (not including) the zero terminator.
    if(pcsz != 0) while(m_pRef[m_nSize] != 0) m_nSize ++;
}
// Compares the view against the first GetSize() elements of pcsz.
// Returns 0 if they are all equal, otherwise the difference of the first
// mismatching pair. pcsz must be readable up to the first mismatch or
// GetSize() elements.
template <class ELT> int CBufferRefT <ELT> :: nCompare(const ELT * pcsz) const
{
    for(int i=0; i<m_nSize; i++)
    {
        if(m_pRef[i] != pcsz[i])
            return m_pRef[i] - pcsz[i];
    }
    return 0;
}
// Same as nCompare, but elements that differ are additionally compared after
// folding both through toupper(); only a mismatch after folding counts.
template <class ELT> int CBufferRefT <ELT> :: nCompareNoCase(const ELT * pcsz) const
{
    for(int i=0; i<m_nSize; i++)
    {
        if(m_pRef[i] != pcsz[i])
        {
            if(toupper((int)m_pRef[i]) != toupper((int)pcsz[i]))
                return m_pRef[i] - pcsz[i];
        }
    }
    return 0;
}
// Returns 0 only when pcsz starts with the view's content AND pcsz ends
// (zero element) right after it; any other case returns non-zero.
template <class ELT> inline int CBufferRefT <ELT> :: Compare(const ELT * pcsz) const
{
    return nCompare(pcsz) ? 1 : (int)pcsz[m_nSize];
}
template <class ELT> inline int CBufferRefT <ELT> :: CompareNoCase(const ELT * pcsz) const
{
    return nCompareNoCase(pcsz) ? 1 : (int)pcsz[m_nSize];
}
// Views of different sizes are always unequal (1); otherwise the result of
// the element-wise prefix comparison is returned.
template <class ELT> inline int CBufferRefT <ELT> :: Compare(const CBufferRefT <ELT> & cref) const
{
    return m_nSize == cref.m_nSize ? nCompare(cref.GetBuffer()) : 1;
}
template <class ELT> inline int CBufferRefT <ELT> :: CompareNoCase(const CBufferRefT <ELT> & cref) const
{
    return m_nSize == cref.m_nSize ? nCompareNoCase(cref.GetBuffer()) : 1;
}
// Returns the element at nIndex, or def when nIndex is outside [0, GetSize()).
template <class ELT> inline ELT CBufferRefT <ELT> :: At(int nIndex, ELT def) const
{
    // Negative indices are rejected too, so the buffer is never read before
    // its first element.
    return (nIndex < 0 || nIndex >= m_nSize) ? def : m_pRef[nIndex];
}
// Returns the element at nIndex, or 0 when nIndex is outside [0, GetSize()).
template <class ELT> inline ELT CBufferRefT <ELT> :: operator [] (int nIndex) const
{
    return (nIndex < 0 || nIndex >= m_nSize) ? 0 : m_pRef[nIndex];
}
// Returns the referenced data; when the pointer is null a static
// single-element zero array is returned instead, so the result is never null.
template <class ELT> const ELT * CBufferRefT <ELT> :: GetBuffer() const
{
    static const ELT _def[] = {0}; return m_pRef ? m_pRef : _def;
}
template <class ELT> inline int CBufferRefT <ELT> :: GetSize() const
{
    return m_nSize;
}
template <class ELT> CBufferRefT <ELT> :: ~CBufferRefT()
{
}
//
// Data Buffer
//
template <class ELT> class CBufferT : public CBufferRefT <ELT>
...{
public:
    CBufferT(const ELT * pcsz, int length);
    CBufferT(const ELT * pcsz);
    CBufferT();
public:
    ELT & operator [] (int nIndex);
    const ELT & operator [] (int nIndex) const;
    void  Append(const ELT * pcsz, int length, int eol = 0);
    void  Append(ELT el, int eol = 0);
public:
    void  Push(ELT   el);
    int   Pop (ELT & el);
    int   Peek(ELT & el) const;
public:
    const ELT * GetBuffer() const;
    ELT * GetBuffer();
    ELT * Detach();
    void  Release();
    void  Prepare(int index, int fill = 0);
    void  Restore(int size);
public:
    virtual ~CBufferT();
// Content
protected:
    ELT * m_pBuffer;
    int   m_nMaxLength;
};
//
// Implemenation
//
template <class ELT> CBufferT <ELT> :: CBufferT(const ELT * pcsz, int length) : CBufferRefT <ELT> (0, length)
...{
    m_nMaxLength = CBufferRefT <ELT> :: m_nSize + 1;
    CBufferRefT <ELT> :: m_pRef = m_pBuffer = new ELT[m_nMaxLength];
    memcpy(m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
    m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
}
template <class ELT> CBufferT <ELT> :: CBufferT(const ELT * pcsz) : CBufferRefT <ELT> (pcsz)
...{
    m_nMaxLength = CBufferRefT <ELT> :: m_nSize + 1;
    CBufferRefT <ELT> :: m_pRef = m_pBuffer = new ELT[m_nMaxLength];
    memcpy(m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
    m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
}
template <class ELT> CBufferT <ELT> :: CBufferT() : CBufferRefT <ELT> (0, 0)
...{
    m_nMaxLength = 0;
    m_pBuffer    = 0;
}
template <class ELT> inline ELT & CBufferT <ELT> :: operator [] (int nIndex)
...{
    return m_pBuffer[nIndex];
}
template <class ELT> inline const ELT & CBufferT <ELT> :: operator [] (int nIndex) const
...{
    return m_pBuffer[nIndex];
}
template <class ELT> void CBufferT <ELT> :: Append(const ELT * pcsz, int length, int eol)
...{
    int nNewLength = m_nMaxLength;
    // Check length
    if(nNewLength < 8)
        nNewLength = 8;
    if(CBufferRefT <ELT> :: m_nSize + length + eol > nNewLength)
        nNewLength *= 2;
    if(CBufferRefT <ELT> :: m_nSize + length + eol > nNewLength)
    ...{
        nNewLength  = CBufferRefT <ELT> :: m_nSize + length + eol + 11;
        nNewLength -= nNewLength % 8;
    }
    // Realloc
    if(nNewLength > m_nMaxLength)
    ...{
        ELT * pNewBuffer = new ELT[nNewLength];
        if(m_pBuffer != 0)
        ...{
            memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
            delete [] m_pBuffer;
        }
        CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
        m_nMaxLength = nNewLength;
    }
    // Append
    memcpy(m_pBuffer + CBufferRefT <ELT> :: m_nSize, pcsz, sizeof(ELT) * length);
    CBufferRefT <ELT> :: m_nSize += length;
    if(eol > 0) m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
}
template <class ELT> inline void CBufferT <ELT> :: Append(ELT el, int eol)
...{
    Append(&el, 1, eol);
}
template <class ELT> void CBufferT <ELT> :: Push(ELT el)
...{
    // Realloc
    if(CBufferRefT <ELT> :: m_nSize >= m_nMaxLength)
    ...{
        int nNewLength = m_nMaxLength * 2;
        if( nNewLength < 8 ) nNewLength = 8;
        ELT * pNewBuffer = new ELT[nNewLength];
        if(m_pBuffer != 0)
        ...{
            memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
            delete [] m_pBuffer;
        }
        CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
        m_nMaxLength = nNewLength;
    }
    // Append
    m_pBuffer[CBufferRefT <ELT> :: m_nSize++] = el;
}
template <class ELT> inline int CBufferT <ELT> :: Pop(ELT & el)
...{
    if(CBufferRefT <ELT> :: m_nSize > 0)
    ...{
        el = m_pBuffer[--CBufferRefT <ELT> :: m_nSize];
        return 1;
    }
    else
    ...{
        return 0;
    }
}
template <class ELT> inline int CBufferT <ELT> :: Peek(ELT & el) const
...{
    if(CBufferRefT <ELT> :: m_nSize > 0)
    ...{
        el = m_pBuffer[CBufferRefT <ELT> :: m_nSize - 1];
        return 1;
    }
    else
    ...{
        return 0;
    }
}
template <class ELT> const ELT * CBufferT <ELT> :: GetBuffer() const
...{
    static const ELT _def[] = ...{0}; return m_pBuffer ? m_pBuffer : _def;
}
template <class ELT> ELT * CBufferT <ELT> :: GetBuffer()
...{
    static const ELT _def[] = ...{0}; return m_pBuffer ? m_pBuffer : (ELT *)_def;
}
template <class ELT> ELT * CBufferT <ELT> :: Detach()
...{
    ELT * pBuffer = m_pBuffer;
    CBufferRefT <ELT> :: m_pRef  = m_pBuffer    = 0;
    CBufferRefT <ELT> :: m_nSize = m_nMaxLength = 0;
    return pBuffer;
}
template <class ELT> void CBufferT <ELT> :: Release()
...{
    ELT * pBuffer = Detach();
    if(pBuffer != 0) delete [] pBuffer;
}
template <class ELT> void CBufferT <ELT> :: Prepare(int index, int fill)
...{
    int nNewSize = index + 1;
    // Realloc
    if(nNewSize > m_nMaxLength)
    ...{
        int nNewLength = m_nMaxLength;
        if( nNewLength < 8 )
            nNewLength = 8;
        if( nNewSize > nNewLength )
            nNewLength *= 2;
        if( nNewSize > nNewLength )
        ...{
            nNewLength  = nNewSize + 11;
            nNewLength -= nNewLength % 8;
        }
        ELT * pNewBuffer = new ELT[nNewLength];
        if(m_pBuffer != 0)
        ...{
            memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
            delete [] m_pBuffer;
        }
        CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
        m_nMaxLength = nNewLength;
    }
    // size
    if( CBufferRefT <ELT> :: m_nSize < nNewSize )
    ...{
        memset(m_pBuffer + CBufferRefT <ELT> :: m_nSize, fill, sizeof(ELT) * (nNewSize - CBufferRefT <ELT> :: m_nSize));
        CBufferRefT <ELT> :: m_nSize = nNewSize;
    }
}
template <class ELT> inline void CBufferT <ELT> :: Restore(int size)
...{
    CBufferRefT <ELT> :: m_nSize = size;
}
template <class ELT> CBufferT <ELT> :: ~CBufferT()
...{
    if(m_pBuffer != 0) delete [] m_pBuffer;
}
//
// Context
//
class CContext
...{
public:
    CBufferT <int> m_stack;
    CBufferT <int> m_capturestack, m_captureindex;
public:
    int    m_nCurrentPos;
    int    m_nBeginPos;
    int    m_nLastBeginPos;
    int    m_nParenZindex;
    void * m_pMatchString;
    int    m_pMatchStringLength;
};
//
// Interface
//
class ElxInterface
...{
public:
    virtual int Match    (CContext * pContext) const = 0;
    virtual int MatchNext(CContext * pContext) const = 0;
public:
    virtual ~ElxInterface() ...{};
};
//
// Alternative
//
template <int x> class CAlternativeElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CAlternativeElxT();
public:
    CBufferT <ElxInterface *> m_elxlist;
};
typedef CAlternativeElxT <0> CAlternativeElx;
//
// Assert
//
template <int x> class CAssertElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CAssertElxT(ElxInterface * pelx, int byes = 1);
public:
    ElxInterface * m_pelx;
    int m_byes;
};
typedef CAssertElxT <0> CAssertElx;
//
// Back reference elx
//
template <class CHART> class CBackrefElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CBackrefElxT(int nnumber, int brightleft, int bignorecase);
public:
    int m_nnumber;
    int m_brightleft;
    int m_bignorecase;
    CBufferT <CHART> m_szNamed;
};
//
// Implementation
//
template <class CHART> CBackrefElxT <CHART> :: CBackrefElxT(int nnumber, int brightleft, int bignorecase)
...{
    m_nnumber     = nnumber;
    m_brightleft  = brightleft;
    m_bignorecase = bignorecase;
}
template <class CHART> int CBackrefElxT <CHART> :: Match(CContext * pContext) const
...{
    // check number, for named
    if( m_nnumber < 0 || m_nnumber >= pContext->m_captureindex.GetSize() ) return 0;
    int index = pContext->m_captureindex[m_nnumber];
    if( index < 0 ) return 0;
    // check enclosed
    int pos1 = pContext->m_capturestack[index + 1];
    int pos2 = pContext->m_capturestack[index + 2];
    if( pos2 < 0 ) pos2 = pContext->m_nCurrentPos;
    // info
    int lpos = pos1 < pos2 ? pos1 : pos2;
    int rpos = pos1 < pos2 ? pos2 : pos1;
    int slen = rpos - lpos;
    const CHART * pcsz = (const CHART *)pContext->m_pMatchString;
    int npos = pContext->m_nCurrentPos;
    int tlen = pContext->m_pMatchStringLength;
    // compare
    int bsucc;
    CBufferRefT <CHART> refstr(pcsz + lpos, slen);
    if( m_brightleft )
    ...{
        if(npos < slen)
            return 0;
        if(m_bignorecase)
            bsucc = ! refstr.nCompareNoCase(pcsz + (npos - slen));
        else
            bsucc = ! refstr.nCompare      (pcsz + (npos - slen));
        if( bsucc )
        ...{
            pContext->m_stack.Push(npos);
            pContext->m_nCurrentPos -= slen;
        }
    }
    else
    ...{
        if(npos + slen > tlen)
            return 0;
        if(m_bignorecase)
            bsucc = ! refstr.nCompareNoCase(pcsz + npos);
        else
            bsucc = ! refstr.nCompare      (pcsz + npos);
        if( bsucc )
        ...{
            pContext->m_stack.Push(npos);
            pContext->m_nCurrentPos += slen;
        }
    }
    return bsucc;
}
template <class CHART> int CBackrefElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    int npos = 0;
    pContext->m_stack.Pop(npos);
    pContext->m_nCurrentPos = npos;
    return 0;
}
// RCHART
#ifndef RCHART
    // Casts a character value to CHART, the character type in scope at the
    // point of use. The argument is parenthesized so that a compound
    // expression such as RCHART(a + b) is cast as a whole.
    #define RCHART(ch) ((CHART)(ch))
#endif
// BOUNDARY_TYPE
// Kinds of position (zero-width) boundaries an expression can test for.
enum BOUNDARY_TYPE
{
    BOUNDARY_FILE_BEGIN, // begin of whole text
    BOUNDARY_FILE_END  , // end of whole text
    BOUNDARY_LINE_BEGIN, // begin of line
    BOUNDARY_LINE_END  , // end of line
    BOUNDARY_WORD_BEGIN, // begin of word
    BOUNDARY_WORD_END  , // end of word
    BOUNDARY_WORD_EDGE   // NOTE(review): presumably either edge of a word - confirm
};
//
// Boundary Elx
//
template <class CHART> class CBoundaryElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CBoundaryElxT(int ntype, int byes = 1);
protected:
    static int IsWordChar(CHART ch);
public:
    int m_ntype;
    int m_byes;
};


本文转自jazka 51CTO博客,原文链接:http://blog.51cto.com/jazka/228014,如需转载请自行联系原作者
相关文章
|
6月前
|
算法 测试技术 C#
【动态规划】【字符串】C++算法:正则表达式匹配
【动态规划】【字符串】C++算法:正则表达式匹配
|
4月前
|
存储 C++ 容器
C++一分钟之-正则表达式库(regex)
【7月更文挑战第7天】C++从C++11开始支持正则表达式,通过`<regex>`库提供功能。本文涵盖基本概念如`std::regex`、`std::smatch`,以及`regex_search`和`regex_match`的使用。常见问题包括大小写敏感性、特殊字符转义、贪婪与非贪婪匹配和捕获组。提供的代码示例展示了如何进行匹配、不区分大小写的匹配、特殊字符匹配、贪婪与非贪婪匹配和捕获组的使用。理解并练习正则表达式能提升文本处理效率。
75 0
|
6月前
|
存储 JavaScript API
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
150 0
|
算法 Java
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (1)
重复匹配(正则表达式中的 ? + *) 正则表达式里有4种表示重复的方式,分别是:
71 1
|
算法 测试技术
从0到1打造正则表达式执行引擎(二) NFA转DFA
然后对DFA的节点0执行步骤1,找到NFA中所有a可达的NFA节点(1#2#4#6#8#9)构成NFA中的节点1,如下图。
122 0
|
设计模式 算法
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (2)
看完上文之后,相信你已经知道如何将一个正则表达式转化为状态机了,这里我们要将理论转化为代码。首先我们要将图转化为代码表示,我用State表示一个节点,其中用了Map<MatchStrategy, List> next表示其后继节点,next中的每个key-value就是一条边,MatchStrategy用来描述边的信息。
72 0
|
算法 C++
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
|
C++ Windows Perl
[笔记]c++基础实践《二》regex正则表达式
[笔记]c++基础实践《二》regex正则表达式