//将正规式转变成NFA
package hjzgg.formal_ceremony_to_dfa;
import java.util.ArrayList;
class Edge{
public int u, v;
public char key;
public Edge(int u, int v, char key) {
super();
this.u = u;
this.v = v;
this.key = key;
}
@Override
public String toString() {
return u + "->" + v + " " + key;
}
@Override
public boolean equals(Object arg0) {
Edge tmp = (Edge)arg0;
return tmp.u==this.u && tmp.v==this.v && tmp.key==this.key;
}
@Override
public int hashCode() {
return u+v+key;
}
}
class NFA{
public static final int MAX_NODE = 100;
private boolean finalState[] = new boolean[MAX_NODE];//记录每一个节点是否为终态
private String formal_ceremony;//正规式字符串
private int cnt_node=1;//记录节点的个数
private Map<Integer, Integer> endNode = new TreeMap<Integer, Integer>();//每一个开始节点对应的终端节点
private ArrayList<Edge> nodeAl = new ArrayList<Edge>();
private Vector<Pair>[] g = new Vector[MAX_NODE];//NFA图
private Set<Character> st = new TreeSet<Character>();//正规式中出现的字符的集合
public NFA(String formal_ceremony) {
super();
this.formal_ceremony = formal_ceremony;
}
private void addEdge(int u, int v, char ch){
nodeAl.add(new Edge(u, v, ch));
if(g[u] == null)
g[u] = new Vector<Pair>();
g[u].add(new Pair(v, ch));
if(ch!='$')
st.add(ch);
}
public boolean kernel_way(int fa, int ld, int rd, boolean isClosure){//fa表示区间的开始点,正规式的区间[ld, rd], isClosure表示这段区间查是否存在闭包
if(ld < 0 || rd >= formal_ceremony.length()){
System.out.println("正规式不正确---发生数组越界!");
return false;
}
int pre_node = fa;
int inBracket = 0;//判断'|'是否在括弧内
for(int i=ld; i<=rd; ++i){
if(formal_ceremony.charAt(i)=='(') ++inBracket;
else if(formal_ceremony.charAt(i)==')') --inBracket;
else if(formal_ceremony.charAt(i)=='|' && 0==inBracket){
if(!kernel_way(fa, ld, i-1, isClosure)) return false;
if(!kernel_way(fa, i+1, rd, isClosure)) return false;
return true;
}
}
for(int i=ld; i<=rd; ++i){
if(formal_ceremony.charAt(i)=='('){//又是一个子区间
//寻找和 该 '('相匹配的')'
int cntLeftBracket = 0;//统计遍历过程中'('出现的次数,遇到')'减去1
int posRightBracket = -1;//记录相匹配的')'的位置
int posLeftBracket = i;
for(int j=i+1; j<=rd; ++j){
if(formal_ceremony.charAt(j)=='(')
++cntLeftBracket;
else if(formal_ceremony.charAt(j)==')'){
if(cntLeftBracket == 0){
posRightBracket = j;
break;
}
--cntLeftBracket;
}
}
if(posRightBracket == -1){//出错
System.out.println("正规式出错----括弧不匹配!");
return false;
}
int nodeFather = 0;//括弧内正则式的开始节点
if(posRightBracket+1 <= rd && formal_ceremony.charAt(posRightBracket+1)=='*'){
i = posRightBracket+1;//过滤掉"()*"
addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空
pre_node = cnt_node;
nodeFather = cnt_node;
addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空
pre_node = cnt_node;
//处理()*括弧内的正规式
if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, true)) return false;
} else {
nodeFather = pre_node;
if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, false))//对于"(101)", 看成101
return false;
i = posRightBracket;
}
} else {//单个字符
if(formal_ceremony.charAt(i)==')') continue;
if(i+1 <= rd && formal_ceremony.charAt(i+1)=='*'){
addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空
pre_node = cnt_node;
addEdge(pre_node, pre_node, formal_ceremony.charAt(i));
if(i+1==rd && isClosure)
addEdge(pre_node, fa, '$');//表示这一条边为空并且是连接到父亲节点
else{
if(endNode.containsKey(fa))
addEdge(pre_node, endNode.get(fa), '$');
else{
addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空
if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点
}
}
pre_node = cnt_node;
++i;//过滤*
} else {
if(i==rd && isClosure){//是闭包的情况
addEdge(pre_node, fa, formal_ceremony.charAt(i));
} else{
if(endNode.containsKey(fa))
addEdge(pre_node, endNode.get(fa), formal_ceremony.charAt(i));
else{
addEdge(pre_node, ++cnt_node, formal_ceremony.charAt(i));
if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点
}
}
pre_node = cnt_node;
}
}
}
return true;
}
private void checkFinalState(){//检查哪一个节点是终态
for(int i=1; i<=cnt_node; ++i){
int cc = 0;
if(g[i] == null){//表明是终态
finalState[i] = true;
continue;
}
for(int j=0; j<g[i].size(); ++j)
if(g[i].elementAt(j).v != i)
++cc;
if(cc == 0)//表明是终态
finalState[i] = true;
}
}
public boolean[] getFinalState(){
return finalState;
}
public Vector<Pair>[] getNFAGraphics(){
if(kernel_way(1, 0, formal_ceremony.length()-1, false)){
// for(Edge e : nodeAl)//打印NFA
// System.out.println(e);
checkFinalState();
return g;
}
return null;
}
public Set<Character> getCharacterSet(){
return st;
}
public void outputNFA(){
if(kernel_way(1, 0, formal_ceremony.length()-1, false)){
checkFinalState();
for(Edge e : nodeAl)
System.out.println(e);
}
}
}
/*
* 将正规式转换成NFA
* */
public class ToNFA {
public static void main(String[] args){
String formal_ceremony = "0*(100*)*0*";
// String formal_ceremony = "1(1010*|1(010)*1)*0";
// String formal_ceremony = "1(0|1)*101";
// String formal_ceremony = "0*1*(010)0*1*";
// String formal_ceremony = "(0|1|2)*";
// String formal_ceremony = "0|1";
// String formal_ceremony = "0|1|2|3";
// String formal_ceremony = "(0|1|6)|(2|3)|(4|5)";
// String formal_ceremony = "(0|1)*|(2|3)*";
// String formal_ceremony = "((10)|(01)*|(0|1))";
NFA nfa = new NFA(formal_ceremony);
nfa.outputNFA();
}
}
//将NFA转变成确定化DFA
package hjzgg.formal_ceremony_to_dfa;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import java.util.Vector;
class Pair {
public int v;
public char ch;
public Pair(int v, char ch) {
super();
this.v = v;
this.ch = ch;
}
}
class MyHashSet extends HashSet<Integer>{//重写 set 集合的 hashcode()和equals()方法
private int state;
public void setState(int state){
this.state = state;
}
public int getState(){
return state;
}
@Override
public boolean equals(Object arg0) {
MyHashSet tmp = (MyHashSet)arg0;
if(tmp.size() != this.size()) return false;
Iterator<Integer> it = this.iterator();
while(it.hasNext()){
if(!tmp.contains(it.next())) return false;
}
return true;
}
@Override
public int hashCode() {
int sum = 0;
Iterator<Integer> it = this.iterator();
while(it.hasNext())
sum += (((java.lang.Integer)it.next()).intValue());
return sum;
}
}
class DefinedNFA{
private int dfaNode = 0;//defined DFA节点的个数
private boolean[] finalState = null;//表示NFA中哪一个节点是终态
private boolean[] newFinalState = new boolean[NFA.MAX_NODE] ;
private Vector<Pair>[] g = null;//NFA 图
private Set<Edge>edgeSet = new HashSet<Edge>(); //标记图中的边是否被访问过
private MyHashSet st = null; //集合,表示每一个子集状态
private Queue<MyHashSet> queue = new LinkedList<MyHashSet>();//存放要执行的子集状态
private Set<MyHashSet> sst = new HashSet<MyHashSet>();
private Set<Character> characterSet = null;//正规式中的字符的集合
private ArrayList<Edge> nodeAl = new ArrayList<Edge>();//NFA边的集合
public DefinedNFA(Vector<Pair>[] g, Set<Character> characterSet, boolean[] finalState) {
super();
this.g = g;
this.characterSet = characterSet;
this.finalState = finalState;
}
public Set<Character> getCharacterSet(){
return characterSet;
}
public int getDfaNode(){
return dfaNode;
}
public boolean[] getNewFinalState(){
return newFinalState;
}
public ArrayList<Edge> getNodeAl(){
return nodeAl;
}
private void dfs(int u, char ch){
if(g[u]==null) return ;
int len = g[u].size();
for(int i=0; i<len; ++i){
Pair pair = g[u].elementAt(i);
Edge edge = new Edge(u, pair.v, pair.ch);
if(!edgeSet.contains(edge) && pair.ch==ch){
edgeSet.add(edge);
st.add(pair.v);
dfs(pair.v, '$');
}
}
}
public void checkIsFinalState(Set<Integer> st, int state){
Iterator<Integer> it = st.iterator();
while(it.hasNext()){
int val = it.next();
if(finalState[val])
newFinalState[state] = true;
}
}
private void initFirstSet(){
edgeSet.clear();
st = new MyHashSet();
st.add(1);
st.setState(++dfaNode);
dfs(1, '$');
checkIsFinalState(st, dfaNode);
sst.add(st);
queue.add(st);
}
private void addEdge(int u, int v, char ch){
nodeAl.add(new Edge(u, v, ch));
}
public void ToStateMatrix(){
initFirstSet();
while(!queue.isEmpty()){
MyHashSet myset = queue.poll();
for(Character ch : characterSet){
st = new MyHashSet();
for(Integer i : myset){
edgeSet.clear();
dfs(i, ch);
}
if(st.size()>0){
if(!sst.contains(st)){
sst.add(st);
queue.add(st);
st.setState(++dfaNode);
checkIsFinalState(st, dfaNode);
} else {
Iterator<MyHashSet> it = sst.iterator();
while(it.hasNext()){
MyHashSet tmp = it.next();
if(tmp.equals(st)){
st = tmp;
break;
}
}
}
addEdge(myset.getState(), st.getState(), ch);
}
}
}
}
public void outputDFA(){
ToStateMatrix();//有状态转换矩阵得到defined NFA
for(Edge e : nodeAl)
System.out.println(e);
}
}
public class ToDefinedDFA {
public static void main(String[] args) {
// String formal_ceremony = "((10)|(01)*|(0|1))";
// String formal_ceremony = "(0|1|6)|(2|3)|(4|5)";
// String formal_ceremony = "1(0|1)*101";
String formal_ceremony = "0*(100*)*0*";
NFA nfa = new NFA(formal_ceremony);
DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());
definedDFA.outputDFA();
}
}
//将确定化DFA最小化
package hjzgg.formal_ceremony_to_dfa;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
class MinimumDFA{
private boolean[] newFinalState = null;//由确定化DFA得到
private ArrayList<Edge> nodeAl = null;//由确定化DFA得到
private int dfaNode;//确定化DFA节点的个数
private Set<Character> characterSet = null;//正规式中的字符的集合
private ArrayList<Set<Integer>> setList = new ArrayList<Set<Integer>>();
public MinimumDFA(boolean[] newFinalState, ArrayList<Edge> nodeAl, int dfaNode, Set<Character> characterSet) {
super();
this.newFinalState = newFinalState;
this.nodeAl = nodeAl;
this.dfaNode = dfaNode;
this.characterSet = characterSet;
}
private void init(){//利用分割法将集合分成终态和非终态
Set<Integer> finalStateSet = new HashSet<Integer>();
Set<Integer> NofinalStateSet = new HashSet<Integer>();
for(int i=1; i<=dfaNode; ++i)
if(newFinalState[i])//终态
finalStateSet.add(i);
else
NofinalStateSet.add(i);
setList.add(finalStateSet);
setList.add(NofinalStateSet);
}
public void toMinimumDfa(){
init();
boolean flag = true;
ArrayList<Set<Integer>> tmpSetList = new ArrayList<Set<Integer>>();
while(flag){
flag = false;
hjzgg:
for(int k=0; k<setList.size(); ++k){
Set<Integer> st = setList.get(k);
if(st.size()<=1) continue;
for(Character ch : characterSet){
Map<Integer, Integer> mp = new HashMap<Integer, Integer>();
for(int i=0; i<nodeAl.size(); ++i){//st集合(也就是map的val值)在 ch这个点对应的集合 {st}a = {...}
Edge edge = nodeAl.get(i);
if(edge.key == ch && st.contains(edge.u))
mp.put(edge.u, edge.v);
}
for(Integer i : st)
if(!mp.containsKey(i))//表明i节点对应的是一条空边
mp.put(i, -1);
//将st集合拆分成两个不想交的集合
Set<Integer> firstSet = new HashSet<Integer>();
Set<Integer> secondSet = new HashSet<Integer>();
for(int j=0; j<setList.size(); ++j){
firstSet.clear();
secondSet.clear();
Set<Integer> tmpSt = setList.get(k);
for(Entry<Integer, Integer> entry : mp.entrySet()){//返回此映射中包含的映射关系的 set 视图。返回的 set 中的每个元素都是一个 Map.Entry
if(tmpSt.contains(entry.getValue()))
firstSet.add(entry.getKey());
else secondSet.add(entry.getKey());
}
if(firstSet.size()!=0 && secondSet.size()!=0){
flag = true;//如果发现可以拆分的集合,则继续最顶层的while循环
for(Integer i : tmpSt){//将firstSet 和 secondSet中都没有的元素添加到firstSet中
if(!firstSet.contains(i) && !secondSet.contains(i))
firstSet.add(i);
}
setList.remove(k);
setList.add(firstSet);
setList.add(secondSet);
break hjzgg;
}
}
}
}
}
// for(int k=0; k<setList.size(); ++k)//输出最终的集合划分
// System.out.println(setList.get(k));
// System.out.println("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&");
for(int k=0; k<setList.size(); ++k){
Set<Integer> st = setList.get(k);
if(st.size() > 1){//看成是一个等价的状态,选择第一个元素当作代表
int first=0;
for(Integer i : st){//取得第一个元素
first = i;
break;
}
ArrayList<Edge> tmpList = new ArrayList<Edge>();
for(int i=0; i<nodeAl.size(); ++i){//遍历所有的边,找到不是first
Edge edge = nodeAl.get(i);
if(st.contains(edge.u) && edge.u!=first){
nodeAl.remove(i);
--i;
} else if(st.contains(edge.v) && edge.v!=first){
nodeAl.remove(i);
--i;
tmpList.add(new Edge(edge.u, first, edge.key));
}
}
nodeAl.addAll(tmpList);
}
}
}
public void outputMinimumDFA(){
// for(int i=0; i<nodeAl.size(); ++i)//输出未确定化的DFA
// System.out.println(nodeAl.get(i));
toMinimumDfa();
for(int i=0; i<nodeAl.size(); ++i)
System.out.println(nodeAl.get(i));
}
}
public class ToMinimumDFA {
public static void main(String[] args) {
// String formal_ceremony = "1(0|1)*101";
String formal_ceremony = "0*(100*)*0*";
NFA nfa = new NFA(formal_ceremony);
DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());
definedDFA.ToStateMatrix();
MinimumDFA minimumDFA = new MinimumDFA(definedDFA.getNewFinalState(), definedDFA.getNodeAl(), definedDFA.getDfaNode(), definedDFA.getCharacterSet());
minimumDFA.outputMinimumDFA();
}
}