正則表達式轉換NFA --java代碼實現

從網上搜了一篇正則表達式轉換NFA的代碼實現,連接:
https://blog.51cto.com/siwanghu/1705664
使用了 (ab|c) * abb這個正則表達式驗證了一下,使用McMaughton-Yamada-Thompson算法進行轉換,結果以下
在這裏插入圖片描述
但代碼跑出來結果以下:
在這裏插入圖片描述
將其轉換爲狀態轉換圖:
在這裏插入圖片描述java

發現代碼的結果沒法表示 正則表達式 – abb ,也就是說代碼對於閉包*這處理有問題,沒有考慮閉包能夠爲 ε 的狀況,仔細看了下代碼:
而後我
開始想得比較簡單,直接將node

Edge edge1 = new Edge(endNode, begNode, "epsilon");

改成了web

Edge edge1 = new Edge(begNode, endNode, "epsilon");

覺得是nfa閉包的兩個節點反了,跑起來發現結果以下:
在這裏插入圖片描述
可是發現這樣少一個4–>3的空(ε)驅動.
最後對比了一下McMaughton-Yamada-Thompson算法中的概括規則:
在這裏插入圖片描述
發現少了一條邊,這條邊對應到代碼裏邊,就是少了這樣一條邊:正則表達式

Edge edge4 = new Edge(graph.getEnd(), graph.getStart(), "epsilon");

(注意在這裏插入圖片描述這裏也要相應加上)
這樣一改以後運行結果就對了!
在這裏插入圖片描述
化成狀態轉換圖爲:
在這裏插入圖片描述算法

代碼以下(相較於原版,加了註釋,並改了上述內容):
Node類:閉包

package regextoNFA;

/** * 狀態結點類 */
public class Node { 
 
  
	private int id;
	private static int ID=0;
	
	public Node() { 
 
  
		this.id = ID++;
	}
	
	public int getId() { 
 
  
		return id;
	}
	
	public static void reset(){ 
 
  
		ID=0;
	}
	
	@Override
	public String toString() { 
 
  
		return id+"";
	}
 
}

Edge類:ide

package regextoNFA;

/** * 鏈接兩狀態結點的邊 * 標籤label 即 驅動字符 其中 epsilon 即 ε */
public class Edge { 
 
  
	private Node begin;
	private Node end;
	private String label;
	private Edge edge;
	
	public Edge() { 
 
  
		super();
	}
 
	public Edge(Node begin, Node end) { 
 
  
		super();
		this.begin = begin;
		this.end = end;
	}
 
	public Edge(Node begin, Node end, String label) { 
 
  
		super();
		this.begin = begin;
		this.end = end;
		this.label = label;
	}
 
	public String getLabel() { 
 
  
		return label;
	}
 
	public void setLabel(String label) { 
 
  
		this.label = label;
	}
 
	public Edge getEdge() { 
 
  
		return edge;
	}
 
	public void setEdge(Edge edge) { 
 
  
		this.edge = edge;
	}
	
	public Node getBegin() { 
 
  
		return begin;
	}
 
	public void setBegin(Node begin) { 
 
  
		this.begin = begin;
	}
 
	public Node getEnd() { 
 
  
		return end;
	}
 
	public void setEnd(Node end) { 
 
  
		this.end = end;
	}
 
	@Override
	public String toString() { 
 
  
		return "Edge [begin="+begin+", end="+end+", label="+label+"]";
	}
	
}

Regex類:svg

package regextoNFA;

import java.util.Stack;

public class Regex { 
 
  
	private String regex = "";
	private Stack operatorStack;
	private Stack operandStack;
	private int[][] priority = { 
 
  
			{ 
 
   1, 1, 1, -1, 1, 1 }, // *&|()#
			{ 
 
   -1, 1, 1, -1, 1, 1 }, { 
 
   -1, -1, 1, -1, 1, 1 },
			{ 
 
   -1, -1, -1, -1, 0, 2 }, { 
 
   1, 1, 1, 1, 1, 1 },
			{ 
 
   -1, -1, -1, -1, -1, -1 } };
 
	public Regex() { 
 
  
		regex = "";
		operatorStack = new Stack();
		operandStack = new Stack();
	}
 
	public Regex(String _regex) { 
 
  
		regex = "";
		operatorStack = new Stack();
		operandStack = new Stack();
		prepareString(_regex);
	}
 
	/** * 核心轉換代碼 * stack 記錄 NFA 片斷 * 依次讀取表達式的每一個字符 ch * 若是 ch 是運算符,從 stack 出棧所需數目的 NFA 片斷,構建新的 NFA 片斷後入棧 stack * 若是 ch 是普通字符,建立新的狀態,並構建只包含此狀態的 NFA 片斷入棧 stack * 返回 stack 棧頂的 NFA 片斷,即最終結果 */
	public Graph transformNFA() { 
 
  
		if (regex.length() == 0)
			return null;
		else { 
 
  
			int i = 0;
			operatorStack.push('#');
			char[] _regex = (regex + "#").toCharArray();
			while (_regex[i] != '#'
					|| (Character) (operatorStack.peek()) != '#') { 
 
  
				if (!isOperator(_regex[i])) { 
 
  
					operandStack.push((Character)_regex[i]);
					i++;
				} else { 
 
  
					int value=priorityOperator((Character)(operatorStack.peek()), _regex[i]);
					switch (value) { 
 
  
					case 1:
						Character character=(Character)operatorStack.pop();
						switch (character) { 
 
  
						case '*':
							Object obj=operandStack.pop();
							Graph graph1=new Graph();
							graph1.Star(obj);
							operandStack.push(graph1);
							break;
						case '&':
							Object obj2=operandStack.pop();
							Object obj1=operandStack.pop();
							Graph graph2=new Graph();
							graph2.Concat(obj1, obj2);
							operandStack.push(graph2);
							break;
						case '|':
							Object obj4=operandStack.pop();
							Object obj3=operandStack.pop();
							Graph graph3=new Graph();
							graph3.Union(obj3, obj4);
							operandStack.push(graph3);
							break;
						default:
							break;
						}
						break;
					case 0:
						operatorStack.pop();
						i++;
						break;
					case -1:
						operatorStack.push(_regex[i]);
						i++;
						break;
					default:
						break;
					}
				}
			}
			return (Graph) operandStack.pop();
		}
	}
	
	public void reset(){ 
 
  
		Node.reset();
		operandStack.clear();
		operatorStack.clear();
	}
 
	public String getRegex() { 
 
  
		return regex;
	}
 
	public void setRegex(String _regex) { 
 
  
		prepareString(_regex);
	}
 
	private boolean isOperator(Character character) { 
 
  
		String operatorString = "*&|()#";
		if (operatorString.contains(character.toString())) { 
 
  
			return true;
		} else { 
 
  
			return false;
		}
	}
 
	private int priorityOperator(Character character1, Character character2) { 
 
  
		String priorityString = "*&|()#";
		return this.priority[priorityString.indexOf(character1.toString())][priorityString
				.indexOf(character2.toString())];
	}
 
	/** * 在構建 NFA 以前,須要對正則表達式進行處理,以 (a|b)*abb 爲例,在正則表達式裏是沒有鏈接符號的,這時就須要添加鏈接符 * 對當前字符類型進行判斷,並對前一個字符進行判斷,最終獲得添加鏈接符以後的字符串 * 如 (a|b)*abb 添加完則爲 (a|b)*&a&b&b */
	private void prepareString(String _regex) { 
 
  
		char[] regexs = _regex.replaceAll(" ", "").toCharArray();
		for (int i = 0; i < regexs.length; i++) { 
 
  
			if (i == 0)
				regex += regexs[i];
			else { 
 
  
				if (regexs[i] == '|' || regexs[i] == '*' || regexs[i] == ')') { 
 
  
					regex += regexs[i];
				} else { 
 
  
					if (regexs[i - 1] == '(' || regexs[i - 1] == '|')
						regex += regexs[i];
					else
						regex += ("&" + regexs[i]);
				}
			}
		}
	}
 
}

Graph類:函數

package regextoNFA;

import java.util.ArrayList;
import java.util.List;
 
public class Graph { 
 
  
	private List<Edge> edges;
	private Node start;
	private Node end;
 
	public Graph() { 
 
  
		edges = new ArrayList<Edge>();
	}
 
	public List<Edge> getEdges() { 
 
  
		return edges;
	}
 
	public Node getStart() { 
 
  
		return start;
	}
 
	public void setStart(Node start) { 
 
  
		this.start = start;
	}
 
	public Node getEnd() { 
 
  
		return end;
	}
 
	public void setEnd(Node end) { 
 
  
		this.end = end;
	}
 
	public void reset() { 
 
  
		Node.reset();
	}
 
	/** * 根據操做符和操做對象來進行鏈接、聯合、閉包處理 * 由參數來判斷調用構建NFA的函數 * 處理a*時會調用處理單字符閉包的函數,對應的也有處理NFA閉包的函數 */
	public void Star(Object obj) { 
 
  
		if (obj.getClass().getName().equals(Graph.class.getName())) { 
 
  
			addStar((Graph) obj);
			return;
		}
		if (obj.getClass().getName().equals(Character.class.getName())) { 
 
  
			addStar((Character) obj);
			return;
		} else { 
 
  
			throw new RuntimeException("You have an error in your Regex syntax");
		}
	}
 
	public void Union(Object obj1, Object obj2) { 
 
  
		if (obj1.getClass().getName().equals(Character.class.getName())) { 
 
  
			if (obj2.getClass().getName().equals(Graph.class.getName())) { 
 
  
				addUnion((Character) obj1, (Graph) obj2);
				return;
			}
			if (obj2.getClass().getName().equals(Character.class.getName())) { 
 
  
				addUnion((Character) obj1, (Character) obj2);
				return;
			}
		}
		if (obj1.getClass().getName().equals(Graph.class.getName())) { 
 
  
			if (obj2.getClass().getName().equals(Graph.class.getName())) { 
 
  
				addUnion((Graph) obj1, (Graph) obj2);
				return;
			}
			if (obj2.getClass().getName().equals(Character.class.getName())) { 
 
  
				addUnion((Graph) obj1, (Character) obj2);
				return;
			}
		} else { 
 
  
			throw new RuntimeException("You have an error in your Regex syntax");
		}
	}
 
	public void Concat(Object obj1, Object obj2) { 
 
  
		if (obj1.getClass().getName().equals(Character.class.getName())) { 
 
  
			if (obj2.getClass().getName().equals(Graph.class.getName())) { 
 
  
				addConcat((Character) obj1, (Graph) obj2);
				return;
			}
			if (obj2.getClass().getName().equals(Character.class.getName())) { 
 
  
				addConcat((Character) obj1, (Character) obj2);
				return;
			}
		}
		if (obj1.getClass().getName().equals(Graph.class.getName())) { 
 
  
			if (obj2.getClass().getName().equals(Graph.class.getName())) { 
 
  
				addConcat((Graph) obj1, (Graph) obj2);
				return;
			}
			if (obj2.getClass().getName().equals(Character.class.getName())) { 
 
  
				addConcat((Graph) obj1, (Character) obj2);
				return;
			}
		} else { 
 
  
			throw new RuntimeException("You have an error in your Regex syntax");
		}
	}
 
	/** * 處理NFA閉包 */
	public void addStar(Graph graph) { 
 
  
		Node begNode = new Node();
		Node endNode = new Node();
		Edge edge1 = new Edge(begNode, endNode, "epsilon");
		Edge edge2 = new Edge(begNode, graph.getStart(), "epsilon");
		Edge edge3 = new Edge(graph.getEnd(), endNode, "epsilon");
		/** * 改動的地方 */
		Edge edge4 = new Edge(graph.getEnd(), graph.getStart(), "epsilon");
		for (int i = 0; i < graph.getEdges().size(); i++) { 
 
  
			this.edges.add(graph.getEdges().get(i));
		}
		this.edges.add(edge1);
		this.edges.add(edge2);
		this.edges.add(edge3);
		/** * 改動的地方 */
		this.edges.add(edge4);
		this.start = begNode;
		this.end = endNode;
	}
 
	/** * 處理單字符閉包 */
	public void addStar(Character character) { 
 
  
		Node nodeCenter = new Node();
		Node nodebeg = new Node();
		Node nodeend = new Node();
		Edge edgeLink = new Edge(nodeCenter, nodeCenter, character.toString());
		Edge edgeEpsilonBeg = new Edge(nodebeg, nodeCenter, "epsilon");
		Edge edgeepsilonEnd = new Edge(nodeCenter, nodeend, "epsilon");
		this.edges.add(edgeLink);
		this.edges.add(edgeEpsilonBeg);
		this.edges.add(edgeepsilonEnd);
		this.start = nodebeg;
		this.end = nodeend;
	}
 
	public void addUnion(Character character, Graph graph) { 
 
  
		Node begNode = new Node();
		Node endNode = new Node();
		Edge edge1 = new Edge(begNode, graph.getStart(), "epsilon");
		Edge edge2 = new Edge(graph.getEnd(), endNode, "epsilon");
		Edge edge3 = new Edge(begNode, endNode, character.toString());
		for (int i = 0; i < graph.getEdges().size(); i++) { 
 
  
			this.edges.add(graph.getEdges().get(i));
		}
		this.edges.add(edge1);
		this.edges.add(edge2);
		this.edges.add(edge3);
		this.start = begNode;
		this.end = endNode;
	}
 
	public void addUnion(Graph graph, Character character) { 
 
  
		Node begNode = new Node();
		Node endNode = new Node();
		Edge edge1 = new Edge(begNode, graph.getStart(), "epsilon");
		Edge edge2 = new Edge(graph.getEnd(), endNode, "epsilon");
		Edge edge3 = new Edge(begNode, endNode, character.toString());
		for (int i = 0; i < graph.getEdges().size(); i++) { 
 
  
			this.edges.add(graph.getEdges().get(i));
		}
		this.edges.add(edge1);
		this.edges.add(edge2);
		this.edges.add(edge3);
		this.start = begNode;
		this.end = endNode;
	}
 
	public void addUnion(Graph graph1, Graph graph2) { 
 
  
		Node begNode = new Node();
		Node endNode = new Node();
		Edge edge1 = new Edge(begNode, graph1.getStart(), "epsilon");
		Edge edge2 = new Edge(begNode, graph2.getStart(), "epsilon");
		Edge edge3 = new Edge(graph1.getEnd(), endNode, "epsilon");
		Edge edge4 = new Edge(graph2.getEnd(), endNode, "epsilon");
		this.start = begNode;
		this.end = endNode;
		for (int i = 0; i < graph1.getEdges().size(); i++) { 
 
  
			this.edges.add(graph1.getEdges().get(i));
		}
		for (int i = 0; i < graph2.getEdges().size(); i++) { 
 
  
			this.edges.add(graph2.getEdges().get(i));
		}
		this.edges.add(edge1);
		this.edges.add(edge2);
		this.edges.add(edge3);
		this.edges.add(edge4);
	}
 
	public void addUnion(Character characterOne, Character characterTwo) { 
 
  
		Node nodebeg = new Node();
		Node nodeend = new Node();
		Edge edgeOne = new Edge(nodebeg, nodeend, characterOne.toString());
		Edge edgeTwo = new Edge(nodebeg, nodeend, characterTwo.toString());
		edges.add(edgeOne);
		edges.add(edgeTwo);
		start = nodebeg;
		end = nodeend;
	}
 
	public void addConcat(Character character, Graph graph) { 
 
  
		Node begNode = new Node();
		Edge edge = new Edge(begNode, graph.getStart(), character.toString());
		for (int i = 0; i < graph.getEdges().size(); i++) { 
 
  
			this.edges.add(graph.getEdges().get(i));
		}
		this.edges.add(edge);
		this.start = begNode;
		this.end = graph.getEnd();
	}
 
	public void addConcat(Graph graph, Character character) { 
 
  
		Node endNode = new Node();
		Edge edge = new Edge(graph.getEnd(), endNode, character.toString());
		for (int i = 0; i < graph.getEdges().size(); i++) { 
 
  
			this.edges.add(graph.getEdges().get(i));
		}
		this.edges.add(edge);
		this.start = graph.getStart();
		this.end = endNode;
	}
 
	public void addConcat(Graph graph1, Graph graph2) { 
 
  
		Edge edge = new Edge(graph1.getEnd(), graph2.getStart(), "epsilon");
		this.start = graph1.getStart();
		this.end = graph2.getEnd();
		for (int i = 0; i < graph1.getEdges().size(); i++) { 
 
  
			this.edges.add(graph1.getEdges().get(i));
		}
		for (int i = 0; i < graph2.getEdges().size(); i++) { 
 
  
			this.edges.add(graph2.getEdges().get(i));
		}
		this.edges.add(edge);
	}
 
	public void addConcat(Character characterOne, Character characterTwo) { 
 
  
		Node begNode = new Node();
		Node midNode = new Node();
		Node endNode = new Node();
		Edge edge1 = new Edge(begNode, midNode, characterOne.toString());
		Edge edge2 = new Edge(midNode, endNode, characterTwo.toString());
		this.start = begNode;
		this.end = endNode;
		this.edges.add(edge1);
		this.edges.add(edge2);
	}
 
	@Override
	public String toString() { 
 
  
		String printString = "Start=" + this.start + " End=" + this.end + "\n";
		for (int i = 0; i < edges.size(); i++) { 
 
  
			printString += edges.get(i) + "\n";
		}
		return printString;
	}
}

運行/修改想轉換的正則表達式的類:this

package regextoNFA;

public class RegexToNfa { 
 
  
	public static void main(String[] args) { 
 
  
		try { 
 
  
			String regexString1 = "(ab|c)*abb";
			Regex regex1 = new Regex(regexString1);
			System.out.println(regex1);
			System.out.println(regex1.getRegex());
			System.out.println(regex1.transformNFA());
			regex1.reset();
		} catch (Exception e) { 
 
  
			System.out.println(e.getMessage());
		}
	}
}

最後感謝原做大佬寫出來的大佬!!!挽救可憐的孩子於水火之中_(: * 」∠)_