Archive for the ‘antlr’ Category

Postanowiłem nauczyć się w końcu, jak pisać porządny parser/lexer, i porzucić wyrażenia regularne, jako że ich możliwości, jeżeli chodzi o parsowanie, chyba sięgnęły granic w moim wykonaniu. Nie no, sprawdzają się nieźle, ale utrzymanie albo debugowanie to już inna sprawa.

W moich poszukiwaniach trafiłem na tutorial Alexa Millera, niestety używa on składni antlr’a 2. W moich bojach udało mi się uruchomić opisywane przykłady w środowisku ANTLRWorks, korzystając ze składni antlr’a 3; poniżej wyniki, jakby ktoś też potrzebował:

lexer/parser (plik MyGrammar.g)

// Combined lexer/parser grammar (ANTLR 3 syntax) for a script made of named
// blocks, each holding commands that may carry properties.
grammar MyGrammar;

// k=2 limits parser lookahead to two tokens; output = AST makes the generated
// parser build a tree whose shape is given by the -> rewrite rules.
options 
{
k=2;
output = AST;
}

// Imaginary tokens: they have no lexer rule and are only used as root nodes
// in the rewrite rules.
tokens {
	SCRIPT;		// Imaginary token inserted at the root of the script
	BLOCK;		// Imaginary token inserted at the root of a block
	COMMAND;     // Imaginary token inserted at the root of a command
	PROPERTY;     // Imaginary token inserted at the root of a property
}

// A script is zero or more blocks, collected under a SCRIPT root.
script	:	block*->^(SCRIPT block*);

// A block is a name followed by commands in braces. The braces are dropped
// from the tree: ^(BLOCK name command*).
block 	:	STRING LBRACE command* RBRACE->^(BLOCK STRING command*);
// A command is a name, any number of properties and a terminating ';'.
// The ';' is dropped from the tree: ^(COMMAND name property*).
command :	STRING property* SEMI->^(COMMAND STRING property*);
// A property is either a single STRING or "STRING = STRING". The '=' is
// dropped, so the PROPERTY node has one or two STRING children.
property 
	:	(STRING EQUALS)? STRING->^(PROPERTY STRING+);

// Punctuation tokens.
LBRACE : '{';
RBRACE : '}';
EQUALS : '=';
SEMI : ';';

// A single line-terminator character; skip() discards it instead of emitting a token.
NEWLINE	: ('\n'|'\r') {skip();};

// A single space or tab; discarded the same way.
WS : (' '|'\t') {skip();};

// Either a bare word (letters, digits, underscore) or a double-quoted string.
// The quoted form keeps its surrounding quotes in the token text and has no
// escape syntax, so it cannot contain a '"' character.
STRING	: ('a'..'z'|'A'..'Z'|'0'..'9'|'_')+
	|	 ('"' (~'"')* '"');

// A '#' starts a comment that runs up to, but not including, the line
// terminator; skip() discards it. The terminator itself is left for the
// NEWLINE rule, so a comment on the last line of a file that has no trailing
// newline is still recognised instead of producing a lexer error.
LINE_COMMENT : '#' ~('\n'|'\r')* {skip();};

Tree walker (plik MyGrammarTree.g)

// Tree grammar that walks the AST produced by MyGrammar and fills a
// Script / Block / Command object model.
tree grammar MyGrammarTree;

// tokenVocab reuses the token types of MyGrammar; ASTLabelType makes node
// labels (b, c, p1, p2 below) CommonTree references.
options {
    tokenVocab=MyGrammar;
    ASTLabelType=CommonTree;
    output=AST;
}

@members {
	// Script handed out by getScript().
	Script script;
	
	/** Returns the script field. */
	Script getScript(){
		return script;
	}
	
	/** Strips a double quote at the start and at the end of s, if present. */
	private String unquote(String s){
		return s.replaceAll( "^\"|\"$", "");
	}
}

// Entry rule: matches the SCRIPT root and walks each BLOCK child, passing the
// Script created in @init to the block rule.
// NOTE(review): the bare name `script` in these actions (no `$` prefix)
// presumably resolves to the field declared in @members, which is what
// getScript() returns — confirm against the generated code.
script returns [Script script]
@init
{
	script = new Script();
}
: ^(SCRIPT (block[script])*);
// Matches one BLOCK node: builds a Block from the name token, walks the
// block's commands, then adds the Block to the script passed in.
block[Script script]
@init
{
	Block block = null;
}
:^(
	BLOCK
	(
		b=STRING
		{
			// Created before the commands are walked so they can be attached to it.
			block = new Block(b.getText());
		}
		(command[block])*
	)
){ script.addBlock(block); };
// Matches one COMMAND node: builds a Command from the name token, adds it to
// the enclosing block, then walks the command's properties.
command[Block block]
@init
{
	Command command = null;
}

:^(
	COMMAND 
	(
		c=STRING
		{
			// The command is attached to its block before its properties are filled in.
			command = new Command(c.getText());
			block.addCommand(command);
		}
		(property[command])*
	)
);
// Matches one PROPERTY node with one or two STRING children and records it on
// the command, with surrounding double quotes removed from each text.
property[Command command]
: ^( PROPERTY 
	(
		p1=STRING (p2=STRING)?
		{
			// p2 stays null when the node has a single STRING child.
			if(p2==null){
				command.addProperty(unquote(p1.getText()));
			}
			else {
				command.addProperty(unquote(p1.getText()), unquote(p2.getText()));
			}
		}
	)
   );

Kod parsera/lexera:

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

/**
 * Command-line driver for the generated MyGrammar lexer and parser.
 *
 * <p>{@link #main} parses the input file and prints the resulting AST;
 * {@link #main1} only tokenizes the same file and prints each token.
 */
public class RunParser {

	/** File read by both entry points. */
	private static final String INPUT_FILE = "test.bla";

	/**
	 * Parses the input file with the {@code script} rule and prints its AST.
	 *
	 * @param args not used
	 * @throws Exception anything raised while reading or parsing the file
	 */
	public static void main(String[] args) throws Exception {
		MyGrammarLexer lexer = new MyGrammarLexer(new ANTLRFileStream(INPUT_FILE));
		MyGrammarParser parser = new MyGrammarParser(new CommonTokenStream(lexer));

		CommonTree root = (CommonTree) parser.script().getTree();
		printTree(root, 0);
	}

	/**
	 * Prints every descendant of {@code t}, one per line, depth-first.
	 * Each line holds three spaces per nesting level, the node's text, a
	 * space and the node's token stop index. The node {@code t} itself is
	 * not printed.
	 *
	 * @param t      subtree whose children are printed; {@code null} prints nothing
	 * @param indent nesting level used for the children of {@code t}
	 */
	public static void printTree(CommonTree t, int indent) {
		if (t == null) {
			return;
		}

		StringBuilder pad = new StringBuilder(indent);
		for (int level = indent; level > 0; level--) {
			pad.append("   ");
		}
		String prefix = pad.toString();

		int childCount = t.getChildCount();
		for (int idx = 0; idx < childCount; idx++) {
			System.out.println(prefix + t.getChild(idx).toString() + " " + t.getChild(idx).getTokenStopIndex());
			printTree((CommonTree) t.getChild(idx), indent + 1);
		}
	}

	/**
	 * Tokenizes the input file and prints the type name and text of every
	 * token up to, but not including, EOF.
	 *
	 * @param args not used
	 * @throws Exception anything raised while reading or tokenizing the file
	 */
	public static void main1(String[] args) throws Exception {
		CharStream input = new ANTLRFileStream(INPUT_FILE);
		MyGrammarLexer lexer = new MyGrammarLexer(input);

		for (Token tok = lexer.nextToken(); tok.getType() != Token.EOF; tok = lexer.nextToken()) {
			System.out.println("\t" + getTokenType(tok.getType()) + "\t\t" + tok.getText());
		}
	}

	/**
	 * Maps a lexer token-type constant to a printable name.
	 *
	 * @param tokenType token type as reported by {@code Token.getType()}
	 * @return the constant's name, or {@code "OTHER"} for any type not listed
	 */
	private static String getTokenType(int tokenType) {
		String name;
		switch (tokenType) {
		case MyGrammarLexer.STRING:
			name = "STRING";
			break;
		case MyGrammarLexer.LBRACE:
			name = "LBRACE";
			break;
		case MyGrammarLexer.RBRACE:
			name = "RBRACE";
			break;
		case MyGrammarLexer.EQUALS:
			name = "EQUALS";
			break;
		case MyGrammarLexer.SEMI:
			name = "SEMI";
			break;
		case MyGrammarLexer.LINE_COMMENT:
			name = "LINE_COMMENT";
			break;
		default:
			name = "OTHER";
			break;
		}
		return name;
	}

}

TreeWalker:

import java.io.IOException;

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;


/**
 * Command-line driver that runs the whole pipeline: MyGrammar lexer and
 * parser, then the MyGrammarTree walker over the resulting AST.
 */
public class RunTreeWalker {

	/**
	 * Parses {@code test.bla}, walks its AST with the {@code script} tree rule
	 * and prints the walker's script object.
	 *
	 * @param args not used
	 * @throws IOException declared for reading the input file
	 * @throws RecognitionException declared for the parser and tree-walker rule calls
	 */
	public static void main(String[] args) throws IOException, RecognitionException {
		MyGrammarLexer lexer = new MyGrammarLexer(new ANTLRFileStream("test.bla"));
		MyGrammarParser parser = new MyGrammarParser(new CommonTokenStream(lexer));
		CommonTree ast = (CommonTree) parser.script().getTree();

		CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
		MyGrammarTree walker = new MyGrammarTree(nodes);
		walker.script();
		System.out.println(walker.getScript());
	}

}