Postanowiłem nauczyć się w końcu, jak pisać porządny parser/lexer, i porzucić wyrażenia regularne, jako że ich możliwości, jeżeli chodzi o parsowanie, chyba sięgnęły granic w moim wykonaniu. Nie no, sprawdzają się nieźle, ale utrzymanie albo debug to już inna sprawa.
W moich poszukiwaniach trafiłem na tutorial Alexa Millera, niestety używa on składni antlr’a 2. W moich bojach udało mi się uruchomić opisywane przykłady w środowisku ANTLRWorks, korzystając ze składni antlr’a 3; poniżej wyniki, jakby ktoś też potrzebował:
lexer/parser (plik MyGrammar.g)
// Combined lexer/parser grammar (ANTLR 3 syntax) for a small script format:
// a script is a sequence of named blocks, each block holds commands, and
// each command carries zero or more properties. Parser rules rewrite their
// input into an AST rooted at the imaginary tokens declared below.
grammar MyGrammar;

options
{
    k=2;           // fixed lookahead depth of two
    output = AST;  // parser rules build trees via the -> rewrite rules
}

tokens {
    SCRIPT;   // Imaginary token inserted at the root of the script
    BLOCK;    // Imaginary token inserted at the root of a block
    COMMAND;  // Imaginary token inserted at the root of a command
    PROPERTY; // Imaginary token inserted at the root of a property
}

// ---------------------------------------------------------------- parser

// Zero or more blocks, collected under a single SCRIPT root.
script : block* -> ^(SCRIPT block*);

// name { command* }  becomes  ^(BLOCK name command*)
block : STRING LBRACE command* RBRACE -> ^(BLOCK STRING command*);

// name property* ;  becomes  ^(COMMAND name property*)
command : STRING property* SEMI -> ^(COMMAND STRING property*);

// Either "value" or "key = value"; the EQUALS token is dropped from the tree,
// so a PROPERTY node has one or two STRING children.
property
    : (STRING EQUALS)? STRING -> ^(PROPERTY STRING+);

// ----------------------------------------------------------------- lexer

LBRACE : '{';
RBRACE : '}';
EQUALS : '=';
SEMI : ';';

// Line breaks and blanks are discarded one character at a time.
NEWLINE : ('\n'|'\r') {skip();};
WS : (' '|'\t') {skip();};

// A bare word made of letters, digits and underscores, or a double-quoted
// string (quotes are kept in the token text; no escape sequences).
STRING : ('a'..'z'|'A'..'Z'|'0'..'9'|'_')+
    | ('"' (~'"')* '"');

// '#' to end of line. The rule deliberately stops BEFORE the line break:
// the terminator is then consumed by NEWLINE, and a comment on the very
// last line of a file that has no trailing line break still lexes cleanly
// (requiring a NEWLINE here made such input a lexer error).
LINE_COMMENT : '#' ~('\n'|'\r')* {skip();};
Tree walker (plik MyGrammarTree.g)
// Tree grammar (ANTLR 3 syntax) that walks the SCRIPT/BLOCK/COMMAND/PROPERTY
// tree shape named by the rules below and fills Script, Block and Command
// objects through the embedded actions. Those three classes are not defined
// in this grammar.
tree grammar MyGrammarTree;
options {
// Token types are taken from the MyGrammar vocabulary.
tokenVocab=MyGrammar;
// Labels such as b, c, p1 and p2 are typed as CommonTree in the actions.
ASTLabelType=CommonTree;
// NOTE(review): no rule below has a rewrite; presumably this option is kept
// for its effect on the generated rule return types - confirm before removing.
output=AST;
}
@members {
// Result object exposed to callers through getScript().
Script script;
// Returns the script field declared above.
Script getScript(){
return script;
}
// Removes a double quote at the very start and at the very end of s, if present.
private String unquote(String s){
return s.replaceAll( "^\"|\"$", "");
}
}
// Root rule: creates a new Script and hands it to every block child.
// NOTE(review): the init action assigns the bare name script, not $script;
// presumably that resolves to the member field read by getScript() -
// verify against the generated walker code.
script returns [Script script]
@init
{
script = new Script();
}
: ^(SCRIPT (block[script])*);
// ^(BLOCK name command*): builds a Block from the raw text of the first STRING
// (not unquoted), lets each command child attach itself to it, then adds the
// block to the script passed in once the whole subtree has been matched.
block[Script script]
@init
{
Block block = null;
}
:^(
BLOCK
(
b=STRING
{
block = new Block(b.getText());
}
(command[block])*
)
){ script.addBlock(block); };
// ^(COMMAND name property*): builds a Command from the raw text of the first
// STRING and adds it to the enclosing block before its properties are visited.
command[Block block]
@init
{
Command command = null;
}
:^(
COMMAND
(
c=STRING
{
command = new Command(c.getText());
block.addCommand(command);
}
(property[command])*
)
);
// ^(PROPERTY p1 p2?): with one STRING child the one-argument addProperty is
// called, with two children the two-argument overload; texts are unquoted first.
property[Command command]
: ^( PROPERTY
(
p1=STRING (p2=STRING)?
{
if(p2==null){
command.addProperty(unquote(p1.getText()));
}
else {
command.addProperty(unquote(p1.getText()), unquote(p2.getText()));
}
}
)
);
Kod parsera/lexera:
import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;
/**
 * Command-line driver for the lexer and parser generated from MyGrammar.
 *
 * <p>{@link #main(String[])} parses {@code test.bla} and dumps the resulting
 * AST; {@link #main1(String[])} is an alternative entry point that only runs
 * the lexer and lists the tokens it produces.
 */
public class RunParser {

    /** Input file read by both entry points. */
    private static final String INPUT_FILE = "test.bla";

    /**
     * Parses {@code test.bla} with the {@code script} start rule and prints
     * the AST to standard output.
     *
     * @param args ignored
     * @throws Exception whatever the ANTLR file stream or parser throws
     */
    public static void main(String[] args) throws Exception {
        ANTLRFileStream reader = new ANTLRFileStream(INPUT_FILE);
        MyGrammarLexer lexer = new MyGrammarLexer(reader);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        MyGrammarParser parser = new MyGrammarParser(tokens);
        MyGrammarParser.script_return result = parser.script();
        CommonTree ast = (CommonTree) result.getTree();
        printTree(ast, 0);
    }

    /**
     * Recursively prints the children of {@code t}, one per line, as
     * {@code <indent><node text> <token stop index>}. The node {@code t}
     * itself is not printed, only its descendants.
     *
     * @param t      subtree whose children are printed; {@code null} prints nothing
     * @param indent number of spaces placed before each direct child; every
     *               nesting level adds one more (negative values act as zero)
     */
    public static void printTree(CommonTree t, int indent) {
        if (t == null) {
            return;
        }
        // Build the indentation once; it is identical for every child of t.
        StringBuilder sb = new StringBuilder(Math.max(indent, 0));
        for (int i = 0; i < indent; i++) {
            sb.append(" ");
        }
        String prefix = sb.toString();

        for (int i = 0; i < t.getChildCount(); i++) {
            CommonTree child = (CommonTree) t.getChild(i);
            System.out.println(prefix + child.toString() + " " + child.getTokenStopIndex());
            printTree(child, indent + 1);
        }
    }

    /**
     * Lexer-only entry point: prints the type name and text of every token
     * the lexer returns for {@code test.bla}, stopping at end of file.
     *
     * @param args ignored
     * @throws Exception whatever the ANTLR file stream or lexer throws
     */
    public static void main1(String[] args) throws Exception {
        CharStream reader = new ANTLRFileStream(INPUT_FILE);
        MyGrammarLexer lexer = new MyGrammarLexer(reader);
        Token token = lexer.nextToken();
        while (token.getType() != Token.EOF) {
            System.out.println("\t" + getTokenType(token.getType()) + "\t\t" + token.getText());
            token = lexer.nextToken();
        }
    }

    /**
     * Maps a lexer token type constant to a printable name.
     *
     * @param tokenType one of the {@code MyGrammarLexer} token type constants
     * @return the constant's name, or {@code "OTHER"} for any type not listed
     */
    private static String getTokenType(int tokenType) {
        switch (tokenType) {
            case MyGrammarLexer.STRING:
                return "STRING";
            case MyGrammarLexer.LBRACE:
                return "LBRACE";
            case MyGrammarLexer.RBRACE:
                return "RBRACE";
            case MyGrammarLexer.EQUALS:
                return "EQUALS";
            case MyGrammarLexer.SEMI:
                return "SEMI";
            case MyGrammarLexer.LINE_COMMENT:
                return "LINE_COMMENT";
            default:
                return "OTHER";
        }
    }
}
TreeWalker:
import java.io.IOException;
import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
/**
 * Command-line driver that parses {@code test.bla} into an AST, runs the
 * generated {@code MyGrammarTree} walker over it and prints the walker's
 * resulting script object.
 */
public class RunTreeWalker {

    /**
     * Builds the AST for {@code test.bla}, walks it and prints the result of
     * {@code getScript()} to standard output.
     *
     * @param args ignored
     * @throws IOException declared by the file stream used to read the input
     * @throws RecognitionException declared by the parser and walker start rules
     */
    public static void main(String[] args) throws IOException, RecognitionException {
        CommonTree root = buildAst("test.bla");
        MyGrammarTree treeWalker = new MyGrammarTree(new CommonTreeNodeStream(root));
        treeWalker.script();
        System.out.println(treeWalker.getScript());
    }

    /** Lexes and parses the given file with the {@code script} rule and returns the tree root. */
    private static CommonTree buildAst(String path) throws IOException, RecognitionException {
        ANTLRFileStream input = new ANTLRFileStream(path);
        MyGrammarLexer lexer = new MyGrammarLexer(input);
        MyGrammarParser parser = new MyGrammarParser(new CommonTokenStream(lexer));
        MyGrammarParser.script_return parsed = parser.script();
        return (CommonTree) parsed.getTree();
    }
}