//
// JavaCC GDMO Parser
// This parser was originally designed to read GDMO Managed Objects and
// to build an AST which could be used for translation to UML.
//
// The GRAMMAR has been tested against a number of standard Managed Object
// models but has one or two known quirks. The main one is that at present
// only the double quote terminator for BEHAVIOUR and other description texts
// is supported. It's not hard to change this but I just have not done it.
//
// %W% dated %E% at %U%
//
// Dermot Dwyer (dermot@cognesis.com)
// Cognesis Limited
// The Portergate
// Ecclesall Road
// Sheffield
// S11 8NX United Kingdom
//
// Tel: +44 114 209 6039
//

options
{
  LOOKAHEAD = 1;
  CHOICE_AMBIGUITY_CHECK=1;
  OTHER_AMBIGUITY_CHECK=1;
  STATIC=false;

  // Parser debug options - comment back in to generate a version which dumps
  // out a trace as the parse progresses
  // DEBUG_PARSER = true;
  // DEBUG_LOOKAHEAD = true;
}

PARSER_BEGIN(GdmoTranslator)

import syntaxtree.*;
import visitor.*;

/**
 * Command-line front end for the GDMO grammar: reads GDMO text from standard
 * input and parses it starting at the DocumentSpecification production.
 */
public class GdmoTranslator
{
  /**
   * Application entry point. Builds a parser over {@code System.in}, runs the
   * parse via {@link #go()} and then prints a completion message.
   *
   * Possible use as an applet or bean is for further study.
   *
   * NOTE(review): go() reports failures itself and does not rethrow, so the
   * completion message below is printed even when the parse failed.
   *
   * @param args command-line arguments (not read)
   * @throws ParseException declared for the parser API; go() catches the
   *         exceptions raised while parsing
   */
  public static void main(String args[]) throws ParseException
  {
    // Construct an instance of the translator
    GdmoTranslator parser = new GdmoTranslator(System.in);
    parser.go();
    System.out.println("Parse Complete - UML Production Begins");
    // Visitor code goes here
  }

  /**
   * Parses the input starting at the DocumentSpecification production.
   * Any ParseException or other Exception is reported on standard output
   * ("Ooops." followed by the exception message) and is not propagated;
   * unexpected exceptions additionally print a stack trace.
   */
  public final void go()
  {
    try // The Parser classes can throw a parse exception
    {
      // All JAVACC specifications have the same level of hierarchy
      // therefore any of them may be used as a target production. Here
      // we try to evaluate the production DocumentSpecification()
      //
      // NOTE(review): the production is declared void in this grammar; the
      // syntaxtree/visitor imports suggest a tree-builder pass (presumably
      // JTB) rewrites it to return a Node - confirm against the build.
      Node root = DocumentSpecification();

      // Prepare the GdmoVisitor class to perform UML generation
      // GdmoVisitor gdmov = new GdmoDeclarationVisitor();
      // root.accept(gdmov);
      // return gdmov;
    }
    catch (ParseException p)
    {
      // Uses a slightly modified version of ParseException which
      // provides a better level of feed back to the user
      System.out.println("Ooops.");
      System.out.println(p.getMessage());
      // return null;
    }
    catch (Exception e)
    {
      // Any other failure during the parse, so tell the world about it
      System.out.println("Ooops.");
      System.out.println(e.getMessage());
      e.printStackTrace();
      // return null;
    }
  }
}

PARSER_END(GdmoTranslator)

// White space is not significant in Gdmo so it is simply
// Skipped by the translator
SKIP :
{
  " "
| "\t"
| "\n"
| "\r"
// Comments are broken out into a lexer state
| "--" : ONE_LINE_COMMENT // As are single line comments and ...
}

// Handle one-line ASN.1 style comments which are introduced by the characters "--"
// and end at the next newline or at the next "--", whichever comes first.
// Both rules below must be scoped to the ONE_LINE_COMMENT state: in DEFAULT
// the catch-all MORE rule would match every character of the input.
<ONE_LINE_COMMENT>
SKIP: // A one-line comment simply discards text until
{     // the next newline character (or closing "--") is detected
  <( "\n" | "--" )> : DEFAULT
}

<ONE_LINE_COMMENT>
MORE: // Just keep going.....
{
  < ~[] >
}

// Gdmo comments are stripped by the lexical analyser. This is a temporary measure as in future
// comments will have to be carried over into the UML. To do this an AST node will have to be
// created which associates a comment block with a code segment
//
// Basic Gdmo tokeniser stuff. The keywords recognised by this parser are
// listed in the following token table. Note that tokens are specified in order
// of increasing string length which "persuades" the lexer to optimise the DFA
// bit vectors.
//
// NOTE(review): the keyword definitions that belong between the "|"
// separators in the table below appear to have been lost from this copy of
// the grammar (only the separators and the group comments remain). They are
// left exactly as found and must be restored from the original grammar
// before this file can be processed.
TOKEN :
{
// Special character symbols
| | | | | | | |
// Two character tokens
| | |
// Three character tokens
| | | | |
// Four character tokens
| | | |
// Five character tokens
| | | | | |
// Six character tokens
| | | | | | | |
// Seven character tokens
| | | | | | | | | | | | | | | | |
// Eight character tokens
| | | | | | | | | | | | | | | | | | | | | | | | | | | | |
//
// Certain nationalities struggle with the spelling of BEHAVIOUR so
// Lets accept both. This would be better done with a rule in the
// grammar but I'm far from perfect
//
|
//
// Multinational approach to the spelling of CHARACTERISED as well
|
}

// LITERALS
// INTEGER_LITERAL and FLOATING_POINT_LITERAL are built from the private
// (#-prefixed) helper tokens defined alongside them.
TOKEN :
{
  < INTEGER_LITERAL:
        <DECIMAL_LITERAL> (["l","L"])?
      | <HEX_LITERAL> (["l","L"])?
      | <OCTAL_LITERAL> (["l","L"])?
  >
|
  < #DECIMAL_LITERAL: ["1"-"9"] (["0"-"9"])* >
|
  // Hex values use the quoted 'xxxx'H form
  < #HEX_LITERAL: "'" (["0"-"9","a"-"f","A"-"F"])+ "'H" >
|
  < #OCTAL_LITERAL: "0" (["0"-"7"])* >
|
  < FLOATING_POINT_LITERAL:
        (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","d","D"])?
      | "." (["0"-"9"])+ (<EXPONENT>)? (["f","F","d","D"])?
      | (["0"-"9"])+ <EXPONENT> (["f","F","d","D"])?
      | (["0"-"9"])+ (<EXPONENT>)? ["f","F","d","D"]
  >
|
  < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
|
  < STRING_LITERAL:
      "\""
      (   (~["\"","\\"])
        | ("\\"
            ( ["n","t","b","r","f","\\","'","\""]
            | ["0"-"7"] ( ["0"-"7"] )?
            | ["0"-"3"] ["0"-"7"] ["0"-"7"]
            )
          )
      )*
      "\""
  >
|
  // The descriptive texts should really be allowed to be delimited by a range of
  // different characters including quote, shriek etc. For now I've handled only the
  // the two I need - quote and shriek. This would be much better done by recognising
  // the introducer character and then consuming input up to the matching delimiter.
  // The whole lot does need to be buffered, however, as I want to whack the text into
  // my UML models.
  < SHRIEK_LITERAL:
      "!"
      (   (~["!","\\"])
        | ("\\"
            ( ["n","t","b","r","f","\\","'"]
            | ["0"-"7"] ( ["0"-"7"] )?
            | ["0"-"3"] ["0"-"7"] ["0"-"7"]
            )
          )
      )*
      "!"
  >
}

// ID is the basic recogniser for an identifier. In particular it requires that an
// identifier starts with an uppercase alphameric.
// ASN.1 Types, which are used to describe
// the attribute syntax of Managed Object attributes, conform to this definition of ID
//
TOKEN :
{
  // An upper-case letter followed by any mix of letters, digits and hyphens
  < ID: ["A"-"Z"] ( ["a"-"z","A"-"Z","-","0"-"9"] )*>
}

// I have re-used the ASN.1 definitions for an ID and a LABEL. A LABEL begins
// with a lower-case alphameric
TOKEN:
{
  // A lower-case letter followed by any mix of letters, digits and hyphens
  < LABEL: ["a"-"z"] ( ["a"-"z","A"-"Z","-","0"-"9"] )*>
}

// This is the fundamental Non-terminal that we are seeking to build in
// Parsing the GDMO input. At present it is assumed that the basic
// translation unit is a Template. The label for each template has been
// factored from the template to avoid a LOOKAHEAD of 2 as the very first
// thing we do
void DocumentSpecification () : { } { (