# §{header}:
#
# This is file src/de/mlhartme/mork/xml/xml.grm,
# Mork version 0.5  Copyright © 1998-2002  Michael Hartmeier
#
# Mork is licensed under the terms of the GNU Lesser General Public License.
# It is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the file license.txt for details.
#
# §.

# Grammar for XML 1.0 documents, external parsed entities and external
# parameter entities. Bracketed numbers refer to productions of the XML 1.0
# recommendation.
#
# NOTE(review): the copy this file was re-formatted from had lost its line
# breaks and every literal starting with "<!", "</" or "<?" (up to the next
# ">"). The affected rules are tagged "restored" below; they were re-derived
# from the XML 1.0 productions, from the helper symbols that are defined here
# but were otherwise unreferenced, and from the surviving fragments. Compare
# them against the upstream file before relying on them.

[PARSER]

# this grammar has to match the main entities: document, extSubset, extPE or extParsedEnt
start           ::= document | extParsedEnt | extPE ;

#
# sec 2:

# [1]
document        ::= prolog element Misc* ;

# [22]
prolog          ::= XMLDecl? Misc* (doctypedecl Misc*)? ;

# [27-31]
Misc            ::= Comment | PI | S ;
doctypedecl     ::= "<!DOCTYPE" S Name (S ExternalID)? S?
                    ("[" (markupdecl | DeclSep)* "]" S?)? ">" ;     # restored from [28]
DeclSep         ::= /* TODO: PEReference | */ S;
markupdecl      ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment ;
# TODO: unused
# extSubset     ::= TextDecl? extSubsetDecl ;
extSubsetDecl   ::= (markupdecl | conditionalSect | DeclSep)* ;

#
# sec 3:

# [39]-[64]
element         ::= EmptyElemTag | STag content ETag ;
STag            ::= "<" Name (S Attribute)* S? ">" ;
Attribute       ::= Name Eq AttValue ;
ETag            ::= "</" Name S? ">" ;                              # restored from [42]
content         ::= (CharData|S)? ((element | Reference | CDSect | PI | Comment) (CharData|S)?)* ;
EmptyElemTag    ::= "<" Name (S Attribute)* S? "/>" ;
elementdecl     ::= "<!ELEMENT" S ElementNameDef S contentspec S? ">" ;   # restored from [45]
ElementNameDef  ::= Name ;  # TODO
contentspec     ::= "EMPTY" | "ANY" | Mixed | children ;
children        ::= (choice | seq) ("?" | "*" | "+")? ;
cp              ::= (ElementName | choice | seq) ("?" | "*" | "+")? ;
ElementName     ::= Name ;  # TODO
choice          ::= "(" S? cp ( S? "|" S? cp )+ S? ")" ;   # E50: * -> + to resolve conflict
seq             ::= "(" S? cp ( S? "," S? cp )* S? ")" ;
Mixed           ::= "(" S? "#PCDATA" (S? "|" S? Name)* S? ")" "*"   # changed ")*" -> ")" "*"
                  | "(" S? "#PCDATA" S? ")" ;
AttlistDecl     ::= "<!ATTLIST" S ElementNameA AttDef* S? ">" ;     # restored from [52]
ElementNameA    ::= Name ;  # TODO
AttDef          ::= S Name S AttType S DefaultDecl ;
AttType         ::= StringType | TokenizedType | EnumeratedType ;
StringType      ::= "CDATA" ;
TokenizedType   ::= "ID" | "IDREF" | "IDREFS" | "ENTITY" | "ENTITIES" | "NMTOKEN" | "NMTOKENS" ;
EnumeratedType  ::= NotationType | Enumeration ;
NotationType    ::= "NOTATION" S "(" S? Name (S? "|" S? Name)* S? ")";
Enumeration     ::= "(" S? Nmtoken (S? "|" S? Nmtoken)* S? ")";
DefaultDecl     ::= "#REQUIRED" | "#IMPLIED" | ("#FIXED" S | None) AttValue ;
None            ::= ;  # TODO: "optional" mapping problem?
conditionalSect ::= includeSect | ignoreSect ;
includeSect     ::= "<![" S? "INCLUDE" S? "[" extSubsetDecl "]]>" ;        # restored from [62]
ignoreSect      ::= "<![" S? "IGNORE" S? "[" ignoreSectContents* "]]>" ;   # restored from [63]
ignoreSectContents ::= Ignore ("<![" ignoreSectContents "]]>" Ignore)* ;   # restored from [64]

#
# sec 4:

# [70]-[76]
EntityDecl      ::= GEDecl | PEDecl ;
GEDecl          ::= "<!ENTITY" S Name S EntityDef S? ">" ;          # restored from [71]
PEDecl          ::= "<!ENTITY" S "%" S Name S PEDef S? ">" ;        # restored from [72]
EntityDef       ::= EntityValue | ExternalID NDataDecl? ;
PEDef           ::= EntityValue | ExternalID ;
ExternalID      ::= "SYSTEM" S SystemLiteral
                  | "PUBLIC" S PublicLiteral S SystemLiteral ;
NDataDecl       ::= S "NDATA" S Name ;

# [78]-[79]
extParsedEnt    ::= ParsedEntTextDecl content ;
extPE           ::= PETextDecl extSubsetDecl ;

# [82]-[83]
# restored from [82]; NotationID already consumes the optional trailing S
NotationDecl    ::= "<!NOTATION" S Name S NotationID ">" ;
# "NotationID" replaces "ExternalID | PublicID" to work around this conflict on S:
#   ExternalID ::= "PUBLIC" S PublicLiteral . S SystemLiteral
#   PublicID   ::= "PUBLIC" S PublicLiteral .
NotationID      ::= "PUBLIC" S PublicLiteral (S SystemLiteral)? S?
                  | "SYSTEM" S SystemLiteral S?;


[SCANNER]
    nopriorities;
    white = PEReference;

#
# sect 2:

# [2]-[21]
Char            ::= 0x9 | 0xA | 0xD | 0x20..0xD7FF | 0xE000..0xFFFD ;   # TODO: 0x10000..0x10FFFF
S               ::= (0x20 | 0x9 | 0xD | 0xA)+ ;
NameChar        ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender ;
Name            ::= (Letter | '_' | ':') NameChar* ;
# TODO: unused
# Names         ::= Name (S Name)* ;
Nmtoken         ::= NameChar+ ;
# TODO: unused
# Nmtokens      ::= Nmtoken (S Nmtoken)* ;
EntityValue     ::= '"' (('&'|'"')! | PEReference | Reference)* '"'
                  | '\'' (('&'|'\'')! | PEReference | Reference)* '\'' ;
AttValue        ::= '"' (('<' | '&' | '"')! | Reference)* '"'
                  | '\'' (('<' | '&' | '\'')! | Reference)* '\'';
SystemLiteral   ::= '"' '"'!* '"' | '\'' '\''!* '\'' ;
# [12]: inside single quotes the excluded character is the quote itself.
# (This used to exclude '-', which rejected ids like '-//W3C//DTD ...' and
# let an embedded quote through.)
PublicLiteral   ::= '"' PublicChar* '"' | '\'' (PublicChar - '\'')* '\'' ;
PublicChar      ::= 0x20 | 0xD | 0xA | 'a'..'z' | 'A'..'Z' | '0'..'9'
                  | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':'
                  | '=' | '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' ;
CharData        ::= ( ('<' | '&' | '%')!+                              # TODO: + --> *
                      - (('<' | '&' | '%')!* "]" ('<' | '&' | '%')!*)) - S;
                    # TODO: ] --> ]]>
                    # TODO: "+" around it?
                    # TODO: remove completely?
                    # TODO: §2.4 does not exclude %
                    # TODO: -S
                    #   works around scanner conflict between S and CharData
                    #   that both might follow an empty tag
Comment         ::= "<!--" ((Char - '-') | ('-' (Char - '-')))* "-->" ;   # restored from [15]
PI              ::= "<?" PITarget (S (Char* - (Char* "?>" Char*)))? "?>" ; # restored from [16]
PITarget        ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) ;
CDSect          ::= CDStart CData CDEnd ;
CDStart         ::= "<![CDATA[" ;                                   # restored from [19]
CData           ::= Char* - (Char* "]]>" Char*) ;                   # restored from [20]
CDEnd           ::= "]]>" ;

# [23]-[26]
XMLDecl         ::= "<?xml" VersionInfo EncodingDecl? SDDecl? S? "?>";   # restored from [23]
VersionInfo     ::= S "version" Eq ('\'' VersionNum '\'' | '"' VersionNum '"') ;
Eq              ::= S? '=' S? ;
VersionNum      ::= ('a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '.' | ':' | '-')+ ;

# [32]
SDDecl          ::= S "standalone" Eq (('\'' ("yes" | "no") '\'') | ('"' ("yes" | "no") '"')) ;

# TODO: unused:
# LanguageID    ::= Langcode ('-' Subcode)* ;
# Langcode      ::= ISO639Code | IanaCode | UserCode ;
# ISO639Code    ::= ('a'..'z' | 'A'..'Z') ('a'..'z' | 'A'..'Z') ;
# IanaCode      ::= ('i' | 'I') '-' ('a'..'z' | 'A'..'Z')+ ;
# UserCode      ::= ('x' | 'X') '-' ('a'..'z' | 'A'..'Z')+ ;
# Subcode       ::= ('a'..'z' | 'A'..'Z')+ ;

# TODO: where are [33]-[38]?

#
# sect 3

# [65]
# restored; "]" (not "]]>") is inferred from the surviving TODO remnant
Ignore          ::= (Char+ - (Char* ("<![" | "]") Char*)) ;
                    # TODO: ] --> ]]>
                    # TODO: Char+ --> Char*
                    # TODO: - PEReference

#
# sect 4

# [66-69]
# [66]: both alternatives need at least one digit, so "&#x;" is rejected.
CharRef         ::= "&#" ('0'..'9')+ ";"
                  | "&#x" ('0'..'9' | 'a'..'f' | 'A'..'F')+ ';' ;
Reference       ::= EntityRef | CharRef ;
EntityRef       ::= '&' Name ';' ;
PEReference     ::= '%' Name ';' ;

# [77]
# The rule
#   TextDecl ::= "<?xml" VersionInfo? EncodingDecl S? "?>" ;
# has been duplicated, the "?" was moved inside and each copy starts with a
# string constant. Rationale:
# o string constants resolve parser conflicts
# o duplicated symbols solve a scanner conflict to reduce TextDecl on CharData or PEReference
ParsedEntTextDecl ::= "1" ("<?xml" VersionInfo? EncodingDecl S? "?>")? ;   # restored from [77]
PETextDecl        ::= "2" ("<?xml" VersionInfo? EncodingDecl S? "?>")? ;   # restored from [77]

# [80]-[81]
EncodingDecl    ::= S "encoding" Eq ('"' EncName '"' | '\'' EncName '\'') ;
EncName         ::= ('A'..'Z' | 'a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '_' | '-')* ;

#
# appendix B

Letter          ::= BaseChar | Ideographic ;
BaseChar        ::= 0x0041..0x5A | 0x0061..0x007A | 0x00C0..0x00D6
                  | 0x00D8..0x00F6 | 0x00F8..0x00FF;   #TODO: > 255
Ideographic     ::= 0x4E00..0x9FA5 | 0x3007 | 0x3021..0x3029 ;
CombiningChar   ::= 0x0300..0x0345 | 0x0360..0x0361;   # TODO: > 0x03ff
Digit           ::= 0x0030..0x0039 ;                   # TODO: > 255
Extender        ::= 0x00B7;                            # TODO > 255