Changes between Version 35 and Version 36 of Taskforces/FCS/FCS-Specification-Draft


Ignore:
Timestamp:
11/03/15 09:02:53 (9 years ago)
Author:
Leif-Jöran
Comment:

FCS-QL EBNF added to normative appendix

Legend:

Unmodified
Added
Removed
Modified
  • Taskforces/FCS/FCS-Specification-Draft

    v35 v36  
    968968|| `http://clarin.eu/fcs/diagnostic/14` || General processing hint. || E.g. "No matches, because layer 'XY' is not available in your selection of resources" || non-fatal || Endpoints `MUST` use this diagnostic only if the Client performed an Advanced Search request. ||
    969969
     970== CLARIN FCS-QL Grammar Specification #fcsQLEBNF
     971The version of the CLARIN FCS-QL is tied to the FCS Core version starting with version 2.0.
     972
     973=== FCS-QL EBNF ===
     974{{{#!comment
     975  Please keep the EBNF nicely formatted. Thanks!
     976}}}
     977{{{
     978 [1] query                ::= main-query within-part?
     979
     980 [2.11] main-query        ::= simple-query
     981                            | simple-query "|" main-query     /* or */
     982                            | simple-query main-query         /* sequence */
     983                            | simple-query quantifier         /* quantification */
     984       
     985 [3.11] simple-query      ::= '(' main-query ')'              /* grouping */
     986                            | implicit-query
     987                            | segment-query     
     988 
     989 [4] implicit-query       ::= flagged-regexp     
     990 
     991 [5] segment-query        ::= "[" expression? "]"       
     992
     993 [6] within-part          ::= simple-within-part
     994
     995 [7] simple-within-part   ::= "within" simple-within-scope
     996
     997 [8] simple-within-scope  ::= "sentence"
     998                            | "s"
     999                            | "utterance"
     1000                            | "u"
     1001                            | "paragraph"
     1002                            | "p"
     1003                            | "turn"
     1004                            | "t"
     1005                            | "text"
     1006                            | "session"         
     1007
     1008[11.11] expression        ::= basic-expression
     1009                            | expression "|" expression     /* or */
     1010                            | expression "&" expression     /* and */
     1011                               
     1012[12.11] basic-expression  ::= '(' expression ')'         /* grouping */
     1013                            | "!" expression                /* not */
     1014                            | attribute operator flagged-regexp
     1015
     1016[13] operator             ::= "="                           /* equals */
     1017                            | "!="                          /* non-equals */
     1018
     1019[14] quantifier           ::= "+"                           /* one-or-more */
     1020                            | "*"                           /* zero-or-more */
     1021                            | "?"                           /* zero-or-one */
     1022                            | "{" integer "}"               /* exactly n-times */
     1023                            | "{" integer? "," integer "}"  /* at most */
     1024                            | "{" integer "," integer? "}"  /* min-max */       
     1025
     1026[15] flagged-regexp       ::= regexp
     1027                            | regexp "/" regexp-flag+   
     1028
     1029[16] regexp-flag          ::= "i"  /* case-insensitive; Poliqarp/Perl compat */
     1030                            | "I"  /* case-sensitive; Poliqarp compat */
     1031                            | "c"  /* case-insensitive, CQP compat */
     1032                            | "C"  /* case-sensitive */
     1033                            | "l"  /* literal matching, CQP compat*/
     1034                            | "d"  /* diacritic agnostic matching, CQP compat */
     1035       
     1036[17] regexp               ::= quoted-string
     1037
     1038[18] attribute            ::= simple-attribute
     1039                            | qualified-attribute
     1040
     1041[19] simple-attribute     ::= identifier
     1042
     1043[20] qualified-attribute  ::= identifier ":" identifier 
     1044
     1045[21.11] identifier        ::= identifier-first-char identifier-char*
     1046
     1047[21.12] identifier-first-char      ::= [a-zA-Z]
     1048
     1049[22] identifier-char      ::= [a-zA-Z0-9\-]
     1050
     1051[24] integer              ::= [0-9]+
     1052
     1053[26] quoted-string        ::= "'" (char | ws)* "'"  /* single-quotes */
     1054                            | '"' (char | ws)* '"'  /* double-quotes */
     1055
     1056[27] char                 ::= <any unicode codepoint excluding whitespace codepoints>
     1057                            | "\" escaped-char
     1058
     1059[28] ws                   ::= <any whitespace codepoint>
     1060
     1061[29] escaped-char         ::= "\"                                  /* backslash (\) */
     1062                            | "'"                                  /* single quote (') */
     1063                            | '"'                                  /* double quote (") */
     1064                            | "n"                                  /* generic newline, i.e. "\n", "\r", etc. */
     1065                            | "t"                                  /* character tabulation (U+0009) */
     1066                            | "x" hex hex                          /* Unicode codepoint with hex value hh */
     1067                            | "u" hex hex hex hex                  /* Unicode codepoint with hex value hhhh */
     1068                            | "U" hex hex hex hex hex hex hex hex  /* Unicode codepoint with hex value hhhhhhhh */
     1069
     1070[30] hex                  ::= [0-9a-fA-F]
     1071}}}
     1072=== Notes ===
     1073 * based on Poliqarp with inspiration from others
     1074 * "attribute": the annotation layer to be used, e.g. "word", "lemma", "pos" or qualified "pos:stts". The supported values for this construct are beyond the scope of the grammar and are defined in supplementary documents.
     1075 * "simple-within-scope": possible values for scope
     1076   *  "sentence", "s", "utterance", "u": denote a matching scope of something like a sentence or utterance. This provides compatibility with FCS 1.0 ("Generic Hits", "Each hit SHOULD be presented within the context of a complete sentence.")
     1077   * "paragraph" | "p" | "turn" | "t": denote the next larger unit, e.g. something like a paragraph
     1078   * "text" | "session": something like a whole document
     1079 * {{{[27]}}} and {{{[28]}}} "any $SOMETHING codepoint" are a pain to get easily done in at least ANTLR and JavaCC. Especially in combination with {{{[29]}}} :/
     1080 * regex are not defined/guarded by this grammar :/
     1081 * non-continuous rule numbers are currently intended; we've already removed some. Rules will be renumbered, when grammar is fixed.
     1082 * Integrated Peter B's suggestion {{{[2v2]}}} and {{{[3v2]}}} together with {{{[11v2]}}} and {{{[12v2]}}} for resolving structural ambiguity, even though ANTLR handles this perfectly fine.
     1083 * Changed "identifier" {{{[21]}}} to only be allowed to start with a letter, i.e. not with a digit or - (hyphen), to more closely resemble XML names.
     1084
    9701085= Non-normative Appendix
    9711086{{{