source: FCS-QL/trunk/src/main/antlr4/eu/clarin/sru/fcs/qlparser/FCSLexer.g4 @ 6863

Last change on this file since 6863 was 6863, checked in by Leif-Jöran, 9 years ago

Moving some lexer tokens to after identifiers.

File size: 2.6 KB
Line 
1lexer grammar FCSLexer;
2
3/*
4 * Lexer part of parser for FCS Core FCS-QL version 2.0
5 * default mode
6 * 20150501- /ljo
7 */
8
9LPAREN: '(';              /* left parenthesis */
10RPAREN: ')';              /* right parenthesis */
11L_SQUARE_BRACKET: '[';    /* left square bracket */
12R_SQUARE_BRACKET: ']';    /* right square bracket */
13OR: '|';                  /* vertical bar */
14AND: '&';                 /* ampersand */
15NOT: '!';                 /* a lone '!'; the longer "!=" is matched by OPERATOR (longest match wins) */
16FWD_SLASH: '/';           /* forward slash */
17
18OPERATOR      /* comparison operator: "=" or "!=" */
19    : '!'?    /* optional leading '!' gives the "!=" form */
20      '='     /* the '=' itself is always required */
21    ;
22
23QUANTIFIER
24    : [+*?]                       /* '+' one-or-more, '*' zero-or-more, '?' zero-or-one */
25    | '{'                         /* counted forms share the surrounding braces */
26        ( INTEGER                 /* {n}   exactly n times */
27        | INTEGER? ',' INTEGER    /* {,m}  at most m;   {n,m}  between n and m */
28        | INTEGER ',' INTEGER?    /* {n,}  at least n;  {n,m}  between n and m */
29        ) '}'
30    ;
31
32REGEXP                /* a quoted string literal, emitted under the token name REGEXP */
33    : QUOTED_STRING   /* matches exactly what QUOTED_STRING matches; declared first, so this rule wins the equal-length tie */
34    ;
35
36SIMPLE_ATTRIBUTE      /* an unqualified attribute name: any text that IDENTIFIER matches */
37    : IDENTIFIER      /* declared before IDENTIFIER and the keyword rules, so it wins every equal-length tie with them */
38    ;                 /* NOTE(review): this leaves IDENTIFIER, WITHIN, SIMPLE_WITHIN_SCOPE and REGEXP_FLAG unreachable as token types - confirm intended */
39QUALIFIED_ATTRIBUTE               /* a qualified attribute name: identifier, ':', identifier, with no white space around the colon */
40    :  IDENTIFIER ':' IDENTIFIER  /* longer than the bare identifier prefix, so longest-match picks this rule when ':' and a second identifier follow */
41    ;
42
43IDENTIFIER                                  /* one ASCII letter followed by any number of ASCII letters, digits or hyphens */
44    : IDENTIFIER_FIRST_CHAR IDENTIFIER_CHAR*  /* also referenced by SIMPLE_ATTRIBUTE and QUALIFIED_ATTRIBUTE, which are declared earlier */
45    ; 
46
47fragment IDENTIFIER_FIRST_CHAR   /* first character of an identifier: one ASCII letter */
48    : 'a'..'z' | 'A'..'Z'
49    ;
50
51fragment IDENTIFIER_CHAR   /* any later identifier character: ASCII letter, digit or hyphen */
52    : 'a'..'z' | 'A'..'Z' | '0'..'9' | '-'
53    ;
54
55INTEGER        /* unsigned decimal integer: one or more digits */
56    : [0-9]+   /* the '+' must sit outside the brackets: inside a set it is a literal plus sign and only one character would match */
57    ;
58
59WITHIN: 'within';  /* keyword "within". NOTE(review): SIMPLE_ATTRIBUTE is declared earlier and matches the same six characters, so the first-rule tie-break gives it the token - confirm whether WITHIN should precede the identifier rules */
60
61SIMPLE_WITHIN_SCOPE   /* fixed list of scope keywords; each one-letter form presumably abbreviates the word listed just above it (TODO confirm) */
62    : 'sentence'
63    | 's'
64    | 'utterance'
65    | 'u'
66    | 'paragraph'
67    | 'p'
68    | 'turn'
69    | 't'
70    | 'text'
71    | 'session'
72    ;                 /* NOTE(review): every alternative is also matched, at the same length, by the earlier SIMPLE_ATTRIBUTE rule, which therefore takes precedence - confirm intended */
73
74REGEXP_FLAG       /* exactly one flag letter from the set below */
75    : [iIcCld]
76      /* i  case-insensitive (Poliqarp/Perl compat);  I  case-sensitive (Poliqarp compat) */
77      /* c  case-insensitive (CQP compat);            C  case-sensitive */
78      /* l  literal matching (CQP compat) */
79      /* d  diacritic agnostic matching (CQP compat) */
80      /* NOTE(review): each letter is also matched by the earlier SIMPLE_ATTRIBUTE rule, which wins the tie - confirm intended */
81    ;
82
83/* // doesnt work
84QUOTED_STRING
85    : '\'' (CHAR | WS)*? '\''
86    | '"' (CHAR | WS)*? '"'
87    ;
88*/
89
90QUOTED_STRING   /* single- or double-quoted string; the body is any mix of escape sequences and characters other than the delimiter and backslash */
91    : '\'' ( ~('\'' | '\\') | ESCAPED_CHAR )* '\''
92    | '"'  ( ~('"'  | '\\') | ESCAPED_CHAR )* '"'
93    ;
94
95fragment CHAR
96    : ESCAPED_CHAR
97      /* otherwise: any one character outside the white-space set negated below */
98    | ~[\u0009-\u000D\u0020\u0085\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]
99      /* U+0009..U+000D, U+0020, U+0085, U+00A0, U+1680, U+2000..U+200A, */
100      /* U+2028, U+2029, U+202F, U+205F, U+3000 */
101       
102    ;
103
104/* Unicode white-space code points, grouped into contiguous ranges */
105fragment WS
106  : [\u0009-\u000D]                      /* tab, LF, VT, FF, CR */
107  | [\u0020\u0085\u00A0\u1680]           /* space, NEL, no-break space, Ogham space mark */
108  | [\u2000-\u200A]                      /* en quad through hair space */
109  | [\u2028\u2029\u202F\u205F\u3000] ;   /* line and paragraph separators, narrow no-break space, medium mathematical space, ideographic space */
110
111fragment ESCAPED_CHAR
112    /* a backslash followed by exactly one of the escape bodies below */
113    : '\\'
114      ( [\\'"nt]
115        /* ^ one-character escapes: backslash, single quote, double quote, n, t */
116      | 'x' HEX HEX
117      | 'u' HEX HEX HEX HEX
118      | 'U' HEX HEX HEX HEX HEX HEX HEX HEX
119        /* ^ hexadecimal escapes with 2, 4 and 8 digits respectively */
120      )
121      /* any other character after the backslash does not match this fragment */
122    ;
123
124fragment HEX   /* one hexadecimal digit, either letter case */
125    : '0'..'9' | 'a'..'f' | 'A'..'F'
126    ;
127
128Space              /* white space appearing where a new token could start */
129    : WS -> skip   /* consumes one WS character; 'skip' discards the match so no token is emitted for it */
130    ;
131
Note: See TracBrowser for help on using the repository browser.