source: FCS-QL/trunk/src/main/antlr4/eu/clarin/sru/fcs/qlparser/FCSLexer.g4 @ 6865

Last change on this file since 6865 was 6865, checked in by Leif-Jöran, 9 years ago

Simplified AST

File size: 2.4 KB
Line 
1lexer grammar FCSLexer;
2
3/*
4 * Lexer part of parser for FCS Core FCS-QL version 2.0
5 * default mode
6 * 20150501- /ljo
7 */
8
9L_PAREN: '(';    // this rule and the literal rules below, through COLON, each match exactly the quoted text
10R_PAREN: ')';
11L_SQUARE_BRACKET: '[';
12R_SQUARE_BRACKET: ']';
13OR: '|';
14AND: '&';
15NOT: '!';    // a lone '!'; when '=' follows, the longer OPERATOR_NE match is preferred by the lexer
16FWD_SLASH: '/';
17L_CURLY_BRACKET: '{';
18R_CURLY_BRACKET: '}';
19Q_ONE_OR_MORE: '+';
20Q_ZERO_OR_MORE: '*';
21Q_ZERO_OR_ONE: '?';
22Q_COMMA: ',';
23OPERATOR_EQ: '=';
24OPERATOR_NE: '!=';    // two-character literal; longest-match makes "!=" one token, not NOT followed by OPERATOR_EQ
25COLON: ':';
26
27REGEXP_FLAGS    /* one or more flag letters from the set i, I, c, C, l, d. NOTE(review): declared before IDENTIFIER, so a word made only of these letters (e.g. "c", "id") ties in length and lexes as REGEXP_FLAGS -- confirm the parser expects that */
28    : ( 'i'    /* case-insensitive; Poliqarp/Perl compat */
29      | 'I'    /* case-sensitive; Poliqarp compat */
30      | 'c'    /* case-insensitive, CQP compat */
31      | 'C'    /* case-sensitive */
32      | 'l'    /* literal matching, CQP compat*/
33      | 'd')+  /* diacritic agnostic matching, CQP compat */
34    ;
35
36REGEXP    /* a regular-expression literal; lexically it is just a quoted string */
37    : QUOTED_STRING    /* reuses the QUOTED_STRING rule declared later in this file; REGEXP is declared first, so it wins the equal-length tie between the two */
38    ;
39
40//SIMPLE_ATTRIBUTE
41//    : IDENTIFIER
42//    ;
43
44//QUALIFIED_ATTRIBUTE
45//    :  IDENTIFIER ':' IDENTIFIER
46//    ;
47
48IDENTIFIER    /* a name: one leading character followed by zero or more continuation characters */
49    : IDENTIFIER_FIRST_CHAR IDENTIFIER_CHAR*    /* both character classes are the fragments defined directly below */
50    ; 
51
52fragment IDENTIFIER_FIRST_CHAR    /* the character an identifier may start with: an ASCII letter of either case */
53    : 'a'..'z' | 'A'..'Z'
54    ;
55
56fragment IDENTIFIER_CHAR    /* a character allowed after the first one: ASCII letter, ASCII digit or hyphen */
57    : 'a'..'z' | 'A'..'Z' | '0'..'9' | '-'
58    ;
59
60INTEGER    /* unsigned decimal integer: one or more ASCII digits, emitted as a single token */
61    : [0-9]+    /* the '+' quantifier sits outside the set; inside the brackets it would only be one more set member and each digit would become its own token */
62    ;
63
64WITHIN: 'within';    /* literal keyword "within". NOTE(review): IDENTIFIER is declared earlier in this file and matches the same six characters; ANTLR gives an equal-length match to the earlier rule, so confirm this token is actually produced */
65
66SIMPLE_WITHIN_SCOPE    /* one of ten fixed scope words: five long names, four of them with a one-letter form. NOTE(review): every alternative is also a valid IDENTIFIER, which is declared earlier and wins equal-length ties ('i'-free one-letter forms included) -- confirm these are reachable */
67    : 'sentence'
68    | 's'            /* one-letter form listed right after 'sentence' */
69    | 'utterance'
70    | 'u'            /* one-letter form listed right after 'utterance' */
71    | 'paragraph'
72    | 'p'            /* one-letter form listed right after 'paragraph' */
73    | 'turn'
74    | 't'            /* one-letter form listed right after 'turn' */
75    | 'text'
76    | 'session'
77    ;
78
79/* // doesn't work
80QUOTED_STRING
81    : '\'' (CHAR | WS)*? '\''
82    | '"' (CHAR | WS)*? '"'
83    ;
84*/
85
86QUOTED_STRING    /* a string delimited by a matching pair of single or double quotes, with backslash escapes */
87    : '\'' (ESCAPED_CHAR | ~['\\])* '\''    /* body items: an escape sequence, or any one character other than ' and backslash */
88    | '"' (ESCAPED_CHAR | ~["\\])* '"'    /* body items: an escape sequence, or any one character other than " and backslash */
89    ;
90
91fragment CHAR    /* one string character: an escape sequence or any single character not in the list below. NOTE(review): within the visible file only the commented-out QUOTED_STRING variant refers to this fragment */
92    : ESCAPED_CHAR
93    | ~('\u0009' | '\u000A' | '\u000B' | '\u000C' | '\u000D' | '\u0020' | '\u0085'
94        | '\u00A0' | '\u1680' | '\u2000' | '\u2001' | '\u2002' | '\u2003' | '\u2004'
95        | '\u2005' | '\u2006' | '\u2007' | '\u2008' | '\u2009' | '\u200A' | '\u2028'
96        | '\u2029' | '\u202F' | '\u205F' | '\u3000' ) //anything but white space (the same code points the WS fragment lists)
97       
98    ;
99
100/* one white-space character; contiguous code points are written as ranges */
101fragment WS: '\u0009'..'\u000D' | '\u0020' | '\u0085' | '\u00A0'
102  | '\u1680'
103  | '\u2000'..'\u200A'
104  | '\u2028' | '\u2029' | '\u202F' | '\u205F' | '\u3000'
105  ;
106
107fragment ESCAPED_CHAR    /* a backslash escape sequence; the lexer only recognises it here, it does not decode it */
108    : '\\'    /* the introducing backslash */
109       ( '\\'    /* escaped backslash */
110        | '\''    /* escaped single quote */
111        | '"'    /* escaped double quote */
112        | 'n'    /* backslash followed by the letter n */
113        | 't'    /* backslash followed by the letter t */
114        | 'x' HEX HEX    /* x followed by exactly two hex digits */
115        | 'u' HEX HEX HEX HEX    /* u followed by exactly four hex digits */
116        | 'U' HEX HEX HEX HEX HEX HEX HEX HEX    /* U followed by exactly eight hex digits */
117      )
118    ;
119
120fragment HEX    /* a single hexadecimal digit; letters accepted in either case */
121    : '0'..'9' | 'a'..'f' | 'A'..'F'
122    ;
123
124Space    /* a single white-space character between tokens */
125    : WS -> skip    /* the skip command discards the match, so no Space token is passed on to the parser */
126    ;
Note: See TracBrowser for help on using the repository browser.