source: FCS-QL/tags/FCS-QL-0.2/src/main/antlr4/eu/clarin/sru/fcs/qlparser/FCSLexer.g4 @ 7233

Last change on this file since 7233 was 7233, checked in by Oliver Schonefeld, 2 years ago
  • tag version 0.2
File size: 2.5 KB
Line 
lexer grammar FCSLexer;

/*
 * Lexer part of parser for FCS Core FCS-QL version 2.0
 * default mode
 * 20150501- /ljo
 */

/*
 * Punctuation, operator and keyword tokens; each matches one fixed literal.
 *
 * NOT ('!') is a prefix of OPERATOR_NE ('!='). The ANTLR lexer prefers the
 * longest match, so the input "!=" is emitted as one OPERATOR_NE token and
 * not as NOT followed by OPERATOR_EQ.
 *
 * WITHIN is declared ahead of IDENTIFIER, so the word "within" (which
 * IDENTIFIER would match with the same length) is emitted as WITHIN.
 */
L_PAREN: '(';
R_PAREN: ')';
L_SQUARE_BRACKET: '[';
R_SQUARE_BRACKET: ']';
OR: '|';
AND: '&';
NOT: '!';
FWD_SLASH: '/';
L_CURLY_BRACKET: '{';
R_CURLY_BRACKET: '}';
Q_ONE_OR_MORE: '+';
Q_ZERO_OR_MORE: '*';
Q_ZERO_OR_ONE: '?';
Q_COMMA: ',';
OPERATOR_EQ: '=';
OPERATOR_NE: '!=';
COLON: ':';
WITHIN: 'within';


/*
 * Scope keywords, in long and one-letter forms where both exist.
 *
 * This rule is declared ahead of IDENTIFIER, so an input that is exactly one
 * of these words (e.g. "s" or "text") is emitted as SIMPLE_WITHIN_SCOPE and
 * not as IDENTIFIER; a longer word such as "sentences" still lexes as
 * IDENTIFIER because the longest match wins.
 */
SIMPLE_WITHIN_SCOPE
    : 'sentence'
    | 's'
    | 'utterance'
    | 'u'
    | 'paragraph'
    | 'p'
    | 'turn'
    | 't'
    | 'text'
    | 'session'
    ;


/*
 * One or more regular-expression flag letters, in any order and with
 * repetition allowed.
 *
 * This rule is declared ahead of IDENTIFIER, so a word made up solely of the
 * letters i, I, c, C, l and d (e.g. "id" or "lid") is emitted as REGEXP_FLAGS
 * and not as IDENTIFIER.
 * NOTE(review): the parser presumably has to accept REGEXP_FLAGS wherever
 * such a word is meant as an identifier -- confirm against the parser grammar.
 */
REGEXP_FLAGS
    : ( 'i'    /* case-insensitive; Poliqarp/Perl compat */
      | 'I'    /* case-sensitive; Poliqarp compat */
      | 'c'    /* case-insensitive, CQP compat */
      | 'C'    /* case-sensitive */
      | 'l'    /* literal matching, CQP compat */
      | 'd')+  /* diacritic agnostic matching, CQP compat */
    ;


/* First character of an identifier: a single ASCII letter. */
fragment IDENTIFIER_FIRST_CHAR
    : [a-zA-Z]
    ;


/* Last character of a multi-character identifier: ASCII letter or digit (no hyphen). */
fragment IDENTIFIER_LAST_CHAR
    : [a-zA-Z0-9]
    ;


/* Inner character of an identifier: ASCII letter, digit or hyphen. */
fragment IDENTIFIER_CHAR
    : [a-zA-Z0-9\-]
    ;


/*
 * Identifier: starts with an ASCII letter, may continue with letters, digits
 * and hyphens, and must not end with a hyphen. A single letter is a valid
 * identifier because the tail group is optional.
 */
IDENTIFIER
    : IDENTIFIER_FIRST_CHAR (IDENTIFIER_CHAR* IDENTIFIER_LAST_CHAR)?
    ;


/* Unsigned integer: one or more ASCII digits (leading zeros are accepted). */
INTEGER
    : [0-9]+
    ;


/*
 * Regular-expression literal: exactly the text of a quoted string.
 *
 * REGEXP and QUOTED_STRING match the same input with the same length; because
 * REGEXP is declared first, the lexer emits REGEXP for quoted strings.
 */
REGEXP
    : QUOTED_STRING
    ;


/* Earlier non-greedy variant, kept for reference -- it doesn't work:
QUOTED_STRING
    : '\'' (CHAR | WS)*? '\''
    | '"' (CHAR | WS)*? '"'
    ;
*/


/*
 * Single- or double-quoted string.
 *
 * Between the quotes, each item is either an ESCAPED_CHAR or any one character
 * other than a backslash and the quote character that delimits the string.
 * Consequently a backslash is only accepted as the start of a recognised
 * escape sequence, the other kind of quote may appear unescaped, and
 * whitespace (including line breaks) is allowed inside the string.
 */
QUOTED_STRING
    : '\'' (ESCAPED_CHAR | ~['\\])* '\''
    | '"' (ESCAPED_CHAR | ~["\\])* '"'
    ;


/*
 * A single escaped character, or any one character that is not in the
 * whitespace list below (the same code points as the WS fragment).
 * In the grammar text visible here this fragment is referenced only by the
 * commented-out QUOTED_STRING variant.
 */
fragment CHAR
    : ESCAPED_CHAR
    | ~('\u0009' | '\u000A' | '\u000B' | '\u000C' | '\u000D' | '\u0020' | '\u0085'
        | '\u00A0' | '\u1680' | '\u2000' | '\u2001' | '\u2002' | '\u2003' | '\u2004'
        | '\u2005' | '\u2006' | '\u2007' | '\u2008' | '\u2009' | '\u200A' | '\u2028'
        | '\u2029' | '\u202F' | '\u205F' | '\u3000' ) // anything but white space
    ;


/*
 * Any Unicode whitespace: one character out of the 25 code points listed
 * (ASCII tab/LF/VT/FF/CR/space, NEL, no-break space, and the Unicode space
 * and line/paragraph separator characters). Used by the Space rule.
 */
fragment WS: '\u0009' | '\u000A' | '\u000B' | '\u000C' | '\u000D' | '\u0020' | '\u0085'
  | '\u00A0' | '\u1680' | '\u2000' | '\u2001' | '\u2002' | '\u2003' | '\u2004'
  | '\u2005' | '\u2006' | '\u2007' | '\u2008' | '\u2009' | '\u200A' | '\u2028'
  | '\u2029' | '\u202F' | '\u205F' | '\u3000'
  ;


/*
 * Escape sequence: a backslash followed by one of
 *   - a backslash or a quote character (' or "),
 *   - n or t,
 *   - one of the punctuation characters . ^ $ * + ? ( ) { [ |
 *   - x plus 2, u plus 4, or U plus 8 hexadecimal digits.
 *
 * The closing brackets '}' and ']' are not in the list, so "\}" and "\]" are
 * not accepted as escapes.
 * NOTE(review): presumably this mirrors the escape list of the FCS-QL
 * specification -- confirm before extending it.
 */
fragment ESCAPED_CHAR
    : '\\'
       ( '\\'
        | '\''
        | '"'
        | 'n'
        | 't'
        | '.'
        | '^'
        | '$'
        | '*'
        | '+'
        | '?'
        | '('
        | ')'
        | '{'
        | '['
        | '|'
        | 'x' HEX HEX
        | 'u' HEX HEX HEX HEX
        | 'U' HEX HEX HEX HEX HEX HEX HEX HEX
      )
    ;


/* One hexadecimal digit, upper or lower case. */
fragment HEX
    : [0-9a-fA-F]
    ;


/* Whitespace between tokens: matched one character at a time and discarded (never reaches the parser). */
Space
    : WS -> skip
    ;
Note: See TracBrowser for help on using the repository browser.