highlight.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// can't use regex crate because we need to go byte by byte // possibly regex-automata? there's a manually-walked DFA // // idea is: // // - if this could be the start of a keyword, try to match a keyword // - if we got a keyword, accept it and start the next match 1 byte after // - else, see if the current range matches the beginning of any rules // - if yes, follow those rules until we only have 1 left. longest match wins // - if no, advance by 1 byte and try again // // in general, we first try keywords because they override everything, then we build a set of rules // that are currently possible and winnow them down until we have only 1 left, then we progress // that one until we have the full match. we skip anything that doesn't match any rules. // // this implies that: // // - we only need a list of keywords; we keep advancing until nothing matches, then rewind 1 byte // and take the one that does match // - we need patterns that can be advanced 1 byte at a time // - patterns need to support "could this slice be a start to this pattern?" // - a region is basically the same thing as a pattern, but the "end" condition is explicit rather // than just "nothing matches any more" pub(crate) enum Rule { Keyword(String), Pattern(Regex), Region(Regex, Regex, Regex), UntilEOLRegion(Regex), } pub(crate) struct Highlighter<C: Fn(char) -> bool> { classifier: C, rules: Vec<Rule>, source: Vec<u8>, matches: Vec<usize>, index: usize, } impl Highlighter { pub(crate) fn new<C>(classifier: C, rules: Vec<Rule>, source: &str) -> Highlighter<C> { let source = source.as_bytes().to_vec(); let index = 0; Highlighter { rules, source, index, } } pub(crate) fn highlight(&mut self) -> String { loop { if self.is_done() { break; } } } fn is_done(&self) -> bool { self.index >= self.source.len() } }