// highlight.rs
// can't use regex crate because we need to go byte by byte
// possibly regex-automata? there's a manually-walked DFA
//
// idea is:
//
// - if this could be the start of a keyword, try to match a keyword
// - if we got a keyword, accept it and start the next match 1 byte after
// - else, see if the current range matches the beginning of any rules
// - if yes, follow those rules until we only have 1 left. longest match wins
// - if no, advance by 1 byte and try again
//
// in general, we first try keywords because they override everything, then we build a set of rules
// that are currently possible and winnow them down until we have only 1 left, then we progress
// that one until we have the full match. we skip anything that doesn't match any rules.
//
// this implies that:
//
// - we only need a list of keywords; we keep advancing until nothing matches, then rewind 1 byte
// and take the one that does match
// - we need patterns that can be advanced 1 byte at a time
// - patterns need to support "could this slice be a start to this pattern?"
// - a region is basically the same thing as a pattern, but the "end" condition is explicit rather
// than just "nothing matches any more"
/// A single highlighting rule that the highlighter tries to match against the source bytes.
///
/// Matching is meant to proceed one byte at a time, so every regex-backed variant must be able
/// to answer "could this slice be the start of a match?" as well as "is this a full match?".
pub(crate) enum Rule {
/// A literal keyword. Keywords are intended to be tried first and to override all other rules.
Keyword(String),
/// A pattern whose match ends implicitly, i.e. when extending it by another byte no longer
/// matches (longest match wins).
Pattern(Regex),
/// A region: like a pattern, but with an explicit end condition.
/// NOTE(review): the role of each of the three regexes is not visible here — presumably
/// start / skip-or-escape / end; confirm against the matcher once it exists.
Region(Regex, Regex, Regex),
/// NOTE(review): judging by the name, a region that begins where the regex matches and runs
/// to the end of the line — confirm.
UntilEOLRegion(Regex),
}
/// State for a byte-by-byte highlighting pass over a single source text.
pub(crate) struct Highlighter<C: Fn(char) -> bool> {
/// Caller-supplied character predicate.
/// NOTE(review): not used by any visible code; presumably decides which characters may form
/// part of a keyword/word — confirm.
classifier: C,
/// The rules to match against the source.
rules: Vec<Rule>,
/// Raw bytes of the text being highlighted.
source: Vec<u8>,
/// NOTE(review): not used by any visible code; presumably indices into `rules` of the rules
/// that are still candidates for the current match — confirm.
matches: Vec<usize>,
/// Current byte offset into `source`; the pass is finished once it reaches `source.len()`.
index: usize,
}
impl Highlighter {
pub(crate) fn new<C>(classifier: C, rules: Vec<Rule>, source: &str) -> Highlighter<C> {
let source = source.as_bytes().to_vec();
let index = 0;
Highlighter {
rules,
source,
index,
}
}
pub(crate) fn highlight(&mut self) -> String {
loop {
if self.is_done() {
break;
}
}
}
fn is_done(&self) -> bool {
self.index >= self.source.len()
}
}