Echo Writes Code

highlight.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// we can't use the regex crate directly because we need to advance a match one byte at a time.
// possibly regex-automata would work instead? it exposes a DFA that can be walked manually.
//
// idea is:
//
// - if this could be the start of a keyword, try to match a keyword
// - if we got a keyword, accept it and start the next match 1 byte after
// - else, see if the current range matches the beginning of any rules
// - if yes, follow those rules until we only have 1 left. longest match wins
// - if no, advance by 1 byte and try again
//
// in general, we first try keywords because they override everything, then we build a set of rules
// that are currently possible and winnow them down until we have only 1 left, then we progress
// that one until we have the full match. we skip anything that doesn't match any rules.
//
// this implies that:
//
// - keywords only need to be a plain list; we keep advancing while at least one keyword still
//   matches, then rewind 1 byte and take the keyword that matched at that point
// - we need patterns that can be advanced 1 byte at a time
// - patterns need to support "could this slice be a start to this pattern?"
// - a region is basically the same thing as a pattern, but the "end" condition is explicit rather
//   than just "nothing matches any more"



/// A single highlighting rule.
///
/// NOTE(review): nothing in the visible code constructs or matches on these variants yet, so
/// the per-variant notes below are read off the names and payload types only — confirm them
/// once the matching logic exists.
pub(crate) enum Rule {
	/// A keyword, stored as an owned string.
	Keyword(String),
	/// A rule described by a single regex.
	Pattern(Regex),
	/// A rule described by three regexes; presumably start / contents / end, in some order —
	/// TODO confirm which regex plays which role.
	Region(Regex, Regex, Regex),
	/// A rule described by a single regex; presumably a region that begins at the match and
	/// runs to the end of the line — TODO confirm.
	UntilEOLRegion(Regex),
}

/// State for one highlighting run: the rules, the source as raw bytes, and a cursor into it.
pub(crate) struct Highlighter<C: Fn(char) -> bool> {
	/// Caller-supplied predicate over `char`s.
	/// NOTE(review): no visible code calls it yet — what it classifies is to be confirmed.
	classifier: C,
	/// The rules handed to `new`.
	rules: Vec<Rule>,
	/// The source text as bytes (copied out of the `&str` given to `new`).
	source: Vec<u8>,
	/// NOTE(review): never read or written by the visible code; presumably offsets of
	/// matches found so far — confirm.
	matches: Vec<usize>,
	/// Current byte offset into `source`; `is_done` compares it against `source.len()`.
	index: usize,
}

impl Highlighter {
	pub(crate) fn new<C>(classifier: C, rules: Vec<Rule>, source: &str) -> Highlighter<C> {
		let source = source.as_bytes().to_vec();
		let index = 0;

		Highlighter {
			rules,
			source,
			index,
		}
	}

	pub(crate) fn highlight(&mut self) -> String {
		loop {
			if self.is_done() {
				break;
			}
		}
	}

	fn is_done(&self) -> bool {
		self.index >= self.source.len()
	}
}