1use std::iter::once;
6use std::ops::Range;
7
8use malloc_size_of_derive::MallocSizeOf;
9use rayon::iter::Either;
10use unicode_segmentation::UnicodeSegmentation;
11
12use crate::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
13
14#[derive(Clone, Copy, MallocSizeOf)]
15pub enum Lines {
16 Single,
17 Multiple,
18}
19
20impl Lines {
21 fn contents_vec(&self, contents: impl Into<String>) -> Vec<String> {
22 match self {
23 Self::Multiple => {
24 let mut contents: Vec<_> = contents
26 .into()
27 .replace("\r\n", "\n")
28 .split(['\n', '\r'])
29 .map(|line| format!("{line}\n"))
30 .collect();
31 if let Some(last_line) = contents.last_mut() {
33 last_line.truncate(last_line.len() - 1);
34 }
35 contents
36 },
37 Self::Single => {
38 vec![contents.into()]
39 },
40 }
41 }
42}
43
44pub enum RopeMovement {
46 Grapheme,
47 Word,
48 Line,
49 LineStartOrEnd,
50 RopeStartOrEnd,
51}
52
53#[derive(MallocSizeOf)]
58pub struct Rope {
59 lines: Vec<String>,
62 mode: Lines,
67}
68
69impl Rope {
70 pub fn new(contents: impl Into<String>, mode: Lines) -> Self {
71 Self {
72 lines: mode.contents_vec(contents),
73 mode,
74 }
75 }
76
77 pub fn mode(&self) -> Lines {
78 self.mode
79 }
80
81 pub fn contents(&self) -> String {
82 self.lines.join("")
83 }
84
85 pub fn last_index(&self) -> RopeIndex {
86 let line_index = self.lines.len() - 1;
87 RopeIndex::new(line_index, self.line(line_index).len())
88 }
89
90 pub fn replace_range(
93 &mut self,
94 mut range: Range<RopeIndex>,
95 string: impl Into<String>,
96 ) -> RopeIndex {
97 range.start = self.clamp_index(range.start);
98 range.end = self.clamp_index(range.end);
99 assert!(range.start <= range.end);
100
101 let start_index = range.start;
102 self.delete_range(range);
103
104 let mut new_contents = self.mode.contents_vec(string);
105 let Some(first_line_of_new_contents) = new_contents.first() else {
106 return start_index;
107 };
108
109 if new_contents.len() == 1 {
110 self.line_for_index_mut(start_index)
111 .insert_str(start_index.code_point, first_line_of_new_contents);
112 return RopeIndex::new(
113 start_index.line,
114 start_index.code_point + first_line_of_new_contents.len(),
115 );
116 }
117
118 let start_line = self.line_for_index_mut(start_index);
119 let last_line = new_contents.last().expect("Should have at least one line");
120 let last_index = RopeIndex::new(
121 start_index.line + new_contents.len().saturating_sub(1),
122 last_line.len(),
123 );
124
125 let remaining_string = start_line.split_off(start_index.code_point);
126 start_line.push_str(first_line_of_new_contents);
127 new_contents
128 .last_mut()
129 .expect("Should have at least one line")
130 .push_str(&remaining_string);
131
132 let splice_index = start_index.line + 1;
133 self.lines
134 .splice(splice_index..splice_index, new_contents.into_iter().skip(1));
135 last_index
136 }
137
138 fn delete_range(&mut self, mut range: Range<RopeIndex>) {
139 range.start = self.clamp_index(range.start);
140 range.end = self.clamp_index(range.end);
141 assert!(range.start <= range.end);
142
143 if range.start.line == range.end.line {
144 self.line_for_index_mut(range.start)
145 .replace_range(range.start.code_point..range.end.code_point, "");
146 return;
147 }
148
149 let removed_lines = self.lines.splice(range.start.line..range.end.line, []);
151 let first_line = removed_lines
152 .into_iter()
153 .nth(0)
154 .expect("Should have removed at least one line");
155
156 let first_line_prefix = &first_line[0..range.start.code_point];
157 let new_end_line = range.start.line;
158 self.lines[new_end_line].replace_range(0..range.end.code_point, first_line_prefix);
159 }
160
161 pub fn slice<'a>(&'a self, start: Option<RopeIndex>, end: Option<RopeIndex>) -> RopeSlice<'a> {
164 RopeSlice {
165 rope: self,
166 start: start.unwrap_or_default(),
167 end: end.unwrap_or_else(|| self.last_index()),
168 }
169 }
170
171 pub fn chars<'a>(&'a self) -> RopeChars<'a> {
172 self.slice(None, None).chars()
173 }
174
175 pub fn is_empty(&self) -> bool {
178 self.lines.first().is_none_or(String::is_empty)
179 }
180
181 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
183 Utf16CodeUnitLength(self.chars().map(char::len_utf16).sum())
184 }
185
186 fn line(&self, index: usize) -> &str {
187 &self.lines[index]
188 }
189
190 fn line_for_index(&self, index: RopeIndex) -> &String {
191 &self.lines[index.line]
192 }
193
194 fn line_for_index_mut(&mut self, index: RopeIndex) -> &mut String {
195 &mut self.lines[index.line]
196 }
197
198 fn last_index_in_line(&self, line: usize) -> RopeIndex {
199 if line >= self.lines.len() - 1 {
200 return self.last_index();
201 }
202 RopeIndex {
203 line,
204 code_point: self.line(line).len() - 1,
205 }
206 }
207
208 fn start_of_following_line(&self, index: RopeIndex) -> RopeIndex {
212 if index.line >= self.lines.len() - 1 {
213 return self.last_index();
214 }
215 RopeIndex::new(index.line + 1, 0)
216 }
217
218 fn end_of_preceding_line(&self, index: RopeIndex) -> RopeIndex {
222 if index.line == 0 {
223 return Default::default();
224 }
225 let line_index = index.line.saturating_sub(1);
226 RopeIndex::new(line_index, self.line(line_index).len())
227 }
228
229 pub fn move_by(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
230 if amount == 0 {
231 return origin;
232 }
233
234 match unit {
235 RopeMovement::Grapheme | RopeMovement::Word => {
236 self.move_by_iterator(origin, unit, amount)
237 },
238 RopeMovement::Line => self.move_by_lines(origin, amount),
239 RopeMovement::LineStartOrEnd => {
240 if amount >= 0 {
241 self.last_index_in_line(origin.line)
242 } else {
243 RopeIndex::new(origin.line, 0)
244 }
245 },
246 RopeMovement::RopeStartOrEnd => {
247 if amount >= 0 {
248 self.last_index()
249 } else {
250 Default::default()
251 }
252 },
253 }
254 }
255
256 fn move_by_lines(&self, origin: RopeIndex, lines_to_move: isize) -> RopeIndex {
257 let new_line_index = (origin.line as isize) + lines_to_move;
258 if new_line_index < 0 {
259 return Default::default();
260 }
261 if new_line_index > (self.lines.len() - 1) as isize {
262 return self.last_index();
263 }
264
265 let new_line_index = new_line_index.unsigned_abs();
266 let char_count = self.line(origin.line)[0..origin.code_point].chars().count();
267 let new_code_point_index = self
268 .line(new_line_index)
269 .char_indices()
270 .take(char_count)
271 .last()
272 .map(|(byte_index, character)| byte_index + character.len_utf8())
273 .unwrap_or_default();
274 RopeIndex::new(new_line_index, new_code_point_index)
275 .min(self.last_index_in_line(new_line_index))
276 }
277
278 fn move_by_iterator(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
279 assert_ne!(amount, 0);
280 let (boundary_value, slice) = if amount > 0 {
281 (self.last_index(), self.slice(Some(origin), None))
282 } else {
283 (RopeIndex::default(), self.slice(None, Some(origin)))
284 };
285
286 let iterator = match unit {
287 RopeMovement::Grapheme => slice.grapheme_indices(),
288 RopeMovement::Word => slice.word_indices(),
289 _ => unreachable!("Should not be called for other movement types"),
290 };
291 let iterator = if amount > 0 {
292 Either::Left(iterator)
293 } else {
294 Either::Right(iterator.rev())
295 };
296
297 let mut iterations = amount.unsigned_abs();
298 for (mut index, _) in iterator {
299 iterations = iterations.saturating_sub(1);
300 if iterations == 0 {
301 if index.code_point >= self.line_for_index(index).len() {
304 index = self.start_of_following_line(index);
305 }
306 return index;
307 }
308 }
309
310 boundary_value
311 }
312
313 pub fn clamp_index(&self, rope_index: RopeIndex) -> RopeIndex {
316 let last_line = self.lines.len().saturating_sub(1);
317 let line_index = rope_index.line.min(last_line);
318
319 let line_length_utf8 = if line_index == last_line {
327 self.lines[line_index].len()
328 } else {
329 self.lines[line_index].len() - 1
330 };
331
332 RopeIndex::new(line_index, rope_index.code_point.min(line_length_utf8))
333 }
334
335 pub fn index_to_utf8_offset(&self, rope_index: RopeIndex) -> Utf8CodeUnitLength {
337 let rope_index = self.clamp_index(rope_index);
338 Utf8CodeUnitLength(
339 self.lines
340 .iter()
341 .take(rope_index.line)
342 .map(String::len)
343 .sum::<usize>() +
344 rope_index.code_point,
345 )
346 }
347
348 pub fn index_to_utf16_offset(&self, rope_index: RopeIndex) -> Utf16CodeUnitLength {
349 let rope_index = self.clamp_index(rope_index);
350 let final_line = self.line(rope_index.line);
351
352 let final_line_offset = Utf16CodeUnitLength(
354 final_line[0..rope_index.code_point]
355 .chars()
356 .map(char::len_utf16)
357 .sum(),
358 );
359
360 self.lines
361 .iter()
362 .take(rope_index.line)
363 .map(|line| Utf16CodeUnitLength(line.chars().map(char::len_utf16).sum()))
364 .sum::<Utf16CodeUnitLength>() +
365 final_line_offset
366 }
367
368 pub fn utf8_offset_to_rope_index(&self, utf8_offset: Utf8CodeUnitLength) -> RopeIndex {
370 let mut current_utf8_offset = utf8_offset.0;
371 for (line_index, line) in self.lines.iter().enumerate() {
372 if current_utf8_offset == 0 || current_utf8_offset < line.len() {
373 return RopeIndex::new(line_index, current_utf8_offset);
374 }
375 current_utf8_offset -= line.len();
376 }
377 self.last_index()
378 }
379
380 pub fn utf16_offset_to_utf8_offset(
381 &self,
382 utf16_offset: Utf16CodeUnitLength,
383 ) -> Utf8CodeUnitLength {
384 let mut current_utf16_offset = Utf16CodeUnitLength::zero();
385 let mut current_utf8_offset = Utf8CodeUnitLength::zero();
386
387 for character in self.chars() {
388 let utf16_length = character.len_utf16();
389 if current_utf16_offset + Utf16CodeUnitLength(utf16_length) > utf16_offset {
390 return current_utf8_offset;
391 }
392 current_utf8_offset += Utf8CodeUnitLength(character.len_utf8());
393 current_utf16_offset += Utf16CodeUnitLength(utf16_length);
394 }
395 current_utf8_offset
396 }
397}
398
399#[derive(Clone, Copy, Debug, Default, Eq, MallocSizeOf, PartialEq, PartialOrd, Ord)]
408pub struct RopeIndex {
409 pub line: usize,
411 pub code_point: usize,
417}
418
419impl RopeIndex {
420 pub fn new(line: usize, code_point: usize) -> Self {
421 Self { line, code_point }
422 }
423}
424
425pub struct RopeSlice<'a> {
428 rope: &'a Rope,
430 start: RopeIndex,
432 end: RopeIndex,
434}
435
436impl From<RopeSlice<'_>> for String {
437 fn from(slice: RopeSlice<'_>) -> Self {
438 if slice.start.line == slice.end.line {
439 slice.rope.line_for_index(slice.start)[slice.start.code_point..slice.end.code_point]
440 .into()
441 } else {
442 once(&slice.rope.line_for_index(slice.start)[slice.start.code_point..])
443 .chain(
444 (slice.start.line + 1..slice.end.line)
445 .map(|line_index| slice.rope.line(line_index)),
446 )
447 .chain(once(
448 &slice.rope.line_for_index(slice.end)[..slice.end.code_point],
449 ))
450 .collect()
451 }
452 }
453}
454
455impl<'a> RopeSlice<'a> {
456 pub fn chars(self) -> RopeChars<'a> {
457 RopeChars {
458 movement_iterator: RopeMovementIterator {
459 slice: self,
460 end_of_forward_motion: |_, string| {
461 let (offset, character) = string.char_indices().next()?;
462 Some((offset + character.len_utf8(), character))
463 },
464 start_of_backward_motion: |_, string: &str| string.char_indices().next_back(),
465 },
466 }
467 }
468
469 fn grapheme_indices(self) -> RopeMovementIterator<'a, &'a str> {
470 RopeMovementIterator {
471 slice: self,
472 end_of_forward_motion: |_, string| {
473 let (offset, grapheme) = string.grapheme_indices(true).next()?;
474 Some((offset + grapheme.len(), grapheme))
475 },
476 start_of_backward_motion: |_, string| string.grapheme_indices(true).next_back(),
477 }
478 }
479
480 fn word_indices(self) -> RopeMovementIterator<'a, &'a str> {
481 RopeMovementIterator {
482 slice: self,
483 end_of_forward_motion: |_, string| {
484 let (offset, word) = string.unicode_word_indices().next()?;
485 Some((offset + word.len(), word))
486 },
487 start_of_backward_motion: |_, string| string.unicode_word_indices().next_back(),
488 }
489 }
490}
491
492struct RopeMovementIterator<'a, T> {
498 slice: RopeSlice<'a>,
499 end_of_forward_motion: fn(&RopeSlice, &'a str) -> Option<(usize, T)>,
500 start_of_backward_motion: fn(&RopeSlice, &'a str) -> Option<(usize, T)>,
501}
502
503impl<T> Iterator for RopeMovementIterator<'_, T> {
504 type Item = (RopeIndex, T);
505
506 fn next(&mut self) -> Option<Self::Item> {
507 if self.slice.start >= self.slice.end {
509 return None;
510 }
511
512 assert!(self.slice.start.line < self.slice.rope.lines.len());
513 let line = self.slice.rope.line_for_index(self.slice.start);
514
515 if self.slice.start.code_point < line.len() + 1 {
516 if let Some((end_offset, value)) =
517 (self.end_of_forward_motion)(&self.slice, &line[self.slice.start.code_point..])
518 {
519 self.slice.start.code_point += end_offset;
520 return Some((self.slice.start, value));
521 }
522 }
523
524 self.slice.start = self.slice.rope.start_of_following_line(self.slice.start);
526 self.next()
527 }
528}
529
530impl<T> DoubleEndedIterator for RopeMovementIterator<'_, T> {
531 fn next_back(&mut self) -> Option<Self::Item> {
532 if self.slice.end <= self.slice.start {
534 return None;
535 }
536
537 let line = self.slice.rope.line_for_index(self.slice.end);
538 if self.slice.end.code_point > 0 {
539 if let Some((new_start_index, value)) =
540 (self.start_of_backward_motion)(&self.slice, &line[..self.slice.end.code_point])
541 {
542 self.slice.end.code_point = new_start_index;
543 return Some((self.slice.end, value));
544 }
545 }
546
547 self.slice.end = self.slice.rope.end_of_preceding_line(self.slice.end);
549 self.next_back()
550 }
551}
552
553pub struct RopeChars<'a> {
555 movement_iterator: RopeMovementIterator<'a, char>,
556}
557
558impl Iterator for RopeChars<'_> {
559 type Item = char;
560 fn next(&mut self) -> Option<Self::Item> {
561 Some(self.movement_iterator.next()?.1)
562 }
563}
564
565impl DoubleEndedIterator for RopeChars<'_> {
566 fn next_back(&mut self) -> Option<Self::Item> {
567 Some(self.movement_iterator.next_back()?.1)
568 }
569}
570
571#[test]
572fn test_rope_index_conversion_to_utf8_offset() {
573 let rope = Rope::new("A\nBB\nCCC\nDDDD", Lines::Multiple);
574 assert_eq!(
575 rope.index_to_utf8_offset(RopeIndex::new(0, 0)),
576 Utf8CodeUnitLength(0),
577 );
578 assert_eq!(
579 rope.index_to_utf8_offset(RopeIndex::new(0, 1)),
580 Utf8CodeUnitLength(1),
581 );
582 assert_eq!(
583 rope.index_to_utf8_offset(RopeIndex::new(0, 10)),
584 Utf8CodeUnitLength(1),
585 "RopeIndex with offset past the end of the line should return final offset in line",
586 );
587 assert_eq!(
588 rope.index_to_utf8_offset(RopeIndex::new(1, 0)),
589 Utf8CodeUnitLength(2),
590 );
591 assert_eq!(
592 rope.index_to_utf8_offset(RopeIndex::new(1, 2)),
593 Utf8CodeUnitLength(4),
594 );
595
596 assert_eq!(
597 rope.index_to_utf8_offset(RopeIndex::new(3, 0)),
598 Utf8CodeUnitLength(9),
599 );
600 assert_eq!(
601 rope.index_to_utf8_offset(RopeIndex::new(3, 3)),
602 Utf8CodeUnitLength(12),
603 );
604 assert_eq!(
605 rope.index_to_utf8_offset(RopeIndex::new(3, 4)),
606 Utf8CodeUnitLength(13),
607 "There should be no newline at the end of the TextInput",
608 );
609 assert_eq!(
610 rope.index_to_utf8_offset(RopeIndex::new(3, 40)),
611 Utf8CodeUnitLength(13),
612 "There should be no newline at the end of the TextInput",
613 );
614}
615
616#[test]
617fn test_rope_index_conversion_to_utf16_offset() {
618 let rope = Rope::new("A\nBB\nCCC\n家家", Lines::Multiple);
619 assert_eq!(
620 rope.index_to_utf16_offset(RopeIndex::new(0, 0)),
621 Utf16CodeUnitLength(0),
622 );
623 assert_eq!(
624 rope.index_to_utf16_offset(RopeIndex::new(0, 1)),
625 Utf16CodeUnitLength(1),
626 );
627 assert_eq!(
628 rope.index_to_utf16_offset(RopeIndex::new(0, 10)),
629 Utf16CodeUnitLength(1),
630 "RopeIndex with offset past the end of the line should return final offset in line",
631 );
632 assert_eq!(
633 rope.index_to_utf16_offset(RopeIndex::new(3, 0)),
634 Utf16CodeUnitLength(9),
635 );
636
637 assert_eq!(
638 rope.index_to_utf16_offset(RopeIndex::new(3, 3)),
639 Utf16CodeUnitLength(10),
640 "3 code unit UTF-8 encodede character"
641 );
642 assert_eq!(
643 rope.index_to_utf16_offset(RopeIndex::new(3, 6)),
644 Utf16CodeUnitLength(11),
645 );
646 assert_eq!(
647 rope.index_to_utf16_offset(RopeIndex::new(3, 20)),
648 Utf16CodeUnitLength(11),
649 );
650}
651
652#[test]
653fn test_utf16_offset_to_utf8_offset() {
654 let rope = Rope::new("A\nBB\nCCC\n家家", Lines::Multiple);
655 assert_eq!(
656 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(0)),
657 Utf8CodeUnitLength(0),
658 );
659 assert_eq!(
660 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(1)),
661 Utf8CodeUnitLength(1),
662 );
663 assert_eq!(
664 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(2)),
665 Utf8CodeUnitLength(2),
666 "Offset past the end of the line",
667 );
668 assert_eq!(
669 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(9)),
670 Utf8CodeUnitLength(9),
671 );
672
673 assert_eq!(
674 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(10)),
675 Utf8CodeUnitLength(12),
676 "3 code unit UTF-8 encodede character"
677 );
678 assert_eq!(
679 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(11)),
680 Utf8CodeUnitLength(15),
681 );
682 assert_eq!(
683 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(300)),
684 Utf8CodeUnitLength(15),
685 );
686}
687
688#[test]
689fn test_rope_delete_slice() {
690 let mut rope = Rope::new("ABC\nDEF\n", Lines::Multiple);
691 rope.delete_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2));
692 assert_eq!(rope.contents(), "AC\nDEF\n");
693
694 let mut rope = Rope::new("ABC\nDEF\n", Lines::Multiple);
697 rope.delete_range(RopeIndex::new(0, 3)..RopeIndex::new(0, 4));
698 assert_eq!(rope.lines, ["ABC\n", "DEF\n", ""]);
699
700 let mut rope = Rope::new("ABC\nDEF\n", Lines::Multiple);
701 rope.delete_range(RopeIndex::new(0, 0)..RopeIndex::new(0, 4));
702 assert_eq!(rope.lines, ["\n", "DEF\n", ""]);
703
704 let mut rope = Rope::new("A\nBB\nCCC", Lines::Multiple);
705 rope.delete_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0));
706 assert_eq!(rope.lines, ["ABB\n", "CCC"]);
707}
708
709#[test]
710fn test_rope_replace_slice() {
711 let mut rope = Rope::new("AAA\nBBB\nCCC", Lines::Multiple);
712 rope.replace_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2), "x");
713 assert_eq!(rope.contents(), "AxA\nBBB\nCCC",);
714
715 let mut rope = Rope::new("A\nBB\nCCC", Lines::Multiple);
716 rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0), "D");
717 assert_eq!(rope.lines, ["ADBB\n", "CCC"]);
718
719 let mut rope = Rope::new("AAA\nBBB\nCCC\nDDD", Lines::Multiple);
720 rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(2, 1), "x");
721 assert_eq!(rope.lines, ["AAxCC\n", "DDD"]);
722}