From: Yuan Fu Date: Mon, 9 Sep 2024 00:50:52 +0000 (-0700) Subject: Fix the range handling in treesit.c X-Git-Tag: archive/raspbian/1%30.1+1-3+rpi1^2~2^2~20^2~448 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=3435464452b4947098b8ccbe93e5c0afdd2ed06e;p=emacs.git Fix the range handling in treesit.c 1. In treesit_sync_visible_region, reduce the ranges for a parser so it doesn't go beyond the visible range. 2. To avoid possible infinite recursion, add a within_reparse field to parsers. Previously we were using the need_reparse field to avoid infinite recursion, but lisp programs in a parser's after change hook might make some buffer edit which turns need_reparse to true. To avoid that, we now use an explicit field. If a parser's after change function makes a buffer edit, lisp program ends up with a desynced parse tree, but that's better than possible infinite recursion. Also after change function shouldn't edit the buffer. 3. In treesit_make_ranges, use parser's visible_beg instead of buffer's BEGV. I mean technically whenever we make ranges, buffer's BEGV should be equal to parser's visible_beg, but better not take that uncertainty, also makes the code more readable. 4. In Ftreesit_parser_included_ranges, move visible region sync code before the body of the function. * src/treesit.c (treesit_sync_visible_region): Minimally fix ranges so it doesn't exceed parser's visible range. (treesit_call_after_change_functions): Update calling sigature to treesit_make_ranges. (treesit_ensure_parsed, make_treesit_parser): Use the new field within_reparse. (treesit_make_ranges): Use parser's visible_beg instead of buffer's BEGV. (Ftreesit_parser_included_ranges): Move visible region check before function body. * src/treesit.h (Lisp_TS_Parser): Add new field within_reparse. --- diff --git a/src/treesit.c b/src/treesit.c index 351bd65819a..970754f3c1b 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1045,6 +1045,48 @@ treesit_sync_visible_region (Lisp_Object parser) XTS_PARSER (parser)->visible_beg = visible_beg; XTS_PARSER (parser)->visible_end = visible_end; + + /* Fix ranges so that the ranges stays with in visible_end. Here we + try to do minimal work so that the ranges is minimally correct such + that there's no OOB error. Usually treesit-update-ranges should + update the parser with actually correct ranges. */ + if (NILP (XTS_PARSER (parser)->last_set_ranges)) return; + uint32_t len; + const TSRange *ranges + = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); + /* We might need to discard some ranges that exceeds visible_end, in + that case, new_len is the length of the new ranges array (which + will be shorter than len). */ + uint32_t new_len = 0; + uint32_t new_end = 0; + for (int idx = 0; idx < len; idx++) + { + TSRange range = ranges[idx]; + /* If this range starts after visible_end, we don't include this + range and the ranges after it in the new ranges. */ + if (range.start_byte + visible_beg >= visible_end) + break; + /* If this range's end is after visible_end, we don't include any + ranges after it, and changes the end of this range to + visible_end. */ + if (range.end_byte + visible_beg > visible_end) + { + new_end = visible_end - visible_beg; + new_len++; + break; + } + new_len++; + } + if (new_len != len || new_end != 0) + { + TSRange *new_ranges = xmalloc (sizeof (TSRange) * new_len); + memcpy (new_ranges, ranges, sizeof (TSRange) * new_len); + new_ranges[new_len - 1].end_byte = new_end; + /* TODO: What should we do if this fails? */ + ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, + new_ranges, new_len); + xfree (new_ranges); + } } static void @@ -1057,7 +1099,8 @@ treesit_check_buffer_size (struct buffer *buffer) make_fixnum (buffer_size_bytes)); } -static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *); +static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, + Lisp_Object, struct buffer *); static void treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, @@ -1071,7 +1114,7 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, { uint32_t len; TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); - lisp_ranges = treesit_make_ranges (ranges, len, buf); + lisp_ranges = treesit_make_ranges (ranges, len, parser, buf); xfree (ranges); } else @@ -1098,6 +1141,9 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, static void treesit_ensure_parsed (Lisp_Object parser) { + if (XTS_PARSER (parser)->within_reparse) return; + XTS_PARSER (parser)->within_reparse = true; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); /* Before we parse, catch up with the narrowing situation. */ @@ -1106,10 +1152,11 @@ treesit_ensure_parsed (Lisp_Object parser) because it might set the flag to true. */ treesit_sync_visible_region (parser); - /* Make sure this comes before everything else, see comment - (ref:notifier-inside-ensure-parsed) for more detail. */ if (!XTS_PARSER (parser)->need_reparse) - return; + { + XTS_PARSER (parser)->within_reparse = false; + return; + } TSParser *treesit_parser = XTS_PARSER (parser)->parser; TSTree *tree = XTS_PARSER (parser)->tree; @@ -1134,14 +1181,10 @@ treesit_ensure_parsed (Lisp_Object parser) XTS_PARSER (parser)->need_reparse = false; XTS_PARSER (parser)->timestamp++; - /* After-change functions should run at the very end, most crucially - after need_reparse is set to false, this way if the function - calls some tree-sitter function which invokes - treesit_ensure_parsed again, it returns early and do not - recursively call the after change functions again. - (ref:notifier-inside-ensure-parsed) */ treesit_call_after_change_functions (tree, new_tree, parser); ts_tree_delete (tree); + + XTS_PARSER (parser)->within_reparse = false; } /* This is the read function provided to tree-sitter to read from a @@ -1224,6 +1267,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer)); lisp_parser->timestamp = 0; lisp_parser->deleted = false; + lisp_parser->within_reparse = false; eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } @@ -1672,14 +1716,14 @@ treesit_check_range_argument (Lisp_Object ranges) convert between tree-sitter buffer offset and buffer position. */ static Lisp_Object treesit_make_ranges (const TSRange *ranges, uint32_t len, - struct buffer *buffer) + Lisp_Object parser, struct buffer *buffer) { Lisp_Object list = Qnil; for (int idx = 0; idx < len; idx++) { TSRange range = ranges[idx]; - uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); - uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); + uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg; + uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg; eassert (BUF_BEGV_BYTE (buffer) <= beg_byte); eassert (beg_byte <= end_byte); eassert (end_byte <= BUF_ZV_BYTE (buffer)); @@ -1725,11 +1769,9 @@ buffer. */) if (NILP (ranges)) { /* If RANGES is nil, make parser to parse the whole document. - To do that we give tree-sitter a 0 length, the range is a - dummy. */ - TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0}; + To do that we give tree-sitter a 0 length. */ success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, - &treesit_range , 0); + NULL , 0); } else { @@ -1742,7 +1784,6 @@ buffer. */) /* We can use XFIXNUM, XCAR, XCDR freely because we have checked the input by treesit_check_range_argument. */ - for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges)) { Lisp_Object range = XCAR (ranges); @@ -1787,6 +1828,10 @@ See also `treesit-parser-set-included-ranges'. */) treesit_check_parser (parser); treesit_initialize (); + /* Our return value depends on the buffer state (BUF_BEGV_BYTE, + etc), so we need to sync up. */ + treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer)); + treesit_sync_visible_region (parser); /* When the parser doesn't have a range set and we call ts_parser_included_ranges on it, it doesn't return an empty list, but rather return DEFAULT_RANGE. (A single range where start_byte @@ -1799,13 +1844,10 @@ See also `treesit-parser-set-included-ranges'. */) const TSRange *ranges = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); - /* Our return value depends on the buffer state (BUF_BEGV_BYTE, - etc), so we need to sync up. */ - treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer)); - treesit_sync_visible_region (parser); - struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); - return treesit_make_ranges (ranges, len, buffer); + + + return treesit_make_ranges (ranges, len, parser, buffer); } DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers, diff --git a/src/treesit.h b/src/treesit.h index d3c6aa4c250..f8227a64b0a 100644 --- a/src/treesit.h +++ b/src/treesit.h @@ -45,9 +45,12 @@ struct Lisp_TS_Parser same tag. A tag is primarily used to differentiate between parsers for the same language. */ Lisp_Object tag; - /* The Lisp ranges last set. This is use to compare to the new - ranges the users wants to set, and avoid reparse if the new - ranges is the same as the last set one. */ + /* The Lisp ranges last set. This is use to compare to the new ranges + the users wants to set, and avoid reparse if the new ranges is the + same as the last set one. This might go out of sync with the + ranges we return from Ftreesit_parser_included_ranges, if we did a + ranges fix in treesit_sync_visible_region, but I don't think + that'll cause any harm. */ Lisp_Object last_set_ranges; /* The buffer associated with this parser. */ Lisp_Object buffer; @@ -82,6 +85,10 @@ struct Lisp_TS_Parser /* If this field is true, parser functions raises treesit-parser-deleted signal. */ bool deleted; + /* This field is set to true when treesit_ensure_parsed runs, to + prevent infinite recursion due to calling after change + functions. */ + bool within_reparse; }; /* A wrapper around a tree-sitter node. */