local timer = require('cmp_buffer.timer')
local function clear_table(tbl)
for k in pairs(tbl) do
tbl[k] = nil
end
end
---@class cmp_buffer.Buffer
---@field public bufnr number
---@field public opts cmp_buffer.Options
---@field public regex any
---@field public timer cmp_buffer.Timer
---@field public lines_count number
---@field public timer_current_line number
---@field public lines_words table<number, false|string[]>
---@field public unique_words_curr_line table<string, boolean>
---@field public unique_words_other_lines table<string, boolean>
---@field public unique_words_curr_line_dirty boolean
---@field public unique_words_other_lines_dirty boolean
---@field public last_edit_first_line number
---@field public last_edit_last_line number
---@field public closed boolean
---@field public on_close_cb fun()|nil
---@field public words_distances table<string, number>
---@field public words_distances_last_cursor_row number
---@field public words_distances_dirty boolean
local buffer = {}
-- For some reason, requesting this many lines in multiple chunks leads to
-- much better memory usage than fetching the entire file in one go.
buffer.GET_LINES_CHUNK_SIZE = 1000
---Create new buffer object
---@param bufnr number
---@param opts cmp_buffer.Options
---@return cmp_buffer.Buffer
function buffer.new(bufnr, opts)
local self = setmetatable({}, { __index = buffer })
self.bufnr = bufnr
self.timer = timer.new()
self.closed = false
self.on_close_cb = nil
self.opts = opts
self.regex = vim.regex(self.opts.keyword_pattern)
self.lines_count = 0
self.timer_current_line = -1
self.lines_words = {}
self.unique_words_curr_line = {}
self.unique_words_other_lines = {}
self.unique_words_curr_line_dirty = true
self.unique_words_other_lines_dirty = true
self.last_edit_first_line = 0
self.last_edit_last_line = 0
self.words_distances = {}
self.words_distances_dirty = true
self.words_distances_last_cursor_row = 0
return self
end
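-- Usage sketch (illustrative only, not the plugin's documented API; the option
-- values below are assumptions, and this file is assumed to be loadable as
-- 'cmp_buffer.buffer'):
--   local buffer = require('cmp_buffer.buffer')
--   local buf = buffer.new(vim.api.nvim_get_current_buf(), {
--     keyword_pattern = [[\k\+]],
--     keyword_length = 3,
--     indexing_interval = 100,
--     indexing_batch_size = 1000,
--     max_indexed_line_length = 1024,
--   })
--   buf:watch()
--   buf:start_indexing_timer()
--   -- later, e.g. on a completion request:
--   local other_lines_words, curr_line_words = unpack(buf:get_words())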
---Close buffer
function buffer.close(self)
self.closed = true
self:stop_indexing_timer()
self.timer:close()
self.timer = nil
self.lines_count = 0
self.timer_current_line = -1
self.lines_words = {}
self.unique_words_curr_line = {}
self.unique_words_other_lines = {}
self.unique_words_curr_line_dirty = false
self.unique_words_other_lines_dirty = false
self.last_edit_first_line = 0
self.last_edit_last_line = 0
self.words_distances = {}
self.words_distances_dirty = false
self.words_distances_last_cursor_row = 0
if self.on_close_cb then
self.on_close_cb()
end
end
function buffer.stop_indexing_timer(self)
self.timer:stop()
self.timer_current_line = -1
end
function buffer.mark_all_lines_dirty(self)
self.unique_words_curr_line_dirty = true
self.unique_words_other_lines_dirty = true
self.last_edit_first_line = 0
self.last_edit_last_line = 0
self.words_distances_dirty = true
end
--- Workaround for https://github.com/neovim/neovim/issues/16729
function buffer.safe_buf_call(self, callback)
if vim.api.nvim_get_current_buf() == self.bufnr then
callback()
else
vim.api.nvim_buf_call(self.bufnr, callback)
end
end
function buffer.index_range(self, range_start, range_end, skip_already_indexed)
self:safe_buf_call(function()
local chunk_size = self.GET_LINES_CHUNK_SIZE
local chunk_start = range_start
while chunk_start < range_end do
local chunk_end = math.min(chunk_start + chunk_size, range_end)
local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
for i, line in ipairs(chunk_lines) do
if not skip_already_indexed or not self.lines_words[chunk_start + i] then
self:index_line(chunk_start + i, line)
end
end
chunk_start = chunk_end
end
end)
end
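-- Chunking sketch with purely illustrative numbers: given
-- GET_LINES_CHUNK_SIZE = 1000, a call such as
--   self:index_range(0, 2500)
-- issues three nvim_buf_get_lines() requests for the 0-based, end-exclusive
-- ranges [0, 1000), [1000, 2000) and [2000, 2500), and index_line() receives
-- the corresponding 1-based line numbers 1..2500.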
function buffer.start_indexing_timer(self)
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
self.timer_current_line = 0
-- Negative values result in an integer overflow in luv (vim.loop), and zero
-- disables timer repeat, so the interval is clamped to a minimum of 1.
local interval = math.max(1, self.opts.indexing_interval)
self.timer:start(0, interval, function()
if self.closed then
self:stop_indexing_timer()
return
end
-- Note that the async indexer is designed not to break even if the user is
-- editing the file while it is being indexed. Because the indexing done in
-- the watcher must use the synchronous algorithm, we assume that the data
-- already present in self.lines_words is correct and doesn't need refreshing
-- here: even if nvim_buf_get_lines returns text different from what the
-- watcher has seen so far, the watcher will catch up on the next on_lines
-- event.
-- Skip over the already indexed lines
while self.lines_words[self.timer_current_line + 1] do
self.timer_current_line = self.timer_current_line + 1
end
local batch_start = self.timer_current_line
local batch_size = self.opts.indexing_batch_size
-- NOTE: self.lines_count may change while indexing is in progress (it is
-- updated by the on_lines watcher).
local batch_end = batch_size >= 1 and math.min(batch_start + batch_size, self.lines_count) or self.lines_count
if batch_end >= self.lines_count then
self:stop_indexing_timer()
end
self.timer_current_line = batch_end
self:mark_all_lines_dirty()
self:index_range(batch_start, batch_end, true)
end)
end
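-- Batching sketch with illustrative numbers: with indexing_batch_size = 1000
-- and a 2500-line buffer, successive timer ticks index the 0-based ranges
-- [0, 1000), [1000, 2000) and [2000, 2500), and the timer is stopped on the
-- tick whose batch_end reaches lines_count. A non-positive indexing_batch_size
-- makes a single tick index everything that remains.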
---Watch the buffer for changes and keep the per-line word index in sync.
function buffer.watch(self)
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
-- NOTE: As far as I know, indexing in the watcher can't be done asynchronously
-- because even built-in commands generate multiple consecutive `on_lines`
-- events, and that's not even mentioning plugins. To get accurate results we
-- would have to either re-index the entire file on throttled events (slow, and
-- loses the benefit of on_lines watching) or put the events in a queue, which
-- would complicate the plugin a lot. Plus, most changes which trigger this
-- event come from regular editing, so 99% of the time they will affect only
-- 1-2 lines.
vim.api.nvim_buf_attach(self.bufnr, false, {
-- NOTE: line indexes are 0-based and the last line is not inclusive.
on_lines = function(_, _, _, first_line, old_last_line, new_last_line, _, _, _)
if self.closed then
return true
end
if old_last_line == new_last_line and first_line == new_last_line then
-- This condition is really intended as a workaround for
-- https://github.com/hrsh7th/cmp-buffer/issues/28, but it will also
-- protect us from completely empty text edits.
return
end
local delta = new_last_line - old_last_line
local old_lines_count = self.lines_count
local new_lines_count = old_lines_count + delta
if new_lines_count == 0 then -- clear
-- This branch protects against bugs after full-file deletion. If you delete
-- every line, for example with ggdG, the new_last_line of the event will be
-- zero. That isn't really accurate: a buffer always contains at least one
-- empty line; only unloaded buffers contain zero lines.
new_lines_count = 1
for i = old_lines_count, 2, -1 do
self.lines_words[i] = nil
end
self.lines_words[1] = {}
elseif delta > 0 then -- append
-- Explicitly reserve more slots in the array part of the lines table; all of
-- them will be filled in the next loop, but in reverse order (which is why I
-- am concerned about preallocation). Why is there no built-in function to do
-- this in Lua???
for i = old_lines_count + 1, new_lines_count do
self.lines_words[i] = false
end
-- Move forwards the unchanged elements in the tail part.
for i = old_lines_count, old_last_line + 1, -1 do
self.lines_words[i + delta] = self.lines_words[i]
end
-- Fill in new tables for the added lines.
for i = old_last_line + 1, new_last_line do
self.lines_words[i] = {}
end
elseif delta < 0 then -- remove
-- Move backwards the unchanged elements in the tail part.
for i = old_last_line + 1, old_lines_count do
self.lines_words[i + delta] = self.lines_words[i]
end
-- Remove (already copied) tables from the end, in reverse order, so
-- that we don't make holes in the lines table.
for i = old_lines_count, new_lines_count + 1, -1 do
self.lines_words[i] = nil
end
end
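-- Shifting sketch for a hypothetical edit: in a 5-line buffer, inserting one
-- line after line 3 arrives as first_line = 3, old_last_line = 3,
-- new_last_line = 4, so delta = 1; the old entries for lines 4-5 move to
-- slots 5-6, slot 4 gets a fresh table, and the index_range() call below
-- re-indexes the inserted line.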
self.lines_count = new_lines_count
-- This block is support code for handling the case where the user edits the
-- buffer while the async indexer is running. It solves the problem that when
-- new lines are inserted or old lines are deleted, the indexes of all
-- subsequent lines change, so the indexer's current position must be adjusted
-- to avoid accidentally skipping any lines.
if self.timer:is_active() then
if first_line <= self.timer_current_line and self.timer_current_line < old_last_line then
-- The indexer was inside the area of the current text edit. We will
-- synchronously index this area in a moment, so the indexer should resume
-- from right after the edit range.
self.timer_current_line = new_last_line
elseif self.timer_current_line >= old_last_line then
-- The indexer was somewhere past the current text edit. This means
-- that the line numbers could have changed, and the indexing
-- position must be adjusted accordingly.
self.timer_current_line = self.timer_current_line + delta
end
end
if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then
self.unique_words_curr_line_dirty = true
else
self.unique_words_curr_line_dirty = true
self.unique_words_other_lines_dirty = true
end
self.last_edit_first_line = first_line
self.last_edit_last_line = new_last_line
self.words_distances_dirty = true
-- Re-index the lines touched by this edit.
self:index_range(first_line, new_last_line)
end,
on_reload = function(_, _)
if self.closed then
return true
end
clear_table(self.lines_words)
self:stop_indexing_timer()
self:start_indexing_timer()
end,
on_detach = function(_, _)
if self.closed then
return true
end
self:close()
end,
})
end
---@param linenr number
---@param line string
function buffer.index_line(self, linenr, line)
local words = self.lines_words[linenr]
if not words then
words = {}
self.lines_words[linenr] = words
else
clear_table(words)
end
local word_i = 1
local remaining = line
-- The if statement checks the number of bytes in the line string, but slices
-- it by the number of characters. This is not a problem because the number of
-- characters is always equal to (if only ASCII characters are used) or smaller
-- than (if multibyte Unicode characters are used) the number of bytes. In
-- other words, if the line contains more characters than the limit, it will
-- always contain more bytes than that same limit. The byte check is done first
-- because calling a Vimscript function is relatively slow.
if #remaining > self.opts.max_indexed_line_length then
remaining = vim.fn.strcharpart(line, 0, self.opts.max_indexed_line_length)
end
while #remaining > 0 do
-- NOTE: Both start and end indexes here are 0-based (unlike Lua strings),
-- and the end index is not inclusive.
local match_start, match_end = self.regex:match_str(remaining)
if match_start and match_end then
local word = remaining:sub(match_start + 1, match_end)
if #word >= self.opts.keyword_length then
words[word_i] = word
word_i = word_i + 1
end
remaining = remaining:sub(match_end + 1)
else
break
end
end
end
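-- Matching sketch: assuming a keyword pattern roughly equivalent to [[\k\+]]
-- and keyword_length = 3, a line like "local foo = foo + 1" would produce
-- words = { 'local', 'foo', 'foo' }; duplicates are kept here and only
-- collapsed later by the unique-word sets built in get_words().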
function buffer.get_words(self)
-- NOTE: unique_words are rebuilt on-demand because it is common for the
-- watcher callback to be fired VERY frequently, and a rebuild needs to go
-- over ALL lines, not just the changed ones.
if self.unique_words_other_lines_dirty then
clear_table(self.unique_words_other_lines)
self:rebuild_unique_words(self.unique_words_other_lines, 0, self.last_edit_first_line)
self:rebuild_unique_words(self.unique_words_other_lines, self.last_edit_last_line, self.lines_count)
self.unique_words_other_lines_dirty = false
end
if self.unique_words_curr_line_dirty then
clear_table(self.unique_words_curr_line)
self:rebuild_unique_words(self.unique_words_curr_line, self.last_edit_first_line, self.last_edit_last_line)
self.unique_words_curr_line_dirty = false
end
return { self.unique_words_other_lines, self.unique_words_curr_line }
end
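-- Result shape sketch: both returned tables are word sets of the form
-- { ['local'] = true, ['foo'] = true, ... }, split into "all other lines" and
-- "lines touched by the last edit" so that the small current-edit set can be
-- rebuilt cheaply on frequent on_lines events without rescanning the whole
-- buffer.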
---Collect the unique words of the given line range (0-based, end-exclusive) into words_table.
function buffer.rebuild_unique_words(self, words_table, range_start, range_end)
for i = range_start + 1, range_end do
for _, w in ipairs(self.lines_words[i] or {}) do
words_table[w] = true
end
end
end
---@param cursor_row number
---@return table<string, number>
function buffer.get_words_distances(self, cursor_row)
if self.words_distances_dirty or cursor_row ~= self.words_distances_last_cursor_row then
local distances = self.words_distances
clear_table(distances)
for i = 1, self.lines_count do
for _, w in ipairs(self.lines_words[i] or {}) do
local dist = math.abs(cursor_row - i)
distances[w] = distances[w] and math.min(distances[w], dist) or dist
end
end
self.words_distances_last_cursor_row = cursor_row
self.words_distances_dirty = false
end
return self.words_distances
end
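-- Distance sketch with illustrative numbers: with the cursor on row 10 and the
-- word 'foo' indexed on lines 3 and 12, get_words_distances(10) yields
-- foo = math.min(math.abs(10 - 3), math.abs(10 - 12)) = 2, i.e. the distance
-- to the nearest occurrence; the result is cached until the buffer changes or
-- the cursor moves to a different row.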
return buffer