-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Expand file tree
/
Copy pathEntityVectorGenerator.php
More file actions
89 lines (74 loc) · 2.58 KB
/
EntityVectorGenerator.php
File metadata and controls
89 lines (74 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
<?php
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Activity\Models\Tag;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Queries\Services\VectorQueryService;
use Illuminate\Support\Facades\DB;
/**
 * Builds vector embeddings for the text of an entity and stores them
 * as SearchVector rows, replacing any rows previously stored for that entity.
 */
class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Generate embeddings for the given entity and store them,
     * replacing any embeddings already stored for that entity.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        // All embeddings are generated before anything is deleted, so a failure
        // while generating leaves the currently stored rows untouched.
        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        // Delete + insert as a single unit so that a failed insert rolls the
        // delete back instead of leaving the entity without any vectors.
        // NOTE(review): assumes SearchVector uses the default DB connection - confirm.
        DB::transaction(function () use ($entity, $embeddings, $chunks): void {
            $this->deleteExistingEmbeddingsForEntity($entity);
            $this->storeEmbeddings($embeddings, $chunks, $entity);
        });
    }

    /**
     * Remove every stored SearchVector row matching the morph class and id of the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one SearchVector row per embedding, paired with the text chunk at the same index.
     *
     * @param array<int, float[]> $embeddings Embeddings keyed by the index of their text chunk.
     * @param string[]            $textChunks Text chunks the embeddings were generated from.
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The values are concatenated into a raw SQL expression below, so force
            // each one to a float to ensure only numeric text can reach the query.
            $values = array_map(static fn ($value): float => (float) $value, $embedding);

            $toInsert[] = [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                'embedding'   => DB::raw('VEC_FROMTEXT("[' . implode(',', $values) . ']")'),
                'text'        => $textChunks[$index],
            ];
        }

        // Insert in batches of 500 rows rather than as one large statement.
        foreach (array_chunk($toInsert, 500) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each of the given text chunks.
     * Keys of the input are preserved so results can be matched back to their chunk.
     *
     * @param string[] $chunks
     * @return array<int, float[]>
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];

        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split the given text into chunks using a TextChunker configured with
     * a size of 500 and newline, full-stop, space and empty-string delimiters.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the plain text used for embedding an entity: its name, then one
     * "name: value" line per tag, then the content of the entity's text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();

        // Double quotes are required for the glue: '\n' in single quotes is a
        // literal backslash followed by "n", not a line break.
        $tagText = $tags->map(static function (Tag $tag) {
            return $tag->name . ': ' . $tag->value;
        })->join("\n");

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}