Skip to content

Commit b2c6eb8

Browse files
committed
integrity error solved
1 parent 59dcc50 commit b2c6eb8

File tree

2 files changed

19 additions & 35 deletions

2 files changed

19 additions & 35 deletions

backend/apps/ai/common/base/chunk_command.py

Lines changed: 7 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ def help(self) -> str:
2020
def process_chunks_batch(self, entities: list[Model]) -> int:
2121
"""Process a batch of entities to create or update chunks."""
2222
processed = 0
23-
batch_chunks_to_create = {}
23+
batch_chunks_to_create = []
2424
content_type = ContentType.objects.get_for_model(self.model_class)
2525

2626
for entity in entities:
@@ -58,19 +58,19 @@ def process_chunks_batch(self, entities: list[Model]) -> int:
5858
continue
5959

6060
chunk_texts = Chunk.split_text(full_content)
61-
if not chunk_texts:
61+
unique_chunk_texts = list(dict.fromkeys(chunk_texts))
62+
63+
if not unique_chunk_texts:
6264
self.stdout.write(f"No chunks created for {self.entity_name} {entity_key}")
6365
continue
6466

6567
if chunks := create_chunks_and_embeddings(
66-
chunk_texts=chunk_texts,
68+
chunk_texts=unique_chunk_texts,
6769
context=context,
6870
openai_client=self.openai_client,
6971
save=False,
7072
):
71-
for chunk in chunks:
72-
key = (chunk.context_id, chunk.text)
73-
batch_chunks_to_create[key] = chunk
73+
batch_chunks_to_create.extend(chunks)
7474
processed += 1
7575
self.stdout.write(
7676
self.style.SUCCESS(f"Created {len(chunks)} new chunks for {entity_key}")
@@ -79,21 +79,7 @@ def process_chunks_batch(self, entities: list[Model]) -> int:
7979
self.stdout.write(f"Chunks for {entity_key} are already up to date.")
8080

8181
if batch_chunks_to_create:
82-
context_ids = {context_id for context_id, _ in batch_chunks_to_create}
83-
candidate_chunk_texts = {text for _, text in batch_chunks_to_create}
84-
85-
existing_keys = set(
86-
Chunk.objects.filter(
87-
context_id__in=context_ids, text__in=candidate_chunk_texts
88-
).values_list("context_id", "text")
89-
)
90-
91-
chunks_to_insert = [
92-
chunk for key, chunk in batch_chunks_to_create.items() if key not in existing_keys
93-
]
94-
95-
if chunks_to_insert:
96-
Chunk.bulk_save(chunks_to_insert)
82+
Chunk.bulk_save(batch_chunks_to_create)
9783

9884
return processed
9985

backend/tests/apps/ai/common/base/chunk_command_test.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def test_process_chunks_batch_multiple_entities(
280280
assert mock_create_chunks.call_count == 3
281281
mock_bulk_save.assert_called_once()
282282
bulk_save_args = mock_bulk_save.call_args[0][0]
283-
assert len(bulk_save_args) == 2
283+
assert len(bulk_save_args) == 6
284284

285285
@patch("apps.ai.common.base.chunk_command.ContentType.objects.get_for_model")
286286
@patch("apps.ai.common.base.chunk_command.Context.objects.filter")
@@ -449,27 +449,25 @@ def test_process_chunks_batch_with_duplicates(
449449
mock_content_type,
450450
mock_chunks,
451451
):
452-
"""Test that duplicate chunks are filtered out before bulk save."""
452+
"""Test that duplicate chunk texts are filtered out before processing."""
453453
mock_get_content_type.return_value = mock_content_type
454454
mock_context_filter.return_value.first.return_value = mock_context
455-
mock_split_text.return_value = ["chunk1", "chunk2", "chunk3"]
455+
mock_split_text.return_value = ["chunk1", "chunk2", "chunk1", "chunk3", "chunk2"]
456456
mock_create_chunks.return_value = mock_chunks
457457
command.openai_client = Mock()
458458

459-
with (
460-
patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter,
461-
patch.object(command.stdout, "write"),
462-
):
463-
mock_qs = Mock()
464-
mock_qs.values_list.return_value = [(1, "Chunk text 1")]
465-
mock_chunk_filter.return_value = mock_qs
466-
459+
with patch.object(command.stdout, "write"):
467460
result = command.process_chunks_batch([mock_entity])
468461

469462
assert result == 1
470-
mock_bulk_save.assert_called_once()
471-
bulk_save_args = mock_bulk_save.call_args[0][0]
472-
assert len(bulk_save_args) == 2
463+
mock_split_text.assert_called_once()
464+
mock_create_chunks.assert_called_once_with(
465+
chunk_texts=["chunk1", "chunk2", "chunk3"],
466+
context=mock_context,
467+
openai_client=command.openai_client,
468+
save=False,
469+
)
470+
mock_bulk_save.assert_called_once_with(mock_chunks)
473471

474472
def test_process_chunks_batch_whitespace_only_content(
475473
self, command, mock_entity, mock_context, mock_content_type

0 commit comments

Comments
 (0)