diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea9ff665..74283d42 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,7 +72,7 @@ jobs: if: env.skip_backend_tests == 'false' run: | cd src/ContentProcessor - python -m pytest -vv --cov=. --cov-report=xml --cov-report=term-missing --cov-fail-under=80 + python -m pytest -vv --cov=src --cov-report=xml --cov-report=term-missing --cov-fail-under=80 - name: Skip Backend Tests if: env.skip_backend_tests == 'true' diff --git a/src/ContentProcessor/pyproject.toml b/src/ContentProcessor/pyproject.toml index f793a09d..16b359f7 100644 --- a/src/ContentProcessor/pyproject.toml +++ b/src/ContentProcessor/pyproject.toml @@ -41,5 +41,15 @@ addopts = "--maxfail=1" testpaths = ["tests"] pythonpath = ["src"] +[tool.coverage.run] +source = ["src"] +omit = ["src/tests/*", "**/test_*.py", "**/*_test.py"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.:", +] + [tool.poetry.scripts] start-app = "src.main:main" diff --git a/src/ContentProcessor/src/tests/models/test_content_process.py b/src/ContentProcessor/src/tests/models/test_content_process.py new file mode 100644 index 00000000..08bcfead --- /dev/null +++ b/src/ContentProcessor/src/tests/models/test_content_process.py @@ -0,0 +1,99 @@ +"""Tests for ContentProcess model.""" + +from unittest.mock import patch, MagicMock +from libs.models.content_process import ContentProcess, Step_Outputs + + +class TestStepOutputs: + """Tests for Step_Outputs class.""" + + def test_step_outputs_creation(self): + """Test creating Step_Outputs.""" + step = Step_Outputs( + step_name="extract", + processed_time="2026-01-01T00:00:00Z", + step_result={"extracted": "data"} + ) + assert step.step_name == "extract" + assert step.step_result == {"extracted": "data"} + + +class TestContentProcess: + """Tests for ContentProcess class.""" + + def test_content_process_creation(self): + """Test creating ContentProcess.""" + process = ContentProcess( + process_id="test-123", + status="processing" + ) + assert process.process_id == "test-123" + assert process.status == "processing" + + @patch("libs.models.content_process.CosmosMongDBHelper") + def test_update_process_status_to_cosmos_existing(self, mock_cosmos): + """Test updating existing process status in Cosmos.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [{"process_id": "test-123"}] + mock_cosmos.return_value = mock_instance + + process = ContentProcess(process_id="test-123", status="completed") + process.update_process_status_to_cosmos( + "connection_string", + "database", + "collection" + ) + + mock_instance.find_document.assert_called_once() + mock_instance.update_document.assert_called_once() + + @patch("libs.models.content_process.CosmosMongDBHelper") + def test_update_process_status_to_cosmos_new(self, mock_cosmos): + """Test inserting new process status in Cosmos.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [] + mock_cosmos.return_value = mock_instance + + process = ContentProcess(process_id="test-123", status="processing") + process.update_process_status_to_cosmos( + "connection_string", + "database", + "collection" + ) + + mock_instance.find_document.assert_called_once() + mock_instance.insert_document.assert_called_once() + + @patch("libs.models.content_process.CosmosMongDBHelper") + def test_update_status_to_cosmos_existing(self, mock_cosmos): + """Test updating existing status in Cosmos.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [{"process_id": "test-123"}] + mock_cosmos.return_value = mock_instance + + process = ContentProcess(process_id="test-123", status="completed") + process.update_status_to_cosmos( + "connection_string", + "database", + "collection" + ) + + mock_instance.find_document.assert_called_once() + mock_instance.update_document.assert_called_once() + + @patch("libs.models.content_process.CosmosMongDBHelper") + def test_update_status_to_cosmos_new(self, mock_cosmos): + """Test inserting new status in Cosmos.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [] + mock_cosmos.return_value = mock_instance + + process = ContentProcess(process_id="test-123", status="processing") + process.update_status_to_cosmos( + "connection_string", + "database", + "collection" + ) + + mock_instance.find_document.assert_called_once() + mock_instance.insert_document.assert_called_once() diff --git a/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_data.py b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_data.py index 6ba309a3..8804786f 100644 --- a/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_data.py +++ b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_data.py @@ -1,7 +1,9 @@ import pytest -from unittest.mock import Mock +from unittest.mock import Mock, patch, MagicMock from libs.pipeline.entities.pipeline_step_result import StepResult from libs.pipeline.entities.pipeline_status import PipelineStatus +from libs.pipeline.entities.pipeline_data import DataPipeline +from libs.pipeline.entities.pipeline_file import ArtifactType def test_update_step(): @@ -47,57 +49,6 @@ def test_get_previous_step_result(): assert result is None -# def test_save_to_persistent_storage(mocker): -# # Mock the StorageBlobHelper.upload_text method -# mock_upload_text = mocker.patch( -# "libs.azure_helper.storage_blob.StorageBlobHelper.upload_text" -# ) - -# # Mock the StorageBlobHelper constructor to return a mock instance -# mock_storage_blob_helper = mocker.patch( -# "libs.azure_helper.storage_blob.StorageBlobHelper", autospec=True -# ) -# mock_storage_blob_helper_instance = mock_storage_blob_helper.return_value - -# # Mock the create_container method on the container_client -# mock_container_client = Mock() -# mock_container_client.create_container = Mock() -# mock_storage_blob_helper_instance._invalidate_container = Mock() -# mock_storage_blob_helper_instance._invalidate_container.return_value = ( -# mock_container_client -# ) - -# # Create a PipelineStatus object with a process_id -# pipeline_status = PipelineStatus(process_id="123") - -# # Mock the update_step method using pytest-mock -# mock_update_step = mocker.patch.object( -# PipelineStatus, "update_step", return_value=None -# ) - -# # Mock the model_dump_json method using pytest-mock -# mock_model_dump_json = mocker.patch.object( -# PipelineStatus, "model_dump_json", return_value='{"key": "value"}' -# ) - -# account_url = "https://example.com" -# container_name = "container" - -# # Call the save_to_persistent_storage method -# pipeline_status.save_to_persistent_storage(account_url, container_name) - -# # Assert that update_step was called once -# mock_update_step.assert_called_once() - -# # Assert that model_dump_json was called once -# mock_model_dump_json.assert_called_once() - -# # Assert that upload_text was called with the correct arguments -# mock_upload_text.assert_called_once_with( -# container_name="123", blob_name="process-status.json", text='{"key": "value"}' -# ) - - def test_save_to_persistent_storage_no_process_id(): pipeline_status = PipelineStatus() with pytest.raises(ValueError, match="Process ID is required to save the result."): @@ -115,3 +66,91 @@ def test_move_to_next_step(): assert pipeline_status.completed_steps == ["step1", "step2"] assert pipeline_status.remaining_steps == [] assert pipeline_status.completed is True + + +# DataPipeline Tests +class TestDataPipeline: + """Tests for DataPipeline class.""" + + def test_get_object_valid_json(self): + """Test parsing valid JSON string to DataPipeline.""" + json_str = '{"process_id": "test-123", "PipelineStatus": {"Completed": false}, "Files": []}' + result = DataPipeline.get_object(json_str) + assert result.process_id == "test-123" + assert result.pipeline_status is not None + + def test_get_object_invalid_json(self): + """Test that invalid JSON raises ValueError.""" + with pytest.raises(ValueError, match="Failed to parse"): + DataPipeline.get_object("invalid json {") + + def test_add_file(self): + """Test adding a file to the pipeline.""" + pipeline_status = PipelineStatus(process_id="test-123", active_step="step1") + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + file = data_pipeline.add_file("document.pdf", ArtifactType.SourceContent) + + assert len(data_pipeline.files) == 1 + assert file.name == "document.pdf" + assert file.artifact_type == ArtifactType.SourceContent + assert file.processed_by == "step1" + + def test_get_step_result(self): + """Test getting step result from DataPipeline.""" + pipeline_status = PipelineStatus(process_id="test-123") + step_result = StepResult(step_name="extract", result={"data": "value"}) + pipeline_status.process_results.append(step_result) + + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + result = data_pipeline.get_step_result("extract") + assert result == step_result + + def test_get_previous_step_result(self): + """Test getting previous step result from DataPipeline.""" + pipeline_status = PipelineStatus(process_id="test-123", completed_steps=["step1"]) + step_result = StepResult(step_name="step1", result={"data": "value"}) + pipeline_status.process_results.append(step_result) + + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + result = data_pipeline.get_previous_step_result("step2") + assert result == step_result + + def test_get_source_files(self): + """Test getting source files from pipeline.""" + pipeline_status = PipelineStatus(process_id="test-123", active_step="step1") + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + # Add source file + data_pipeline.add_file("source.pdf", ArtifactType.SourceContent) + # Add extracted file + data_pipeline.add_file("output.json", ArtifactType.ExtractedContent) + + source_files = data_pipeline.get_source_files() + + assert len(source_files) == 1 + assert source_files[0].name == "source.pdf" + + def test_save_to_database_not_implemented(self): + """Test that save_to_database raises NotImplementedError.""" + pipeline_status = PipelineStatus(process_id="test-123") + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + with pytest.raises(NotImplementedError): + data_pipeline.save_to_database() + + @patch("libs.pipeline.entities.pipeline_data.StorageBlobHelper") + def test_save_to_persistent_storage(self, mock_storage_helper): + """Test saving pipeline to persistent storage.""" + mock_instance = MagicMock() + mock_storage_helper.return_value = mock_instance + + pipeline_status = PipelineStatus(process_id="test-123") + data_pipeline = DataPipeline(process_id="test-123", pipeline_status=pipeline_status) + + data_pipeline.save_to_persistent_storage("https://storage.blob.core.windows.net", "container") + + mock_storage_helper.assert_called_once() + mock_instance.upload_text.assert_called_once() diff --git a/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_file.py b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_file.py new file mode 100644 index 00000000..af944de2 --- /dev/null +++ b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_file.py @@ -0,0 +1,178 @@ +"""Tests for pipeline_file module""" + +from unittest.mock import patch, MagicMock + +from libs.pipeline.entities.pipeline_file import ( + ArtifactType, + PipelineLogEntry, + FileDetailBase, + FileDetails, +) + + +class TestArtifactType: + """Tests for ArtifactType enum.""" + + def test_artifact_types(self): + """Test all artifact types exist.""" + assert ArtifactType.Undefined == "undefined" + assert ArtifactType.ConvertedContent == "converted_content" + assert ArtifactType.ExtractedContent == "extracted_content" + assert ArtifactType.SchemaMappedData == "schema_mapped_data" + assert ArtifactType.ScoreMergedData == "score_merged_data" + assert ArtifactType.SourceContent == "source_content" + assert ArtifactType.SavedContent == "saved_content" + + +class TestPipelineLogEntry: + """Tests for PipelineLogEntry class.""" + + def test_log_entry_creation(self): + """Test creating a log entry.""" + entry = PipelineLogEntry(source="test_source", message="test message") + assert entry.source == "test_source" + assert entry.message == "test message" + assert entry.datetime_offset is not None + + +class TestFileDetailBase: + """Tests for FileDetailBase class.""" + + def test_file_detail_base_creation(self): + """Test creating a FileDetailBase.""" + detail = FileDetailBase( + id="file-123", + process_id="proc-456", + name="test.pdf", + size=1024, + mime_type="application/pdf", + artifact_type=ArtifactType.SourceContent, + processed_by="extract", + ) + assert detail.id == "file-123" + assert detail.process_id == "proc-456" + assert detail.name == "test.pdf" + assert detail.size == 1024 + assert detail.mime_type == "application/pdf" + assert detail.artifact_type == ArtifactType.SourceContent + assert detail.processed_by == "extract" + assert detail.log_entries == [] + + def test_add_log_entry(self): + """Test adding a log entry.""" + detail = FileDetailBase(process_id="proc-123") + result = detail.add_log_entry(source="extract", message="Processing started") + + assert result is detail # Returns self for chaining + assert len(detail.log_entries) == 1 + assert detail.log_entries[0].source == "extract" + assert detail.log_entries[0].message == "Processing started" + + def test_add_multiple_log_entries(self): + """Test adding multiple log entries.""" + detail = FileDetailBase(process_id="proc-123") + detail.add_log_entry(source="step1", message="Step 1 done") + detail.add_log_entry(source="step2", message="Step 2 done") + + assert len(detail.log_entries) == 2 + + +class TestFileDetails: + """Tests for FileDetails class.""" + + @patch("libs.pipeline.entities.pipeline_file.StorageBlobHelper") + def test_download_stream(self, mock_storage_helper): + """Test download_stream method.""" + mock_instance = MagicMock() + mock_instance.download_stream.return_value = b"file content" + mock_storage_helper.return_value = mock_instance + + detail = FileDetails(process_id="proc-123", name="test.pdf") + result = detail.download_stream( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + + assert result == b"file content" + mock_storage_helper.assert_called_once_with( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + mock_instance.download_stream.assert_called_once_with( + container_name="proc-123", + blob_name="test.pdf", + ) + + @patch("libs.pipeline.entities.pipeline_file.StorageBlobHelper") + def test_download_file(self, mock_storage_helper): + """Test download_file method.""" + mock_instance = MagicMock() + mock_storage_helper.return_value = mock_instance + + detail = FileDetails(process_id="proc-123", name="test.pdf") + detail.download_file( + account_url="https://storage.blob.core.windows.net", + container_name="container", + file_path="/tmp/test.pdf", + ) + + mock_storage_helper.assert_called_once_with( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + mock_instance.download_file.assert_called_once_with( + container_name="proc-123", + blob_name="test.pdf", + download_path="/tmp/test.pdf", + ) + + @patch("libs.pipeline.entities.pipeline_file.StorageBlobHelper") + def test_upload_stream(self, mock_storage_helper): + """Test upload_stream method.""" + mock_instance = MagicMock() + mock_storage_helper.return_value = mock_instance + + detail = FileDetails(process_id="proc-123", name="output.bin") + stream_data = b"binary content data" + detail.upload_stream( + account_url="https://storage.blob.core.windows.net", + container_name="container", + stream=stream_data, + ) + + mock_storage_helper.assert_called_once_with( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + mock_instance.upload_stream.assert_called_once_with( + container_name="proc-123", + blob_name="output.bin", + stream=stream_data, + ) + assert detail.size == len(stream_data) + + @patch("libs.pipeline.entities.pipeline_file.StorageBlobHelper") + def test_upload_json_text(self, mock_storage_helper): + """Test upload_json_text method.""" + mock_instance = MagicMock() + mock_storage_helper.return_value = mock_instance + + detail = FileDetails(process_id="proc-123", name="data.json") + json_text = '{"key": "value"}' + detail.upload_json_text( + account_url="https://storage.blob.core.windows.net", + container_name="container", + text=json_text, + ) + + mock_storage_helper.assert_called_once_with( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + mock_instance.upload_text.assert_called_once_with( + container_name="proc-123", + blob_name="data.json", + text=json_text, + ) + assert detail.size == len(json_text) + assert detail.mime_type == "application/json" diff --git a/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_step_result.py b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_step_result.py new file mode 100644 index 00000000..745813ca --- /dev/null +++ b/src/ContentProcessor/src/tests/pipeline/entities/test_pipeline_step_result.py @@ -0,0 +1,69 @@ +"""Tests for pipeline_step_result module.""" + +import pytest +from unittest.mock import patch, MagicMock + +from libs.pipeline.entities.pipeline_step_result import StepResult + + +class TestStepResult: + """Tests for StepResult class.""" + + def test_step_result_creation(self): + """Test creating a StepResult object.""" + result = StepResult( + process_id="test-123", + step_name="extract", + result={"extracted": "data"}, + elapsed="00:01:30", + ) + assert result.process_id == "test-123" + assert result.step_name == "extract" + assert result.result == {"extracted": "data"} + assert result.elapsed == "00:01:30" + + def test_step_result_default_values(self): + """Test StepResult with default values.""" + result = StepResult() + assert result.process_id is None + assert result.step_name is None + assert result.result is None + assert result.elapsed is None + + def test_save_to_persistent_storage_no_process_id_raises(self): + """Test that save_to_persistent_storage raises when process_id is None.""" + result = StepResult(step_name="extract", result={"data": "value"}) + + with pytest.raises(ValueError, match="Process ID is required"): + result.save_to_persistent_storage( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + + @patch("libs.pipeline.entities.pipeline_step_result.StorageBlobHelper") + def test_save_to_persistent_storage_success(self, mock_storage_helper): + """Test successful save to persistent storage.""" + mock_instance = MagicMock() + mock_storage_helper.return_value = mock_instance + + result = StepResult( + process_id="test-123", + step_name="extract", + result={"extracted": "data"}, + ) + + result.save_to_persistent_storage( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + + mock_storage_helper.assert_called_once_with( + account_url="https://storage.blob.core.windows.net", + container_name="container", + ) + mock_instance.upload_text.assert_called_once() + + # Verify the arguments passed to upload_text + call_args = mock_instance.upload_text.call_args + assert call_args.kwargs["container_name"] == "test-123" + assert call_args.kwargs["blob_name"] == "extract-result.json" diff --git a/src/ContentProcessor/src/tests/pipeline/entities/test_schema.py b/src/ContentProcessor/src/tests/pipeline/entities/test_schema.py new file mode 100644 index 00000000..cfe9746c --- /dev/null +++ b/src/ContentProcessor/src/tests/pipeline/entities/test_schema.py @@ -0,0 +1,102 @@ +"""Tests for schema module.""" + +import pytest +from unittest.mock import patch, MagicMock +from datetime import datetime + +from libs.pipeline.entities.schema import Schema + + +class TestSchema: + """Tests for Schema class.""" + + def test_schema_creation(self): + """Test creating a Schema object.""" + schema = Schema( + Id="test-schema-123", + ClassName="TestClass", + Description="Test description", + FileName="test.json", + ContentType="application/json", + ) + assert schema.Id == "test-schema-123" + assert schema.ClassName == "TestClass" + assert schema.Description == "Test description" + assert schema.FileName == "test.json" + assert schema.ContentType == "application/json" + + def test_schema_with_timestamps(self): + """Test creating a Schema object with timestamps.""" + now = datetime.now() + schema = Schema( + Id="test-schema-123", + ClassName="TestClass", + Description="Test description", + FileName="test.json", + ContentType="application/json", + Created_On=now, + Updated_On=now, + ) + assert schema.Created_On == now + assert schema.Updated_On == now + + def test_get_schema_empty_id_raises(self): + """Test that get_schema raises when schema_id is empty.""" + with pytest.raises(Exception, match="Schema Id is not provided"): + Schema.get_schema( + connection_string="conn_str", + database_name="db", + collection_name="collection", + schema_id="", + ) + + def test_get_schema_none_id_raises(self): + """Test that get_schema raises when schema_id is None.""" + with pytest.raises(Exception, match="Schema Id is not provided"): + Schema.get_schema( + connection_string="conn_str", + database_name="db", + collection_name="collection", + schema_id=None, + ) + + @patch("libs.pipeline.entities.schema.CosmosMongDBHelper") + def test_get_schema_not_found_raises(self, mock_cosmos): + """Test that get_schema raises when schema is not found.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [] + mock_cosmos.return_value = mock_instance + + with pytest.raises(Exception, match="not found in"): + Schema.get_schema( + connection_string="conn_str", + database_name="db", + collection_name="collection", + schema_id="nonexistent-id", + ) + + @patch("libs.pipeline.entities.schema.CosmosMongDBHelper") + def test_get_schema_success(self, mock_cosmos): + """Test successful schema retrieval.""" + mock_instance = MagicMock() + mock_instance.find_document.return_value = [ + { + "Id": "test-123", + "ClassName": "TestClass", + "Description": "Test", + "FileName": "test.json", + "ContentType": "application/json", + } + ] + mock_cosmos.return_value = mock_instance + + result = Schema.get_schema( + connection_string="conn_str", + database_name="db", + collection_name="collection", + schema_id="test-123", + ) + + assert result.Id == "test-123" + assert result.ClassName == "TestClass" + mock_instance.find_document.assert_called_once_with({"Id": "test-123"}) diff --git a/src/ContentProcessor/src/tests/pipeline/handlers/logics/evaluate_handler/test_comparison.py b/src/ContentProcessor/src/tests/pipeline/handlers/logics/evaluate_handler/test_comparison.py new file mode 100644 index 00000000..8efff641 --- /dev/null +++ b/src/ContentProcessor/src/tests/pipeline/handlers/logics/evaluate_handler/test_comparison.py @@ -0,0 +1,152 @@ +"""Tests for comparison module.""" + +import pytest +from libs.pipeline.handlers.logics.evaluate_handler.comparison import ( + ExtractionComparisonItem, + ExtractionComparisonData, + get_extraction_comparison_data, + get_extraction_comparison, +) + + +class TestExtractionComparisonItem: + """Tests for ExtractionComparisonItem class.""" + + def test_to_dict(self): + """Test that to_dict returns a dictionary representation.""" + item = ExtractionComparisonItem( + Field="test_field", + Extracted="test_value", + Confidence="95.00%", + IsAboveThreshold=True, + ) + result = item.to_dict() + assert isinstance(result, dict) + assert result["Field"] == "test_field" + assert result["Extracted"] == "test_value" + assert result["Confidence"] == "95.00%" + assert result["IsAboveThreshold"] is True + + def test_to_json(self): + """Test that to_json returns a JSON string representation.""" + item = ExtractionComparisonItem( + Field="test_field", + Extracted="test_value", + Confidence="95.00%", + IsAboveThreshold=True, + ) + result = item.to_json() + assert isinstance(result, str) + assert "test_field" in result + assert "test_value" in result + + +class TestExtractionComparisonData: + """Tests for ExtractionComparisonData class.""" + + def test_to_dict(self): + """Test that to_dict returns a dictionary representation.""" + item = ExtractionComparisonItem( + Field="field1", Extracted="value1", Confidence="90.00%", IsAboveThreshold=True + ) + data = ExtractionComparisonData(items=[item]) + result = data.to_dict() + assert isinstance(result, dict) + assert "items" in result + assert len(result["items"]) == 1 + + def test_to_json(self): + """Test that to_json returns a JSON string representation.""" + item = ExtractionComparisonItem( + Field="field1", Extracted="value1", Confidence="90.00%", IsAboveThreshold=True + ) + data = ExtractionComparisonData(items=[item]) + result = data.to_json() + assert isinstance(result, str) + assert "field1" in result + + +class TestGetExtractionComparisonData: + """Tests for get_extraction_comparison_data function.""" + + def test_basic_comparison(self): + """Test basic extraction comparison data generation.""" + actual = {"name": "John", "age": 30} + confidence = {"name_confidence": 0.95, "age_confidence": 0.85} + threshold = 0.8 + + result = get_extraction_comparison_data(actual, confidence, threshold) + + assert isinstance(result, ExtractionComparisonData) + assert len(result.items) == 2 + + def test_above_threshold(self): + """Test that IsAboveThreshold is set correctly when above threshold.""" + actual = {"field1": "value1"} + confidence = {"field1_confidence": 0.95} + threshold = 0.8 + + result = get_extraction_comparison_data(actual, confidence, threshold) + + assert result.items[0].IsAboveThreshold in (True, "True") + + def test_below_threshold(self): + """Test that IsAboveThreshold is set correctly when below threshold.""" + actual = {"field1": "value1"} + confidence = {"field1_confidence": 0.5} + threshold = 0.8 + + result = get_extraction_comparison_data(actual, confidence, threshold) + + assert result.items[0].IsAboveThreshold in (False, "False") + + def test_nested_dict(self): + """Test comparison with nested dictionary.""" + actual = {"person": {"name": "John"}} + confidence = {"person.name_confidence": 0.9} + threshold = 0.8 + + result = get_extraction_comparison_data(actual, confidence, threshold) + + assert len(result.items) >= 1 + + +class TestGetExtractionComparison: + """Tests for get_extraction_comparison function.""" + + def test_basic_comparison_dataframe(self): + """Test that get_extraction_comparison returns a styled DataFrame.""" + pytest.importorskip("jinja2") + expected = {"name": "John", "age": 30} + actual = {"name": "John", "age": 30} + confidence = {"name_confidence": 0.95, "age_confidence": 0.85} + accuracy = {"accuracy_name": 1.0, "accuracy_age": 1.0} + + result = get_extraction_comparison(expected, actual, confidence, accuracy) + + # Result should be a styled DataFrame + assert result is not None + + def test_mismatch_detection(self): + """Test that mismatches are detected correctly.""" + pytest.importorskip("jinja2") + expected = {"name": "John"} + actual = {"name": "Jane"} + confidence = {"name_confidence": 0.95} + accuracy = {"accuracy_name": 0.0} + + result = get_extraction_comparison(expected, actual, confidence, accuracy) + + assert result is not None + + def test_match_detection(self): + """Test that matches are detected correctly.""" + pytest.importorskip("jinja2") + expected = {"field1": "value1"} + actual = {"field1": "value1"} + confidence = {"field1_confidence": 0.95} + accuracy = {"accuracy_field1": 1.0} + + result = get_extraction_comparison(expected, actual, confidence, accuracy) + + assert result is not None diff --git a/src/ContentProcessor/src/tests/pipeline/test_pipeline_step_helper.py b/src/ContentProcessor/src/tests/pipeline/test_pipeline_step_helper.py new file mode 100644 index 00000000..f9258ff8 --- /dev/null +++ b/src/ContentProcessor/src/tests/pipeline/test_pipeline_step_helper.py @@ -0,0 +1,41 @@ +"""Tests for pipeline_step_helper module.""" + +from libs.pipeline.pipeline_step_helper import get_next_step_name +from libs.pipeline.entities.pipeline_status import PipelineStatus + + +class TestGetNextStepName: + """Tests for get_next_step_name function.""" + + def test_get_next_step_name_returns_next_step(self): + """Test that get_next_step_name returns the next step in the pipeline.""" + status = PipelineStatus( + steps=["step1", "step2", "step3"], + active_step="step1", + remaining_steps=["step2", "step3"], + completed_steps=[], + ) + result = get_next_step_name(status) + assert result == "step2" + + def test_get_next_step_name_returns_none_on_last_step(self): + """Test that get_next_step_name returns None when on the last step.""" + status = PipelineStatus( + steps=["step1", "step2", "step3"], + active_step="step3", + remaining_steps=[], + completed_steps=["step1", "step2"], + ) + result = get_next_step_name(status) + assert result is None + + def test_get_next_step_name_middle_step(self): + """Test get_next_step_name when in the middle of the pipeline.""" + status = PipelineStatus( + steps=["extract", "validate", "transform", "complete"], + active_step="validate", + remaining_steps=["transform", "complete"], + completed_steps=["extract"], + ) + result = get_next_step_name(status) + assert result == "transform" diff --git a/src/ContentProcessor/src/tests/pipeline/test_queue_handler_base.py b/src/ContentProcessor/src/tests/pipeline/test_queue_handler_base.py index 34bd161c..24530b0d 100644 --- a/src/ContentProcessor/src/tests/pipeline/test_queue_handler_base.py +++ b/src/ContentProcessor/src/tests/pipeline/test_queue_handler_base.py @@ -1,48 +1,38 @@ import pytest -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from azure.storage.queue import QueueClient from libs.pipeline.entities.pipeline_message_context import MessageContext from libs.pipeline.entities.pipeline_step_result import StepResult +from libs.pipeline.entities.pipeline_data import DataPipeline +from libs.pipeline.entities.pipeline_file import ArtifactType, FileDetails from libs.pipeline.queue_handler_base import HandlerBase from libs.application.application_context import AppContext -@pytest.fixture -def mock_queue_helper(mocker): - # Mock the helper methods - mocker.patch( - "libs.pipeline.pipeline_queue_helper.create_queue_client_name", - return_value="test-queue", - ) - mocker.patch( - "libs.pipeline.pipeline_queue_helper.create_dead_letter_queue_client_name", - return_value="test-dlq", - ) - mocker.patch( - "libs.pipeline.pipeline_queue_helper.create_or_get_queue_client", - return_value=MagicMock(spec=QueueClient), - ) - return mocker - - @pytest.fixture def mock_app_context(): - # Create a mock AppContext instance - mock_app_context = MagicMock(spec=AppContext) + """Create a mock AppContext instance.""" + mock_context = MagicMock(spec=AppContext) - # Mock the necessary fields for AppContext mock_configuration = MagicMock() mock_configuration.app_storage_queue_url = "https://testqueueurl.com" mock_configuration.app_storage_blob_url = "https://testbloburl.com" mock_configuration.app_cps_processes = "TestProcess" + mock_configuration.app_message_queue_interval = 1 + mock_configuration.app_message_queue_process_timeout = 30 + mock_configuration.app_message_queue_visibility_timeout = 30 + mock_configuration.app_cosmos_connstr = "AccountEndpoint=https://test.documents.azure.com:443/;AccountKey=test==" + mock_configuration.app_cosmos_database = "testdb" + mock_configuration.app_cosmos_container_process = "processes" - mock_app_context.configuration = mock_configuration - mock_app_context.credential = MagicMock() + mock_context.configuration = mock_configuration + mock_context.credential = MagicMock() - return mock_app_context + return mock_context class MockHandler(HandlerBase): + """Concrete implementation of HandlerBase for testing.""" async def execute(self, context: MessageContext) -> StepResult: return StepResult( process_id="1234", @@ -51,22 +41,10 @@ async def execute(self, context: MessageContext) -> StepResult: ) -@pytest.mark.asyncio -async def test_execute_method(): - mock_handler = MockHandler(appContext=MagicMock(), step_name="extract") - message_context = MagicMock(spec=MessageContext) - - # Execute the handler - result = await mock_handler.execute(message_context) - - assert result.step_name == "extract" - assert result.result == {"result": "success", "data": {"key": "value"}} - - -def test_show_queue_information(mock_queue_helper, mock_app_context): +def test_show_queue_information(mock_app_context): + """Test _show_queue_information method.""" handler = MockHandler(appContext=mock_app_context, step_name="extract") - # Mock the queue client properties mock_queue_client = MagicMock(spec=QueueClient) mock_queue_client.url = "https://testurl" mock_queue_client.get_queue_properties.return_value = MagicMock( @@ -75,3 +53,92 @@ def test_show_queue_information(mock_queue_helper, mock_app_context): handler.queue_client = mock_queue_client handler._show_queue_information() + mock_queue_client.get_queue_properties.assert_called_once() + + +@patch("libs.pipeline.queue_handler_base.pipeline_queue_helper") +def test_initialize_handler(mock_queue_helper, mock_app_context): + """Test the __initialize_handler method.""" + mock_queue_client = MagicMock(spec=QueueClient) + mock_queue_client.url = "https://testurl" + mock_queue_client.get_queue_properties.return_value = MagicMock( + approximate_message_count=0 + ) + mock_queue_helper.create_queue_client_name.return_value = "test-queue" + mock_queue_helper.create_dead_letter_queue_client_name.return_value = "test-dlq" + mock_queue_helper.create_or_get_queue_client.return_value = mock_queue_client + + handler = MockHandler(appContext=mock_app_context, step_name="extract") + handler._HandlerBase__initialize_handler(mock_app_context, "extract") + + assert handler.handler_name == "extract" + assert handler.application_context == mock_app_context + assert handler.queue_name == "test-queue" + assert handler.dead_letter_queue_name == "test-dlq" + assert handler.queue_client == mock_queue_client + mock_queue_helper.create_queue_client_name.assert_called_with("extract") + mock_queue_helper.create_dead_letter_queue_client_name.assert_called_with("extract") + + +@patch("libs.pipeline.queue_handler_base.asyncio.run") +@patch("libs.pipeline.queue_handler_base.pipeline_queue_helper") +def test_connect_queue(mock_queue_helper, mock_asyncio_run, mock_app_context): + """Test the connect_queue method.""" + handler = MockHandler(appContext=mock_app_context, step_name="extract") + + handler.connect_queue( + show_information=False, + app_context=mock_app_context, + step_name="extract" + ) + mock_asyncio_run.assert_called_once() + + +def test_download_output_file_to_json_string(mock_app_context): + """Test downloading output file and converting to JSON string.""" + handler = MockHandler(appContext=mock_app_context, step_name="extract") + handler.application_context = mock_app_context + + mock_file = MagicMock(spec=FileDetails) + mock_file.processed_by = "extract" + mock_file.artifact_type = ArtifactType.ExtractedContent + mock_file.download_stream.return_value = b'{"key": "value"}' + + mock_data_pipeline = MagicMock(spec=DataPipeline) + mock_data_pipeline.files = [mock_file] + + handler._current_message_context = MagicMock(spec=MessageContext) + handler._current_message_context.data_pipeline = mock_data_pipeline + + result = handler.download_output_file_to_json_string( + processed_by="extract", + artifact_type=ArtifactType.ExtractedContent + ) + + assert result == '{"key": "value"}' + mock_file.download_stream.assert_called_once_with( + "https://testbloburl.com", + "TestProcess" + ) + + +def test_download_output_file_no_matching_file_raises_error(mock_app_context): + """Test download raises IndexError when no matching file is found.""" + handler = MockHandler(appContext=mock_app_context, step_name="extract") + handler.application_context = mock_app_context + + mock_file = MagicMock(spec=FileDetails) + mock_file.processed_by = "other-step" + mock_file.artifact_type = ArtifactType.SourceContent + + mock_data_pipeline = MagicMock(spec=DataPipeline) + mock_data_pipeline.files = [mock_file] + + handler._current_message_context = MagicMock(spec=MessageContext) + handler._current_message_context.data_pipeline = mock_data_pipeline + + with pytest.raises(IndexError): + handler.download_output_file_to_json_string( + processed_by="extract", + artifact_type=ArtifactType.ExtractedContent + ) diff --git a/src/ContentProcessor/src/tests/process_host/test_handler_process_host.py b/src/ContentProcessor/src/tests/process_host/test_handler_process_host.py new file mode 100644 index 00000000..2b375f34 --- /dev/null +++ b/src/ContentProcessor/src/tests/process_host/test_handler_process_host.py @@ -0,0 +1,43 @@ +"""Tests for handler_process_host module.""" + +from unittest.mock import Mock, patch + +from libs.process_host.handler_process_host import HandlerInfo, HandlerHostManager + + +class TestHandlerInfo: + """Tests for HandlerInfo class.""" + + def test_handler_info_creation(self): + """Test creating HandlerInfo.""" + handler_info = HandlerInfo() + assert handler_info.handler is None + assert handler_info.target_function is None + assert handler_info.args is None + + +class TestHandlerHostManager: + """Tests for HandlerHostManager class.""" + + def test_init(self): + """Test HandlerHostManager initialization.""" + manager = HandlerHostManager() + assert manager.handlers == [] + + @patch("libs.process_host.handler_process_host.Process") + def test_restart_handler(self, mock_process_class): + """Test restarting a handler.""" + mock_process = Mock() + mock_process.start = Mock() + mock_process.name = "test_handler" + mock_process_class.return_value = mock_process + + manager = HandlerHostManager() + mock_func = Mock() + args = ("queue", Mock(), "handler") + + result = manager._restart_handler("test_handler", mock_func, args) + + mock_process_class.assert_called_once() + mock_process.start.assert_called_once() + assert result == mock_process