| """ | |
| Integration tests for Flask routes | |
| """ | |
| import pytest | |
| import json | |
| import tempfile | |
| import os | |
| from unittest.mock import patch, Mock | |
| from io import BytesIO | |
| from werkzeug.datastructures import FileStorage | |
class TestMainRoutes:
    """Integration tests for main application routes.

    Fixtures (``client``, ``app``, ``mock_file_service``,
    ``mock_tokenizer_service``, ``mock_validators``) are presumably provided
    by conftest.py — TODO confirm.
    """

    def test_index_get_basic(self, client):
        """Test basic GET request to index."""
        response = client.get('/')
        assert response.status_code == 200
        assert b'Tokenizer Pro' in response.data
        assert b'Advanced tokenization analysis' in response.data
        assert b'textarea' in response.data

    def test_index_get_with_parameters(self, client):
        """Test GET request with query parameters."""
        response = client.get('/?model=gpt2&model_type=predefined')
        assert response.status_code == 200
        assert b'gpt2' in response.data or b'GPT-2' in response.data

    def test_index_post_text_analysis(self, mock_file_service, mock_tokenizer_service, client):
        """Test POST request with text analysis."""
        # Mock services
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        mock_file_service.process_text_for_tokenization.return_value = {
            'tokens': [
                {
                    'display': 'Hello',
                    'original': 'Hello',
                    'token_id': 15496,
                    'colors': {'background': '#FF5733', 'text': '#FFFFFF'},
                    'newline': False
                }
            ],
            'stats': {
                'basic_stats': {
                    'total_tokens': 1,
                    'unique_tokens': 1,
                    'unique_percentage': '100.0',
                    'special_tokens': 0,
                    'space_tokens': 0,
                    'newline_tokens': 0,
                    'compression_ratio': '5.0'
                },
                'length_stats': {
                    'avg_length': '5.0',
                    'median_length': '5.0',
                    'std_dev': '0.0'
                }
            },
            'display_limit_reached': False,
            'total_tokens': 1,
            'preview_only': False,
            'tokenizer_info': {
                'vocab_size': 50257,
                'tokenizer_type': 'GPT2TokenizerFast'
            }
        }
        response = client.post('/', data={
            'text': 'Hello',
            'model': 'gpt2',
            'model_type': 'predefined'
        })
        assert response.status_code == 200
        mock_file_service.process_text_for_tokenization.assert_called_once()

    def test_index_post_ajax_request(self, mock_file_service, mock_tokenizer_service, client):
        """Test AJAX POST request for text analysis."""
        # Mock services
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        expected_response = {
            'tokens': [],
            'stats': {'basic_stats': {}, 'length_stats': {}},
            'display_limit_reached': False,
            'total_tokens': 0
        }
        mock_file_service.process_text_for_tokenization.return_value = expected_response
        # X-Requested-With marks the request as AJAX, so the route returns JSON
        response = client.post('/',
            data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'},
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'tokens' in data
        assert 'stats' in data

    def test_index_post_file_upload(self, mock_tokenizer_service, mock_file_service, client, app):
        """Test POST request with file upload."""
        with app.app_context():
            # Mock services
            mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
            # Mock file processing
            mock_file_service.save_uploaded_file.return_value = '/tmp/test_file.txt'
            mock_file_service.process_file_for_tokenization.return_value = {
                'tokens': [],
                'stats': {'basic_stats': {}, 'length_stats': {}},
                'display_limit_reached': False,
                'total_tokens': 0,
                'preview_only': True
            }
            mock_file_service.cleanup_file.return_value = None
            # Create test file data
            file_data = BytesIO(b"Hello world! This is a test file.")
            response = client.post('/',
                data={
                    'file': (file_data, 'test.txt'),
                    'model': 'gpt2',
                    'model_type': 'predefined'
                },
                content_type='multipart/form-data'
            )
            assert response.status_code == 200
            mock_file_service.save_uploaded_file.assert_called_once()
            mock_file_service.process_file_for_tokenization.assert_called_once()
            # Uploaded temp file must be cleaned up after processing
            mock_file_service.cleanup_file.assert_called_once()

    def test_index_post_validation_error(self, mock_validators, client):
        """Test POST request with validation error."""
        from app.utils.validators import ValidationError
        # Mock validation to raise error
        mock_validators.validate_text_input.side_effect = ValidationError("Invalid input")
        response = client.post('/',
            data={'text': 'Invalid text', 'model': 'gpt2'},
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Invalid input' in data['error']

    def test_index_post_empty_data(self, client):
        """Test POST request with empty data."""
        response = client.post('/', data={})
        assert response.status_code == 200
        # Should return the form again without processing
class TestTokenizerInfoRoute:
    """Integration tests for tokenizer info route (``/tokenizer-info``)."""

    def test_tokenizer_info_predefined_model(self, mock_tokenizer_service, client):
        """Test tokenizer info for predefined model."""
        # Mock service; load_tokenizer returns (tokenizer, info, error)
        mock_tokenizer_service.is_predefined_model.return_value = True
        mock_tokenizer_service.load_tokenizer.return_value = (
            Mock(),
            {
                'vocab_size': 50257,
                'tokenizer_type': 'GPT2TokenizerFast',
                'model_max_length': 1024,
                'special_tokens': {'eos_token': '</s>'}
            },
            None
        )
        response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false')
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'vocab_size' in data
        assert 'tokenizer_type' in data
        assert data['vocab_size'] == 50257

    def test_tokenizer_info_custom_model(self, mock_validators, mock_tokenizer_service, client):
        """Test tokenizer info for custom model."""
        # Mock validation
        mock_validators.validate_model_path.return_value = None
        # Mock service
        mock_tokenizer_service.is_predefined_model.return_value = False
        mock_tokenizer_service.load_tokenizer.return_value = (
            Mock(),
            {
                'vocab_size': 32000,
                'tokenizer_type': 'LlamaTokenizerFast',
                'special_tokens': {}
            },
            None
        )
        response = client.get('/tokenizer-info?model_id=meta-llama/Llama-2-7b-hf&is_custom=true')
        assert response.status_code == 200
        data = json.loads(response.data)
        assert data['vocab_size'] == 32000

    def test_tokenizer_info_missing_model_id(self, client):
        """Test tokenizer info without model_id."""
        response = client.get('/tokenizer-info')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'No model ID provided' in data['error']

    def test_tokenizer_info_validation_error(self, mock_validators, client):
        """Test tokenizer info with validation error."""
        from app.utils.validators import ValidationError
        # Mock validation to raise error
        mock_validators.validate_model_path.side_effect = ValidationError("Invalid model path")
        response = client.get('/tokenizer-info?model_id=invalid/path&is_custom=true')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Invalid model path' in data['error']

    def test_tokenizer_info_service_error(self, mock_tokenizer_service, client):
        """Test tokenizer info with service error."""
        # Mock service to return error (third tuple element is the error message)
        mock_tokenizer_service.is_predefined_model.return_value = True
        mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load tokenizer")
        response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Failed to load tokenizer' in data['error']
class TestHealthCheckRoutes:
    """Integration tests for health check routes (``/health`` family)."""

    def test_basic_health_check(self, client):
        """Test basic health check endpoint."""
        response = client.get('/health')
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'status' in data
        assert 'timestamp' in data
        assert 'version' in data
        assert data['status'] == 'healthy'

    def test_detailed_health_check(self, mock_disk, mock_memory, mock_cpu, mock_tokenizer_service, client):
        """Test detailed health check endpoint."""
        # Mock system info (presumably psutil cpu/memory/disk patches — TODO confirm)
        mock_cpu.return_value = 25.5
        mock_memory.return_value = Mock(total=8000000000, available=4000000000, percent=50.0, used=4000000000)
        mock_disk.return_value = Mock(total=100000000000, used=50000000000, free=50000000000)
        # Mock tokenizer service
        mock_tokenizer_service.tokenizers = {}
        mock_tokenizer_service.custom_tokenizers = {}
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {}}
        mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None)
        response = client.get('/health/detailed')
        assert response.status_code == 200
        data = json.loads(response.data)
        assert 'status' in data
        assert 'system' in data
        assert 'services' in data
        assert 'configuration' in data
        # Check system info
        assert 'cpu_percent' in data['system']
        assert 'memory' in data['system']
        assert 'disk' in data['system']
        # Check services info
        assert 'tokenizer_service' in data['services']
        assert 'file_service' in data['services']

    def test_readiness_check_ready(self, mock_tokenizer_service, client, app):
        """Test readiness check when application is ready."""
        with app.app_context():
            # Mock successful tokenizer loading
            mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None)
            response = client.get('/health/ready')
            assert response.status_code == 200
            data = json.loads(response.data)
            assert 'ready' in data
            assert 'checks' in data
            assert isinstance(data['checks'], dict)

    def test_readiness_check_not_ready(self, mock_tokenizer_service, client):
        """Test readiness check when application is not ready."""
        # Mock failed tokenizer loading
        mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load")
        response = client.get('/health/ready')
        # 503 Service Unavailable signals not-ready to orchestrators
        assert response.status_code == 503
        data = json.loads(response.data)
        assert data['ready'] is False
        assert 'checks' in data
class TestErrorHandling:
    """Test error handling across routes."""

    def test_404_handling(self, client):
        """Test 404 error handling."""
        response = client.get('/nonexistent-route')
        assert response.status_code == 404

    def test_405_method_not_allowed(self, client):
        """Test 405 error for wrong HTTP method."""
        response = client.put('/')  # PUT not allowed
        assert response.status_code == 405

    def test_500_internal_error(self, mock_tokenizer_service, client):
        """Test 500 error handling."""
        # Mock service to raise unexpected exception
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        with patch('app.services.file_service.file_service') as mock_file_service:
            mock_file_service.process_text_for_tokenization.side_effect = Exception("Unexpected error")
            response = client.post('/',
                data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'},
                headers={'X-Requested-With': 'XMLHttpRequest'}
            )
            assert response.status_code == 400  # Our app returns 400 for processing errors
            data = json.loads(response.data)
            assert 'error' in data
class TestSecurityFeatures:
    """Test security features in routes."""

    def test_malicious_filename_blocked(self, mock_validators, client):
        """Test that malicious filenames are blocked."""
        from app.utils.validators import ValidationError
        # Mock validation to detect malicious filename
        mock_validators.validate_filename.side_effect = ValidationError("Malicious filename detected")
        file_data = BytesIO(b"test content")
        # Path-traversal filename must be rejected with a 400
        response = client.post('/',
            data={
                'file': (file_data, '../../../etc/passwd'),
                'model': 'gpt2',
                'model_type': 'predefined'
            },
            content_type='multipart/form-data',
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data

    def test_malicious_model_path_blocked(self, mock_validators, client):
        """Test that malicious model paths are blocked."""
        from app.utils.validators import ValidationError
        # Mock validation to detect malicious model path
        mock_validators.validate_model_path.side_effect = ValidationError("Untrusted model path")
        response = client.post('/',
            data={
                'text': 'Test',
                'custom_model': 'malicious/backdoor-model',
                'model_type': 'custom'
            },
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Untrusted model path' in data['error']