""" Query parser tests for HXBooks search functionality. Tests the QueryParser class methods for type conversion, operator parsing, field filters, and edge case handling. """ from datetime import date import pytest from hxbooks.search import ( ComparisonOperator, Field, IsOperatorValue, QueryParser, SearchQuery, SortDirection, _convert_value, # noqa: PLC2701 ) @pytest.fixture def parser() -> QueryParser: """Create a QueryParser instance for testing.""" return QueryParser() class TestQueryParser: """Test the QueryParser class functionality.""" def test_parse_empty_query(self, parser: QueryParser) -> None: """Test parsing an empty query string.""" result = parser.parse("") assert result.text_terms == [] assert result.field_filters == [] def test_parse_whitespace_only(self, parser: QueryParser) -> None: """Test parsing a query with only whitespace.""" result = parser.parse(" \t\n ") assert result.text_terms == [] assert result.field_filters == [] def test_parse_simple_text_terms(self, parser: QueryParser) -> None: """Test parsing simple text search terms.""" result = parser.parse("hobbit tolkien") assert result.text_terms == ["hobbit", "tolkien"] assert result.field_filters == [] def test_parse_quoted_text_terms(self, parser: QueryParser) -> None: """Test parsing quoted text search terms.""" result = parser.parse('"the hobbit" tolkien') assert result.text_terms == ["the hobbit", "tolkien"] assert result.field_filters == [] def test_parse_quoted_text_with_spaces(self, parser: QueryParser) -> None: """Test parsing quoted text containing multiple spaces.""" result = parser.parse('"lord of the rings"') assert result.text_terms == ["lord of the rings"] assert result.field_filters == [] class TestFieldFilters: """Test field filter parsing.""" def test_parse_title_filter(self, parser: QueryParser) -> None: """Test parsing title field filter.""" result = parser.parse("title:hobbit") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.TITLE assert filter.operator == ComparisonOperator.EQUALS assert filter.value == "hobbit" assert filter.negated is False def test_parse_quoted_title_filter(self, parser: QueryParser) -> None: """Test parsing quoted title field filter.""" result = parser.parse('title:"the hobbit"') assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.TITLE assert filter.value == "the hobbit" def test_parse_author_filter(self, parser: QueryParser) -> None: """Test parsing author field filter.""" result = parser.parse("author:tolkien") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.AUTHOR assert filter.value == "tolkien" def test_parse_is_filter(self, parser: QueryParser) -> None: """Test parsing 'is' operator field filter.""" result = parser.parse("is:reading") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.IS assert filter.value == IsOperatorValue.READING def test_parse_negated_filter(self, parser: QueryParser) -> None: """Test parsing negated field filter.""" result = parser.parse("-genre:romance") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.GENRE assert filter.value == "romance" assert filter.negated is True def test_parse_multiple_filters(self, parser: QueryParser) -> None: """Test parsing multiple field filters.""" result = parser.parse("author:tolkien genre:fantasy") assert len(result.field_filters) == 2 author_filter = next(f for f in result.field_filters if f.field == Field.AUTHOR) assert author_filter.value == "tolkien" genre_filter = next(f for f in result.field_filters if f.field == Field.GENRE) assert genre_filter.value == "fantasy" def test_parse_mixed_filters_and_text(self, parser: QueryParser) -> None: """Test parsing mix of field filters and text terms.""" result = parser.parse('epic author:tolkien "middle earth"') assert "epic" in result.text_terms assert "middle earth" in result.text_terms assert len(result.field_filters) == 1 assert result.field_filters[0].field == Field.AUTHOR class TestComparisonOperators: """Test comparison operator parsing.""" @pytest.mark.parametrize( "operator_str,expected_operator", [ (">=", ComparisonOperator.GREATER_EQUAL), ("<=", ComparisonOperator.LESS_EQUAL), (">", ComparisonOperator.GREATER), ("<", ComparisonOperator.LESS), ("=", ComparisonOperator.EQUALS), ("!=", ComparisonOperator.NOT_EQUALS), (":", ComparisonOperator.EQUALS), # : defaults to equals ], ) def test_parse_comparison_operators( self, parser: QueryParser, operator_str: str, expected_operator: ComparisonOperator, ) -> None: """Test parsing all supported comparison operators.""" query = f"rating{operator_str}4" result = parser.parse(query) assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.RATING assert filter.operator == expected_operator assert filter.value == 4 def test_parse_date_comparison(self, parser: QueryParser) -> None: """Test parsing date comparison operators.""" result = parser.parse("added>=2026-03-15") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.ADDED_DATE assert filter.operator == ComparisonOperator.GREATER_EQUAL assert filter.value == date(2026, 3, 15) def test_parse_numeric_comparison(self, parser: QueryParser) -> None: """Test parsing numeric comparison operators.""" result = parser.parse("shelf>2") assert len(result.field_filters) == 1 filter = result.field_filters[0] assert filter.field == Field.SHELF assert filter.operator == ComparisonOperator.GREATER assert filter.value == 2 class TestTypeConversion: """Test the _convert_value method for different field types.""" def test_convert_date_field_valid(self, parser: QueryParser) -> None: """Test converting valid date strings for date fields.""" result = _convert_value(Field.BOUGHT_DATE, "2026-03-15") assert result == date(2026, 3, 15) result = _convert_value(Field.READ_DATE, "2025-12-31") assert result == date(2025, 12, 31) result = _convert_value(Field.ADDED_DATE, "2024-01-01") assert result == date(2024, 1, 1) def test_convert_date_field_invalid(self, parser: QueryParser) -> None: """Test converting invalid date strings falls back to string.""" result = _convert_value(Field.BOUGHT_DATE, "invalid-date") assert result == "invalid-date" result = _convert_value(Field.READ_DATE, "2026-13-45") # Invalid month/day assert result == "2026-13-45" result = _convert_value(Field.ADDED_DATE, "not-a-date") assert result == "not-a-date" def test_convert_numeric_field_integers(self, parser: QueryParser) -> None: """Test converting integer strings for numeric fields.""" result = _convert_value(Field.RATING, "5") assert result == 5 assert isinstance(result, int) result = _convert_value(Field.SHELF, "10") assert result == 10 result = _convert_value(Field.YEAR, "2026") assert result == 2026 def test_convert_numeric_field_floats(self, parser: QueryParser) -> None: """Test converting float strings for numeric fields.""" result = _convert_value(Field.RATING, "4.5") assert result == pytest.approx(4.5) assert isinstance(result, float) result = _convert_value(Field.SHELF, "2.0") assert result == pytest.approx(2.0) def test_convert_numeric_field_invalid(self, parser: QueryParser) -> None: """Test converting invalid numeric strings falls back to string.""" result = _convert_value(Field.RATING, "not-a-number") assert result == "not-a-number" result = _convert_value(Field.SHELF, "abc") assert result == "abc" result = _convert_value(Field.YEAR, "twenty-twenty-six") assert result == "twenty-twenty-six" def test_convert_string_fields(self, parser: QueryParser) -> None: """Test converting values for string fields returns as-is.""" result = _convert_value(Field.TITLE, "The Hobbit") assert result == "The Hobbit" result = _convert_value(Field.AUTHOR, "Tolkien") assert result == "Tolkien" result = _convert_value(Field.GENRE, "Fantasy") assert result == "Fantasy" # Even things that look like dates/numbers should stay as strings for string fields result = _convert_value(Field.TITLE, "2026-03-15") assert result == "2026-03-15" assert isinstance(result, str) result = _convert_value(Field.AUTHOR, "123") assert result == "123" assert isinstance(result, str) def test_convert_is_operator(self, parser: QueryParser) -> None: """Test converting values for 'is' operator fields.""" result = _convert_value(Field.IS, "reading") assert result == IsOperatorValue.READING result = _convert_value(Field.IS, "dropped") assert result == IsOperatorValue.DROPPED result = _convert_value(Field.IS, "wished") assert result == IsOperatorValue.WISHED # Invalid value should return UNKNOWN result = _convert_value(Field.IS, "invalid-status") assert result == IsOperatorValue.UNKNOWN def test_convert_sort_field(self, parser: QueryParser) -> None: """Test converting values for 'sort' field.""" result = _convert_value(Field.SORT, "added") assert result == (Field.ADDED_DATE, SortDirection.ASC) result = _convert_value(Field.SORT, "added-desc") assert result == (Field.ADDED_DATE, SortDirection.DESC) # Invalid field or direction should fallback to a default value result = _convert_value(Field.SORT, "added-invalid") assert result == (Field.SORT, SortDirection.ASC) result = _convert_value(Field.SORT, "invalid-asc") assert result == (Field.SORT, SortDirection.ASC) class TestParsingEdgeCases: """Test edge cases and error handling in query parsing.""" def test_parse_invalid_field_name(self, parser: QueryParser) -> None: """Test parsing with invalid field names falls back to text search.""" result = parser.parse("invalid_field:value") # Should fall back to treating the whole thing as text assert len(result.text_terms) >= 1 or len(result.field_filters) == 0 def test_parse_mixed_quotes_and_operators(self, parser: QueryParser) -> None: """Test parsing complex queries with quotes and operators.""" result = parser.parse('title:"The Lord" author:tolkien rating>=4') # Should have both field filters title_filter = next( (f for f in result.field_filters if f.field == Field.TITLE), None ) author_filter = next( (f for f in result.field_filters if f.field == Field.AUTHOR), None ) rating_filter = next( (f for f in result.field_filters if f.field == Field.RATING), None ) assert title_filter is not None assert title_filter.value == "The Lord" assert author_filter is not None assert author_filter.value == "tolkien" assert rating_filter is not None assert rating_filter.value == 4 assert rating_filter.operator == ComparisonOperator.GREATER_EQUAL def test_parse_escaped_quotes(self, parser: QueryParser) -> None: """Test parsing strings with escaped quotes.""" result = parser.parse(r'title:"She said \"hello\""') if result.field_filters: # If parsing succeeds, check the escaped quote handling filter = result.field_filters[0] assert isinstance(filter.value, str) assert "hello" in filter.value # If parsing fails, it should fall back gracefully def test_parse_special_characters(self, parser: QueryParser) -> None: """Test parsing queries with special characters.""" result = parser.parse("title:C++ author:Stroustrup") # Should handle the + characters gracefully assert len(result.field_filters) >= 1 or len(result.text_terms) >= 1 def test_parse_very_long_query(self, parser: QueryParser) -> None: """Test parsing very long query strings.""" long_value = "a" * 1000 result = parser.parse(f"title:{long_value}") # Should handle long strings without crashing assert isinstance(result, SearchQuery) def test_parse_unicode_characters(self, parser: QueryParser) -> None: """Test parsing queries with unicode characters.""" result = parser.parse("title:Café author:José") # Should handle unicode gracefully assert isinstance(result, SearchQuery) def test_fallback_behavior_on_parse_error(self, parser: QueryParser) -> None: """Test that invalid syntax falls back to text search.""" # Construct a query that should cause parse errors invalid_queries = [ "(((", # Unmatched parentheses "field::", # Double colon ":", # Just a colon ">=<=", # Invalid operator combination ] for query in invalid_queries: result = parser.parse(query) # Should not crash and should return some kind of result assert isinstance(result, SearchQuery) # Most likely falls back to text terms assert len(result.text_terms) >= 1 or len(result.field_filters) == 0 class TestComplexQueries: """Test parsing of complex, real-world query examples.""" def test_parse_realistic_book_search(self, parser: QueryParser) -> None: """Test parsing realistic book search queries.""" result = parser.parse( 'author:tolkien genre:fantasy -genre:romance rating>=4 "middle earth"' ) # Should have multiple field filters and text terms assert len(result.field_filters) >= 3 assert "middle earth" in result.text_terms # Check specific filters tolkien_filter = next( (f for f in result.field_filters if f.field == Field.AUTHOR), None ) assert tolkien_filter is not None assert tolkien_filter.value == "tolkien" fantasy_filter = next( ( f for f in result.field_filters if f.field == Field.GENRE and not f.negated ), None, ) assert fantasy_filter is not None assert fantasy_filter.value == "fantasy" romance_filter = next( (f for f in result.field_filters if f.field == Field.GENRE and f.negated), None, ) assert romance_filter is not None assert romance_filter.value == "romance" assert romance_filter.negated is True def test_parse_location_and_date_filters(self, parser: QueryParser) -> None: """Test parsing location and date-based queries.""" result = parser.parse("place:home bookshelf:fantasy shelf>=2 added>=2026-01-01") assert len(result.field_filters) == 4 place_filter = next( (f for f in result.field_filters if f.field == Field.PLACE), None ) assert place_filter is not None assert place_filter.value == "home" shelf_filter = next( (f for f in result.field_filters if f.field == Field.SHELF), None ) assert shelf_filter is not None assert shelf_filter.value == 2 assert shelf_filter.operator == ComparisonOperator.GREATER_EQUAL added_filter = next( (f for f in result.field_filters if f.field == Field.ADDED_DATE), None ) assert added_filter is not None assert added_filter.value == date(2026, 1, 1) assert added_filter.operator == ComparisonOperator.GREATER_EQUAL def test_parse_mixed_types_comprehensive(self, parser: QueryParser) -> None: """Test parsing query with all major field types.""" query = 'title:"Complex Book" author:Author year=2020 rating>=4 bought<=2025-12-31 -genre:boring epic adventure' result = parser.parse(query) # Should have a good mix of field filters and text terms assert len(result.field_filters) >= 5 assert len(result.text_terms) >= 2 # Verify we got the expected mix of string, numeric, and date fields field_types = {f.field for f in result.field_filters} assert Field.TITLE in field_types assert Field.AUTHOR in field_types assert Field.YEAR in field_types assert Field.RATING in field_types assert Field.BOUGHT_DATE in field_types assert Field.GENRE in field_types