410 lines
16 KiB
Python
410 lines
16 KiB
Python
"""
Query parser tests for HXBooks search functionality.

Covers QueryParser.parse (text terms, field filters, comparison operators,
and edge case handling) and the module-level _convert_value helper used for
type conversion.
"""
|
|
|
|
from datetime import date
|
|
|
|
import pytest
|
|
|
|
from hxbooks.search import (
|
|
ComparisonOperator,
|
|
Field,
|
|
QueryParser,
|
|
SearchQuery,
|
|
_convert_value, # noqa: PLC2701
|
|
)
|
|
|
|
|
|
@pytest.fixture
def parser() -> QueryParser:
    """Build the QueryParser instance handed to tests that request ``parser``."""
    query_parser = QueryParser()
    return query_parser
|
|
|
|
|
|
class TestQueryParser:
    """Free-text parsing: empty input, bare words, and quoted phrases."""

    def test_parse_empty_query(self, parser: QueryParser) -> None:
        """An empty string yields neither text terms nor field filters."""
        parsed = parser.parse("")
        text_terms, field_filters = parsed.text_terms, parsed.field_filters
        assert text_terms == []
        assert field_filters == []

    def test_parse_whitespace_only(self, parser: QueryParser) -> None:
        """A string made only of whitespace behaves like an empty query."""
        blank_query = " \t\n "
        parsed = parser.parse(blank_query)
        text_terms, field_filters = parsed.text_terms, parsed.field_filters
        assert text_terms == []
        assert field_filters == []

    def test_parse_simple_text_terms(self, parser: QueryParser) -> None:
        """Bare words become individual text terms, in input order."""
        expected_terms = ["hobbit", "tolkien"]
        parsed = parser.parse("hobbit tolkien")
        text_terms, field_filters = parsed.text_terms, parsed.field_filters
        assert text_terms == expected_terms
        assert field_filters == []

    def test_parse_quoted_text_terms(self, parser: QueryParser) -> None:
        """A quoted phrase is kept as one term next to an unquoted word."""
        expected_terms = ["the hobbit", "tolkien"]
        parsed = parser.parse('"the hobbit" tolkien')
        text_terms, field_filters = parsed.text_terms, parsed.field_filters
        assert text_terms == expected_terms
        assert field_filters == []

    def test_parse_quoted_text_with_spaces(self, parser: QueryParser) -> None:
        """A quoted phrase with several spaces stays a single text term."""
        expected_terms = ["lord of the rings"]
        parsed = parser.parse('"lord of the rings"')
        text_terms, field_filters = parsed.text_terms, parsed.field_filters
        assert text_terms == expected_terms
        assert field_filters == []
|
|
|
|
|
|
class TestFieldFilters:
    """Test field filter parsing (``field:value`` syntax, quoting, negation)."""

    def test_parse_title_filter(self, parser: QueryParser) -> None:
        """Test parsing title field filter."""
        result = parser.parse("title:hobbit")
        assert len(result.field_filters) == 1
        # Named ``field_filter`` (not ``filter``) so the builtin is not shadowed.
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.TITLE
        assert field_filter.operator == ComparisonOperator.EQUALS
        assert field_filter.value == "hobbit"
        assert field_filter.negated is False

    def test_parse_quoted_title_filter(self, parser: QueryParser) -> None:
        """Test parsing quoted title field filter."""
        result = parser.parse('title:"the hobbit"')
        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.TITLE
        # The surrounding quotes are expected to be stripped from the value.
        assert field_filter.value == "the hobbit"

    def test_parse_author_filter(self, parser: QueryParser) -> None:
        """Test parsing author field filter."""
        result = parser.parse("author:tolkien")
        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.AUTHOR
        assert field_filter.value == "tolkien"

    def test_parse_negated_filter(self, parser: QueryParser) -> None:
        """Test parsing negated field filter."""
        result = parser.parse("-genre:romance")
        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.GENRE
        assert field_filter.value == "romance"
        assert field_filter.negated is True

    def test_parse_multiple_filters(self, parser: QueryParser) -> None:
        """Test parsing multiple field filters."""
        result = parser.parse("author:tolkien genre:fantasy")
        assert len(result.field_filters) == 2

        # Use a ``None`` default so a missing filter fails the assertion below
        # instead of surfacing as a bare StopIteration.
        author_filter = next(
            (f for f in result.field_filters if f.field == Field.AUTHOR), None
        )
        assert author_filter is not None
        assert author_filter.value == "tolkien"

        genre_filter = next(
            (f for f in result.field_filters if f.field == Field.GENRE), None
        )
        assert genre_filter is not None
        assert genre_filter.value == "fantasy"

    def test_parse_mixed_filters_and_text(self, parser: QueryParser) -> None:
        """Test parsing mix of field filters and text terms."""
        result = parser.parse('epic author:tolkien "middle earth"')
        assert "epic" in result.text_terms
        assert "middle earth" in result.text_terms
        assert len(result.field_filters) == 1
        assert result.field_filters[0].field == Field.AUTHOR
|
|
|
|
|
|
class TestComparisonOperators:
    """Test comparison operator parsing."""

    @pytest.mark.parametrize(
        "operator_str,expected_operator",
        [
            (">=", ComparisonOperator.GREATER_EQUAL),
            ("<=", ComparisonOperator.LESS_EQUAL),
            (">", ComparisonOperator.GREATER),
            ("<", ComparisonOperator.LESS),
            ("=", ComparisonOperator.EQUALS),
            ("!=", ComparisonOperator.NOT_EQUALS),
            (":", ComparisonOperator.EQUALS),  # : defaults to equals
        ],
    )
    def test_parse_comparison_operators(
        self,
        parser: QueryParser,
        operator_str: str,
        expected_operator: ComparisonOperator,
    ) -> None:
        """Test parsing all supported comparison operators.

        Each case builds ``rating<op>4`` and checks the operator that ends up
        on the single resulting filter.
        """
        query = f"rating{operator_str}4"
        result = parser.parse(query)

        assert len(result.field_filters) == 1
        # Named ``field_filter`` (not ``filter``) so the builtin is not shadowed.
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.RATING
        assert field_filter.operator == expected_operator
        assert field_filter.value == 4

    def test_parse_date_comparison(self, parser: QueryParser) -> None:
        """Test parsing date comparison operators."""
        result = parser.parse("added>=2026-03-15")
        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.ADDED_DATE
        assert field_filter.operator == ComparisonOperator.GREATER_EQUAL
        # The ISO date string is expected to arrive as a ``date`` object.
        assert field_filter.value == date(2026, 3, 15)

    def test_parse_numeric_comparison(self, parser: QueryParser) -> None:
        """Test parsing numeric comparison operators."""
        result = parser.parse("shelf>2")
        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.SHELF
        assert field_filter.operator == ComparisonOperator.GREATER
        assert field_filter.value == 2
|
|
|
|
|
|
class TestTypeConversion:
    """Test the module-level ``_convert_value`` helper for each field type.

    ``_convert_value`` is called directly as a plain function, so these tests
    do not request the ``parser`` fixture.
    """

    @pytest.mark.parametrize(
        "field,raw,expected",
        [
            (Field.BOUGHT_DATE, "2026-03-15", date(2026, 3, 15)),
            (Field.READ_DATE, "2025-12-31", date(2025, 12, 31)),
            (Field.ADDED_DATE, "2024-01-01", date(2024, 1, 1)),
        ],
    )
    def test_convert_date_field_valid(
        self, field: Field, raw: str, expected: date
    ) -> None:
        """Test converting valid date strings for date fields."""
        assert _convert_value(field, raw) == expected

    @pytest.mark.parametrize(
        "field,raw",
        [
            (Field.BOUGHT_DATE, "invalid-date"),
            (Field.READ_DATE, "2026-13-45"),  # Invalid month/day
            (Field.ADDED_DATE, "not-a-date"),
        ],
    )
    def test_convert_date_field_invalid(self, field: Field, raw: str) -> None:
        """Test converting invalid date strings falls back to string."""
        assert _convert_value(field, raw) == raw

    def test_convert_numeric_field_integers(self) -> None:
        """Test converting integer strings for numeric fields."""
        result = _convert_value(Field.RATING, "5")
        assert result == 5
        assert isinstance(result, int)

        result = _convert_value(Field.SHELF, "10")
        assert result == 10

        result = _convert_value(Field.YEAR, "2026")
        assert result == 2026

    def test_convert_numeric_field_floats(self) -> None:
        """Test converting float strings for numeric fields."""
        result = _convert_value(Field.RATING, "4.5")
        assert result == pytest.approx(4.5)
        assert isinstance(result, float)

        result = _convert_value(Field.SHELF, "2.0")
        assert result == pytest.approx(2.0)

    @pytest.mark.parametrize(
        "field,raw",
        [
            (Field.RATING, "not-a-number"),
            (Field.SHELF, "abc"),
            (Field.YEAR, "twenty-twenty-six"),
        ],
    )
    def test_convert_numeric_field_invalid(self, field: Field, raw: str) -> None:
        """Test converting invalid numeric strings falls back to string."""
        assert _convert_value(field, raw) == raw

    @pytest.mark.parametrize(
        "field,raw",
        [
            (Field.TITLE, "The Hobbit"),
            (Field.AUTHOR, "Tolkien"),
            (Field.GENRE, "Fantasy"),
            # Even things that look like dates/numbers should stay as strings
            # for string fields.
            (Field.TITLE, "2026-03-15"),
            (Field.AUTHOR, "123"),
        ],
    )
    def test_convert_string_fields(self, field: Field, raw: str) -> None:
        """Test converting values for string fields returns as-is."""
        result = _convert_value(field, raw)
        assert result == raw
        assert isinstance(result, str)
|
|
|
|
|
|
class TestParsingEdgeCases:
    """Test edge cases and error handling in query parsing."""

    def test_parse_invalid_field_name(self, parser: QueryParser) -> None:
        """Test parsing with invalid field names falls back to text search."""
        result = parser.parse("invalid_field:value")
        # Should fall back to treating the whole thing as text.
        # NOTE(review): this condition also holds when nothing at all is
        # parsed, so it only rules out "a filter and no text term" - confirm
        # whether a stricter check is wanted.
        assert len(result.text_terms) >= 1 or len(result.field_filters) == 0

    def test_parse_mixed_quotes_and_operators(self, parser: QueryParser) -> None:
        """Test parsing complex queries with quotes and operators."""
        result = parser.parse('title:"The Lord" author:tolkien rating>=4')

        # Should have both field filters
        title_filter = next(
            (f for f in result.field_filters if f.field == Field.TITLE), None
        )
        author_filter = next(
            (f for f in result.field_filters if f.field == Field.AUTHOR), None
        )
        rating_filter = next(
            (f for f in result.field_filters if f.field == Field.RATING), None
        )

        assert title_filter is not None
        assert title_filter.value == "The Lord"

        assert author_filter is not None
        assert author_filter.value == "tolkien"

        assert rating_filter is not None
        assert rating_filter.value == 4
        assert rating_filter.operator == ComparisonOperator.GREATER_EQUAL

    def test_parse_escaped_quotes(self, parser: QueryParser) -> None:
        """Test parsing strings with escaped quotes."""
        result = parser.parse(r'title:"She said \"hello\""')
        # Checked unconditionally so the fallback path asserts something too.
        assert isinstance(result, SearchQuery)
        if result.field_filters:
            # If parsing succeeds, check the escaped quote handling.
            # Named ``field_filter`` (not ``filter``) to avoid shadowing the
            # builtin.
            field_filter = result.field_filters[0]
            assert isinstance(field_filter.value, str)
            assert "hello" in field_filter.value
        # If parsing fails, it should fall back gracefully

    def test_parse_special_characters(self, parser: QueryParser) -> None:
        """Test parsing queries with special characters."""
        result = parser.parse("title:C++ author:Stroustrup")
        # Should handle the + characters gracefully
        assert len(result.field_filters) >= 1 or len(result.text_terms) >= 1

    def test_parse_very_long_query(self, parser: QueryParser) -> None:
        """Test parsing very long query strings."""
        long_value = "a" * 1000
        result = parser.parse(f"title:{long_value}")
        # Should handle long strings without crashing
        assert isinstance(result, SearchQuery)

    def test_parse_unicode_characters(self, parser: QueryParser) -> None:
        """Test parsing queries with unicode characters."""
        result = parser.parse("title:Café author:José")
        # Should handle unicode gracefully
        assert isinstance(result, SearchQuery)

    # One test case per query, so a failure names the offending input and the
    # remaining queries still run.
    @pytest.mark.parametrize(
        "query",
        [
            "(((",  # Unmatched parentheses
            "field::",  # Double colon
            ":",  # Just a colon
            ">=<=",  # Invalid operator combination
        ],
    )
    def test_fallback_behavior_on_parse_error(
        self, parser: QueryParser, query: str
    ) -> None:
        """Test that invalid syntax falls back to text search."""
        result = parser.parse(query)
        # Should not crash and should return some kind of result
        assert isinstance(result, SearchQuery)
        # Most likely falls back to text terms
        assert len(result.text_terms) >= 1 or len(result.field_filters) == 0
|
|
|
|
|
|
class TestComplexQueries:
    """End-to-end parsing checks on realistic, multi-part queries."""

    def test_parse_realistic_book_search(self, parser: QueryParser) -> None:
        """Mix of author, genre, negated genre, rating and a quoted phrase."""
        parsed = parser.parse(
            'author:tolkien genre:fantasy -genre:romance rating>=4 "middle earth"'
        )
        filters = parsed.field_filters

        # Several filters plus the quoted phrase as a text term.
        assert len(filters) >= 3
        assert "middle earth" in parsed.text_terms

        # Author filter.
        author_matches = [f for f in filters if f.field == Field.AUTHOR]
        tolkien_filter = author_matches[0] if author_matches else None
        assert tolkien_filter is not None
        assert tolkien_filter.value == "tolkien"

        # Positive genre filter.
        positive_genres = [
            f for f in filters if f.field == Field.GENRE and not f.negated
        ]
        fantasy_filter = positive_genres[0] if positive_genres else None
        assert fantasy_filter is not None
        assert fantasy_filter.value == "fantasy"

        # Negated genre filter.
        negated_genres = [f for f in filters if f.field == Field.GENRE and f.negated]
        romance_filter = negated_genres[0] if negated_genres else None
        assert romance_filter is not None
        assert romance_filter.value == "romance"
        assert romance_filter.negated is True

    def test_parse_location_and_date_filters(self, parser: QueryParser) -> None:
        """Place, bookshelf, shelf number and added-date filters together."""
        parsed = parser.parse(
            "place:home bookshelf:fantasy shelf>=2 added>=2026-01-01"
        )
        filters = parsed.field_filters

        assert len(filters) == 4

        place_matches = [f for f in filters if f.field == Field.PLACE]
        place_filter = place_matches[0] if place_matches else None
        assert place_filter is not None
        assert place_filter.value == "home"

        shelf_matches = [f for f in filters if f.field == Field.SHELF]
        shelf_filter = shelf_matches[0] if shelf_matches else None
        assert shelf_filter is not None
        assert shelf_filter.value == 2
        assert shelf_filter.operator == ComparisonOperator.GREATER_EQUAL

        added_matches = [f for f in filters if f.field == Field.ADDED_DATE]
        added_filter = added_matches[0] if added_matches else None
        assert added_filter is not None
        assert added_filter.value == date(2026, 1, 1)
        assert added_filter.operator == ComparisonOperator.GREATER_EQUAL

    def test_parse_mixed_types_comprehensive(self, parser: QueryParser) -> None:
        """One query touching string, numeric and date fields plus free text."""
        # Adjacent literals concatenate into the same single query string.
        query = (
            'title:"Complex Book" author:Author year=2020 rating>=4 '
            "bought<=2025-12-31 -genre:boring epic adventure"
        )
        parsed = parser.parse(query)

        # A good mix of field filters and text terms is expected.
        assert len(parsed.field_filters) >= 5
        assert len(parsed.text_terms) >= 2

        # Every field named in the query must appear among the parsed filters.
        seen_fields = {f.field for f in parsed.field_filters}
        for expected_field in (
            Field.TITLE,
            Field.AUTHOR,
            Field.YEAR,
            Field.RATING,
            Field.BOUGHT_DATE,
            Field.GENRE,
        ):
            assert expected_field in seen_fields
|