Files
hxbooks/tests/test_search.py

410 lines
16 KiB
Python

"""
Query parser tests for HXBooks search functionality.
Tests the QueryParser class methods for type conversion, operator parsing,
field filters, and edge case handling.
"""
from datetime import date
import pytest
from hxbooks.search import (
ComparisonOperator,
Field,
QueryParser,
SearchQuery,
_convert_value, # noqa: PLC2701
)
@pytest.fixture
def parser() -> QueryParser:
    """Build the ``QueryParser`` instance handed to tests that request it."""
    query_parser = QueryParser()
    return query_parser
class TestQueryParser:
    """Basic ``QueryParser.parse`` behaviour: empty input, bare words, quoted phrases."""

    def test_parse_empty_query(self, parser: QueryParser) -> None:
        """An empty string yields no text terms and no field filters."""
        parsed = parser.parse("")

        assert parsed.text_terms == []
        assert parsed.field_filters == []

    def test_parse_whitespace_only(self, parser: QueryParser) -> None:
        """A query made only of spaces, tabs and newlines yields nothing."""
        blank_query = " \t\n "
        parsed = parser.parse(blank_query)

        assert parsed.text_terms == []
        assert parsed.field_filters == []

    def test_parse_simple_text_terms(self, parser: QueryParser) -> None:
        """Bare words become individual text terms, in order."""
        parsed = parser.parse("hobbit tolkien")

        assert parsed.text_terms == ["hobbit", "tolkien"]
        assert parsed.field_filters == []

    def test_parse_quoted_text_terms(self, parser: QueryParser) -> None:
        """A double-quoted phrase is kept as one text term next to bare words."""
        query = '"the hobbit" tolkien'
        parsed = parser.parse(query)

        assert parsed.text_terms == ["the hobbit", "tolkien"]
        assert parsed.field_filters == []

    def test_parse_quoted_text_with_spaces(self, parser: QueryParser) -> None:
        """A quoted phrase with several spaces stays a single text term."""
        query = '"lord of the rings"'
        parsed = parser.parse(query)

        assert parsed.text_terms == ["lord of the rings"]
        assert parsed.field_filters == []
class TestFieldFilters:
    """Test field filter parsing: ``field:value`` syntax, quoting and negation."""

    def test_parse_title_filter(self, parser: QueryParser) -> None:
        """Test parsing title field filter."""
        result = parser.parse("title:hobbit")

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.TITLE
        assert field_filter.operator == ComparisonOperator.EQUALS
        assert field_filter.value == "hobbit"
        assert field_filter.negated is False

    def test_parse_quoted_title_filter(self, parser: QueryParser) -> None:
        """Test parsing quoted title field filter."""
        result = parser.parse('title:"the hobbit"')

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.TITLE
        # The surrounding quotes are not part of the filter value.
        assert field_filter.value == "the hobbit"

    def test_parse_author_filter(self, parser: QueryParser) -> None:
        """Test parsing author field filter."""
        result = parser.parse("author:tolkien")

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.AUTHOR
        assert field_filter.value == "tolkien"

    def test_parse_negated_filter(self, parser: QueryParser) -> None:
        """Test parsing negated field filter (leading ``-``)."""
        result = parser.parse("-genre:romance")

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.GENRE
        assert field_filter.value == "romance"
        assert field_filter.negated is True

    def test_parse_multiple_filters(self, parser: QueryParser) -> None:
        """Test parsing multiple field filters."""
        result = parser.parse("author:tolkien genre:fantasy")

        assert len(result.field_filters) == 2

        # Use a ``None`` default so a missing filter fails an assertion
        # instead of raising ``StopIteration`` from ``next``.
        author_filter = next(
            (f for f in result.field_filters if f.field == Field.AUTHOR), None
        )
        assert author_filter is not None
        assert author_filter.value == "tolkien"

        genre_filter = next(
            (f for f in result.field_filters if f.field == Field.GENRE), None
        )
        assert genre_filter is not None
        assert genre_filter.value == "fantasy"

    def test_parse_mixed_filters_and_text(self, parser: QueryParser) -> None:
        """Test parsing mix of field filters and text terms."""
        result = parser.parse('epic author:tolkien "middle earth"')

        assert "epic" in result.text_terms
        assert "middle earth" in result.text_terms
        assert len(result.field_filters) == 1
        assert result.field_filters[0].field == Field.AUTHOR
class TestComparisonOperators:
    """Test comparison operator parsing."""

    @pytest.mark.parametrize(
        "operator_str,expected_operator",
        [
            (">=", ComparisonOperator.GREATER_EQUAL),
            ("<=", ComparisonOperator.LESS_EQUAL),
            (">", ComparisonOperator.GREATER),
            ("<", ComparisonOperator.LESS),
            ("=", ComparisonOperator.EQUALS),
            ("!=", ComparisonOperator.NOT_EQUALS),
            (":", ComparisonOperator.EQUALS),  # : defaults to equals
        ],
    )
    def test_parse_comparison_operators(
        self,
        parser: QueryParser,
        operator_str: str,
        expected_operator: ComparisonOperator,
    ) -> None:
        """Test parsing all supported comparison operators.

        Each case builds ``rating<op>4`` and checks the resulting single
        filter's field, operator and integer value.
        """
        query = f"rating{operator_str}4"
        result = parser.parse(query)

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.RATING
        assert field_filter.operator == expected_operator
        assert field_filter.value == 4

    def test_parse_date_comparison(self, parser: QueryParser) -> None:
        """Test parsing date comparison operators."""
        result = parser.parse("added>=2026-03-15")

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.ADDED_DATE
        assert field_filter.operator == ComparisonOperator.GREATER_EQUAL
        # The ISO date string is expected to arrive as a ``date`` object.
        assert field_filter.value == date(2026, 3, 15)

    def test_parse_numeric_comparison(self, parser: QueryParser) -> None:
        """Test parsing numeric comparison operators."""
        result = parser.parse("shelf>2")

        assert len(result.field_filters) == 1
        field_filter = result.field_filters[0]
        assert field_filter.field == Field.SHELF
        assert field_filter.operator == ComparisonOperator.GREATER
        assert field_filter.value == 2
class TestTypeConversion:
    """Check how the ``_convert_value`` function converts values per field type.

    The tests call ``_convert_value`` directly; the ``parser`` fixture is
    requested by each test but not used in its body.
    """

    def test_convert_date_field_valid(self, parser: QueryParser) -> None:
        """Valid ISO date strings on date fields become ``date`` objects."""
        bought = _convert_value(Field.BOUGHT_DATE, "2026-03-15")
        assert bought == date(2026, 3, 15)

        read = _convert_value(Field.READ_DATE, "2025-12-31")
        assert read == date(2025, 12, 31)

        added = _convert_value(Field.ADDED_DATE, "2024-01-01")
        assert added == date(2024, 1, 1)

    def test_convert_date_field_invalid(self, parser: QueryParser) -> None:
        """Unparseable date strings on date fields come back unchanged."""
        bought = _convert_value(Field.BOUGHT_DATE, "invalid-date")
        assert bought == "invalid-date"

        # Invalid month/day
        read = _convert_value(Field.READ_DATE, "2026-13-45")
        assert read == "2026-13-45"

        added = _convert_value(Field.ADDED_DATE, "not-a-date")
        assert added == "not-a-date"

    def test_convert_numeric_field_integers(self, parser: QueryParser) -> None:
        """Integer strings on numeric fields become ``int`` values."""
        rating = _convert_value(Field.RATING, "5")
        assert rating == 5
        assert isinstance(rating, int)

        shelf = _convert_value(Field.SHELF, "10")
        assert shelf == 10

        year = _convert_value(Field.YEAR, "2026")
        assert year == 2026

    def test_convert_numeric_field_floats(self, parser: QueryParser) -> None:
        """Decimal strings on numeric fields become ``float`` values."""
        rating = _convert_value(Field.RATING, "4.5")
        assert rating == pytest.approx(4.5)
        assert isinstance(rating, float)

        shelf = _convert_value(Field.SHELF, "2.0")
        assert shelf == pytest.approx(2.0)

    def test_convert_numeric_field_invalid(self, parser: QueryParser) -> None:
        """Non-numeric strings on numeric fields come back unchanged."""
        rating = _convert_value(Field.RATING, "not-a-number")
        assert rating == "not-a-number"

        shelf = _convert_value(Field.SHELF, "abc")
        assert shelf == "abc"

        year = _convert_value(Field.YEAR, "twenty-twenty-six")
        assert year == "twenty-twenty-six"

    def test_convert_string_fields(self, parser: QueryParser) -> None:
        """Values for string fields are returned as-is, whatever they look like."""
        title = _convert_value(Field.TITLE, "The Hobbit")
        assert title == "The Hobbit"

        author = _convert_value(Field.AUTHOR, "Tolkien")
        assert author == "Tolkien"

        genre = _convert_value(Field.GENRE, "Fantasy")
        assert genre == "Fantasy"

        # Date-like and number-like text must remain ``str`` on string fields.
        date_like_title = _convert_value(Field.TITLE, "2026-03-15")
        assert date_like_title == "2026-03-15"
        assert isinstance(date_like_title, str)

        number_like_author = _convert_value(Field.AUTHOR, "123")
        assert number_like_author == "123"
        assert isinstance(number_like_author, str)
class TestParsingEdgeCases:
    """Test edge cases and error handling in query parsing.

    NOTE(review): several assertions here are deliberately permissive
    (``text_terms`` non-empty OR ``field_filters`` empty). They mainly
    guard against crashes; tighten them once the intended fallback
    behaviour is confirmed.
    """

    def test_parse_invalid_field_name(self, parser: QueryParser) -> None:
        """Test parsing with invalid field names falls back to text search."""
        result = parser.parse("invalid_field:value")

        # Should fall back to treating the whole thing as text
        assert len(result.text_terms) >= 1 or len(result.field_filters) == 0

    def test_parse_mixed_quotes_and_operators(self, parser: QueryParser) -> None:
        """Test parsing complex queries with quotes and operators."""
        result = parser.parse('title:"The Lord" author:tolkien rating>=4')

        # Look each filter up by field so the check does not depend on order.
        title_filter = next(
            (f for f in result.field_filters if f.field == Field.TITLE), None
        )
        author_filter = next(
            (f for f in result.field_filters if f.field == Field.AUTHOR), None
        )
        rating_filter = next(
            (f for f in result.field_filters if f.field == Field.RATING), None
        )

        assert title_filter is not None
        assert title_filter.value == "The Lord"

        assert author_filter is not None
        assert author_filter.value == "tolkien"

        assert rating_filter is not None
        assert rating_filter.value == 4
        assert rating_filter.operator == ComparisonOperator.GREATER_EQUAL

    def test_parse_escaped_quotes(self, parser: QueryParser) -> None:
        """Test parsing strings with escaped quotes."""
        result = parser.parse(r'title:"She said \"hello\""')

        if result.field_filters:
            # If parsing succeeds, check the escaped quote handling
            field_filter = result.field_filters[0]
            assert isinstance(field_filter.value, str)
            assert "hello" in field_filter.value
        # If parsing fails, it should fall back gracefully: nothing further
        # is asserted in that case beyond ``parse`` not raising.

    def test_parse_special_characters(self, parser: QueryParser) -> None:
        """Test parsing queries with special characters."""
        result = parser.parse("title:C++ author:Stroustrup")

        # Should handle the + characters gracefully
        assert len(result.field_filters) >= 1 or len(result.text_terms) >= 1

    def test_parse_very_long_query(self, parser: QueryParser) -> None:
        """Test parsing very long query strings."""
        long_value = "a" * 1000
        result = parser.parse(f"title:{long_value}")

        # Should handle long strings without crashing
        assert isinstance(result, SearchQuery)

    def test_parse_unicode_characters(self, parser: QueryParser) -> None:
        """Test parsing queries with unicode characters."""
        result = parser.parse("title:Café author:José")

        # Should handle unicode gracefully
        assert isinstance(result, SearchQuery)

    def test_fallback_behavior_on_parse_error(self, parser: QueryParser) -> None:
        """Test that invalid syntax falls back to text search."""
        # Construct a query that should cause parse errors
        invalid_queries = [
            "(((",  # Unmatched parentheses
            "field::",  # Double colon
            ":",  # Just a colon
            ">=<=",  # Invalid operator combination
        ]

        for query in invalid_queries:
            result = parser.parse(query)

            # Should not crash and should return some kind of result.
            # The message names the query so a failure is attributable.
            assert isinstance(
                result, SearchQuery
            ), f"unexpected result type for query {query!r}"

            # Most likely falls back to text terms
            assert (
                len(result.text_terms) >= 1 or len(result.field_filters) == 0
            ), f"unexpected filters without text terms for query {query!r}"
class TestComplexQueries:
    """Run the parser on longer queries that combine several features."""

    def test_parse_realistic_book_search(self, parser: QueryParser) -> None:
        """Mix plain, negated and comparison filters with a quoted phrase."""
        query = 'author:tolkien genre:fantasy -genre:romance rating>=4 "middle earth"'
        parsed = parser.parse(query)
        filters = parsed.field_filters

        # Expect several filters plus the quoted phrase as a text term.
        assert len(filters) >= 3
        assert "middle earth" in parsed.text_terms

        # Author filter
        by_author = next((f for f in filters if f.field == Field.AUTHOR), None)
        assert by_author is not None
        assert by_author.value == "tolkien"

        # Positive (non-negated) genre filter
        wanted_genre = next(
            (f for f in filters if f.field == Field.GENRE and not f.negated),
            None,
        )
        assert wanted_genre is not None
        assert wanted_genre.value == "fantasy"

        # Negated genre filter
        excluded_genre = next(
            (f for f in filters if f.field == Field.GENRE and f.negated),
            None,
        )
        assert excluded_genre is not None
        assert excluded_genre.value == "romance"
        assert excluded_genre.negated is True

    def test_parse_location_and_date_filters(self, parser: QueryParser) -> None:
        """Parse place, bookshelf, shelf and added-date filters in one query."""
        query = "place:home bookshelf:fantasy shelf>=2 added>=2026-01-01"
        parsed = parser.parse(query)
        filters = parsed.field_filters

        assert len(filters) == 4

        by_place = next((f for f in filters if f.field == Field.PLACE), None)
        assert by_place is not None
        assert by_place.value == "home"

        by_shelf = next((f for f in filters if f.field == Field.SHELF), None)
        assert by_shelf is not None
        assert by_shelf.value == 2
        assert by_shelf.operator == ComparisonOperator.GREATER_EQUAL

        by_added = next((f for f in filters if f.field == Field.ADDED_DATE), None)
        assert by_added is not None
        assert by_added.value == date(2026, 1, 1)
        assert by_added.operator == ComparisonOperator.GREATER_EQUAL

    def test_parse_mixed_types_comprehensive(self, parser: QueryParser) -> None:
        """Parse one query that touches string, numeric and date fields."""
        query = 'title:"Complex Book" author:Author year=2020 rating>=4 bought<=2025-12-31 -genre:boring epic adventure'
        parsed = parser.parse(query)

        # A healthy mix of filters and free-text terms is expected.
        assert len(parsed.field_filters) >= 5
        assert len(parsed.text_terms) >= 2

        # Every major field kind must be represented among the filters.
        seen_fields = {f.field for f in parsed.field_filters}
        assert Field.TITLE in seen_fields
        assert Field.AUTHOR in seen_fields
        assert Field.YEAR in seen_fields
        assert Field.RATING in seen_fields
        assert Field.BOUGHT_DATE in seen_fields
        assert Field.GENRE in seen_fields