diff --git a/README.md b/README.md index 2e3d505..b895595 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Choose from multiple Redis deployment options: 2. [Redis on Docker](https://hub.docker.com/_/redis): Docker image for development ```bash - docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest + docker run -d --name redis -p 6379:6379 -p 8001:8001 redis:latest ``` 3. [Redis Enterprise](https://redis.io/enterprise/): Commercial, self-hosted database @@ -201,25 +201,43 @@ Define queries and perform advanced searches over your indices, including the co List> results = index.query(filteredQuery); ``` -- **HybridQuery** - Combines text and vector search with weighted scoring: +- **HybridQuery** - Native hybrid search combining text and vector similarity using Redis 8.4+ `FT.HYBRID` command with built-in score fusion: ```java import com.redis.vl.query.HybridQuery; import com.redis.vl.query.Filter; - // Hybrid search: text + vector with alpha weighting + // Native hybrid search with LINEAR combination (Redis 8.4+) HybridQuery hybridQuery = HybridQuery.builder() .text("machine learning algorithms") .textFieldName("description") .vector(queryVector) .vectorFieldName("embedding") .filterExpression(Filter.tag("category", "AI")) - .alpha(0.7f) // 70% vector, 30% text + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .linearAlpha(0.3f) // 30% text, 70% vector .numResults(10) .build(); List> results = index.query(hybridQuery); - // Results scored by: alpha * vector_similarity + (1-alpha) * text_score + // Automatically falls back to AggregateHybridQuery on older Redis versions + ``` + +- **AggregateHybridQuery** - Backward-compatible hybrid search using `FT.AGGREGATE` for Redis versions before 8.4: + + ```java + import com.redis.vl.query.AggregateHybridQuery; + + AggregateHybridQuery aggQuery = AggregateHybridQuery.builder() + .text("machine learning algorithms") + .textFieldName("description") + .vector(queryVector) + .vectorFieldName("embedding") + .alpha(0.7f) // 70% vector, 30% text + .numResults(10) + .build(); + + List> results = index.query(aggQuery); ``` - **VectorRangeQuery** - Vector search within a defined range paired with customizable filters @@ -479,7 +497,7 @@ String response = vcrChat.call("What is Redis?"); ### How It Works -1. **Container Management**: VCR starts a Redis Stack container with persistence +1. **Container Management**: VCR starts a Redis container with persistence 2. **Model Wrapping**: `@VCRModel` fields are wrapped with VCR proxies 3. **Cassette Storage**: Responses stored as Redis JSON documents 4. **Persistence**: Data saved to `src/test/resources/vcr-data/` via Redis AOF/RDB @@ -515,6 +533,7 @@ Check out the [notebooks](notebooks/) directory for interactive Jupyter notebook - [Getting Started](notebooks/01_getting_started.ipynb) - Introduction to RedisVL basics - [Hybrid Queries](notebooks/02_hybrid_queries.ipynb) - Combining vector and metadata search +- [Advanced Queries](notebooks/11_advanced_queries.ipynb) - TextQuery, HybridQuery, and MultiVectorQuery - [LLM Cache](notebooks/03_llmcache.ipynb) - Semantic caching for LLMs - [Hash vs JSON Storage](notebooks/05_hash_vs_json.ipynb) - Storage type comparison - [Vectorizers](notebooks/06_vectorizers.ipynb) - Working with embedding models @@ -559,7 +578,8 @@ Please help us by contributing PRs, opening GitHub issues for bugs or new featur ## Requirements - Java 17+ -- Redis Stack 7.2+ or Redis with RediSearch module +- Redis 8.0+ (includes built-in search and vector capabilities) +- For native `FT.HYBRID` support: Redis 8.4+ ## License diff --git a/build.gradle.kts b/build.gradle.kts index 40341b6..913e19d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -148,7 +148,7 @@ tasks.wrapper { // Task to copy jar to notebooks directory for Jupyter tasks.register("copyJarToNotebooks") { dependsOn(":core:jar") - from("core/build/libs/redisvl-0.12.2.jar") + from(project(":core").tasks.named("jar").map { it.archiveFile }) into("notebooks") } diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 86bf8f9..c77b71c 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -8,8 +8,8 @@ plugins { description = "RedisVL - Vector Library for Java" dependencies { - // Redis client - upgraded to 7.2.0 for new RedisClient/RedisSentinelClient API - api("redis.clients:jedis:7.2.0") + // Redis client - upgraded to 7.3.0 for native FT.HYBRID support + api("redis.clients:jedis:7.3.0") // JSON processing implementation("com.fasterxml.jackson.core:jackson-databind:2.18.2") diff --git a/core/src/main/java/com/redis/vl/extensions/cache/BaseCache.java b/core/src/main/java/com/redis/vl/extensions/cache/BaseCache.java index f862dc8..5c32c21 100644 --- a/core/src/main/java/com/redis/vl/extensions/cache/BaseCache.java +++ b/core/src/main/java/com/redis/vl/extensions/cache/BaseCache.java @@ -189,7 +189,7 @@ protected void setWithTtl(String key, String value, Integer ttl) { protected void setWithTtl(byte[] key, byte[] value, Integer ttl) { if (ttl != null || this.ttl != null) { Integer effectiveTtl = ttl != null ? ttl : this.ttl; - redisClient.setex(key, effectiveTtl, value); + redisClient.set(key, value, new redis.clients.jedis.params.SetParams().ex(effectiveTtl)); } else { redisClient.set(key, value); } diff --git a/core/src/main/java/com/redis/vl/extensions/cache/EmbeddingsCache.java b/core/src/main/java/com/redis/vl/extensions/cache/EmbeddingsCache.java index 29df1a6..e5e0a89 100644 --- a/core/src/main/java/com/redis/vl/extensions/cache/EmbeddingsCache.java +++ b/core/src/main/java/com/redis/vl/extensions/cache/EmbeddingsCache.java @@ -167,7 +167,7 @@ public void mset(Map embeddings, String modelName) { byte[] valueBytes = ArrayUtils.floatArrayToBytes(entry.getValue()); if (ttl != null && ttl > 0) { - pipeline.setex(keyBytes, ttl, valueBytes); + pipeline.set(keyBytes, valueBytes, new redis.clients.jedis.params.SetParams().ex(ttl)); } else { pipeline.set(keyBytes, valueBytes); } diff --git a/core/src/main/java/com/redis/vl/extensions/messagehistory/MessageHistory.java b/core/src/main/java/com/redis/vl/extensions/messagehistory/MessageHistory.java index 22a2e36..a88750e 100644 --- a/core/src/main/java/com/redis/vl/extensions/messagehistory/MessageHistory.java +++ b/core/src/main/java/com/redis/vl/extensions/messagehistory/MessageHistory.java @@ -6,6 +6,7 @@ import com.redis.vl.query.Filter; import com.redis.vl.query.FilterQuery; import com.redis.vl.schema.IndexSchema; +import com.redis.vl.utils.Utils; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.ArrayList; import java.util.HashMap; @@ -218,12 +219,18 @@ public void addMessages(List> messages, String sessionTag) { String effectiveSessionTag = (sessionTag != null) ? sessionTag : this.sessionTag; List> chatMessages = new ArrayList<>(); - for (Map message : messages) { + // Use a base timestamp and increment by a small offset per message to guarantee + // insertion order is preserved when sorting by timestamp. + double baseTimestamp = Utils.currentTimestamp(); + + for (int i = 0; i < messages.size(); i++) { + Map message = messages.get(i); ChatMessage.ChatMessageBuilder builder = ChatMessage.builder() .role(message.get(ROLE_FIELD_NAME)) .content(message.get(CONTENT_FIELD_NAME)) - .sessionTag(effectiveSessionTag); + .sessionTag(effectiveSessionTag) + .timestamp(baseTimestamp + (i * 0.000001)); // 1 microsecond offset per message if (message.containsKey(TOOL_FIELD_NAME)) { builder.toolCallId(message.get(TOOL_FIELD_NAME)); diff --git a/core/src/main/java/com/redis/vl/index/SearchIndex.java b/core/src/main/java/com/redis/vl/index/SearchIndex.java index 8046b18..48d6a02 100644 --- a/core/src/main/java/com/redis/vl/index/SearchIndex.java +++ b/core/src/main/java/com/redis/vl/index/SearchIndex.java @@ -22,10 +22,13 @@ import lombok.extern.slf4j.Slf4j; import redis.clients.jedis.RedisClient; import redis.clients.jedis.UnifiedJedis; +import redis.clients.jedis.search.Document; import redis.clients.jedis.search.FTCreateParams; import redis.clients.jedis.search.FTSearchParams; import redis.clients.jedis.search.IndexDataType; import redis.clients.jedis.search.SearchResult; +import redis.clients.jedis.search.hybrid.FTHybridParams; +import redis.clients.jedis.search.hybrid.HybridResult; import redis.clients.jedis.search.schemafields.SchemaField; /** @@ -1301,6 +1304,50 @@ public SearchResult search(String query, Map params, int offset, } } + /** + * Execute a TextQuery with full support for return fields, scorer, sorting, and numResults. + * + * @param tq The TextQuery to execute + * @return Search results + */ + private SearchResult searchTextQuery(TextQuery tq) { + if (!exists()) { + throw new RedisVLException("Index " + getName() + " does not exist"); + } + + UnifiedJedis jedis = getUnifiedJedis(); + try { + redis.clients.jedis.search.FTSearchParams searchParams = + new redis.clients.jedis.search.FTSearchParams(); + + searchParams.dialect(2); + searchParams.limit(0, tq.getNumResults() != null ? tq.getNumResults() : DEFAULT_NUM_RESULTS); + + // Set return fields if specified + if (tq.getReturnFields() != null && !tq.getReturnFields().isEmpty()) { + searchParams.returnFields(tq.getReturnFields().toArray(new String[0])); + } + + // Set scorer if specified + if (tq.getScorer() != null && !tq.getScorer().isEmpty()) { + searchParams.scorer(tq.getScorer()); + } + + // Set sorting if specified + if (tq.getSortBy() != null && !tq.getSortBy().isEmpty()) { + redis.clients.jedis.args.SortingOrder order = + tq.isSortDescending() + ? redis.clients.jedis.args.SortingOrder.DESC + : redis.clients.jedis.args.SortingOrder.ASC; + searchParams.sortBy(tq.getSortBy(), order); + } + + return jedis.ftSearch(schema.getName(), tq.toQueryString(), searchParams); + } catch (Exception e) { + throw new RuntimeException("Failed to execute text query: " + e.getMessage(), e); + } + } + /** * Search with sorting and/or inOrder support. * @@ -1502,7 +1549,7 @@ public List> query(Object query) { SearchResult result = search(fq.build()); return processSearchResult(result); } else if (query instanceof TextQuery tq) { - SearchResult result = search(tq.toString()); + SearchResult result = searchTextQuery(tq); return processSearchResult(result); } else if (query instanceof FilterQuery fq) { // FilterQuery: metadata-only query without vector search @@ -1511,13 +1558,31 @@ public List> query(Object query) { UnifiedJedis jedis = getUnifiedJedis(); SearchResult result = jedis.ftSearch(schema.getName(), redisQuery); return processSearchResult(result); + } else if (query instanceof HybridQuery hq) { + // HybridQuery: native FT.HYBRID command (Redis 8.4+) + // Falls back to AggregateHybridQuery (FT.AGGREGATE) if FT.HYBRID is not available + try { + FTHybridParams hybridParams = hq.buildFTHybridParams(); + UnifiedJedis jedis = getUnifiedJedis(); + HybridResult result = jedis.ftHybrid(schema.getName(), hybridParams); + return processHybridResult(result); + } catch (Exception e) { + // Fall back to AggregateHybridQuery (FT.AGGREGATE) when FT.HYBRID fails. + // This handles: unknown command (Redis < 8.4), unsupported parameters + // (e.g., YIELD_SCORE_AS on older versions), and other server-side errors. + log.warn( + "FT.HYBRID failed, falling back to AggregateHybridQuery (FT.AGGREGATE): {}", + e.getMessage()); + AggregateHybridQuery fallback = hq.toAggregateHybridQuery(); + return query(fallback); + } } else if (query instanceof AggregationQuery aq) { - // AggregationQuery: HybridQuery and other aggregation-based queries - // Python: HybridQuery (redisvl/query/aggregate.py:23) + // AggregationQuery: AggregateHybridQuery and other aggregation-based queries + // Python: AggregateHybridQuery (redisvl/query/aggregate.py:23) redis.clients.jedis.search.aggr.AggregationBuilder aggregation = aq.buildRedisAggregation(); UnifiedJedis jedis = getUnifiedJedis(); - // Add parameters if present (e.g., vector parameter for HybridQuery) + // Add parameters if present (e.g., vector parameter for AggregateHybridQuery) Map params = aq.getParams(); if (params != null && !params.isEmpty()) { aggregation.params(params); @@ -1610,6 +1675,57 @@ private List> processAggregationResult( return processed; } + /** + * Process HybridResult from FT.HYBRID into List of Maps. + * + *

Converts Redis hybrid search results into a list of maps, where each map represents a + * document from the hybrid search result. + * + * @param result the HybridResult from Redis FT.HYBRID command + * @return list of maps containing hybrid search results + */ + private List> processHybridResult(HybridResult result) { + List> processed = new ArrayList<>(); + if (result != null && result.getDocuments() != null) { + for (Document doc : result.getDocuments()) { + Map docMap = new HashMap<>(); + docMap.put("id", doc.getId()); + if (doc.getScore() != null) { + docMap.put("score", doc.getScore()); + } + + if (getStorageType() == IndexSchema.StorageType.JSON) { + Object jsonField = doc.get("$"); + if (jsonField instanceof String) { + try { + @SuppressWarnings("unchecked") + Map parsedDoc = jsonMapper.readValue((String) jsonField, Map.class); + for (Map.Entry entry : parsedDoc.entrySet()) { + docMap.put("$." + entry.getKey(), entry.getValue()); + } + } catch (Exception e) { + log.warn("Failed to parse JSON document in hybrid result", e); + for (Map.Entry entry : doc.getProperties()) { + docMap.put(entry.getKey(), entry.getValue()); + } + } + } else { + for (Map.Entry entry : doc.getProperties()) { + docMap.put(entry.getKey(), entry.getValue()); + } + } + } else { + for (Map.Entry entry : doc.getProperties()) { + docMap.put(entry.getKey(), entry.getValue()); + } + } + + processed.add(docMap); + } + } + return processed; + } + /** * Execute multiple search queries in batch * diff --git a/core/src/main/java/com/redis/vl/query/AggregateHybridQuery.java b/core/src/main/java/com/redis/vl/query/AggregateHybridQuery.java index 2c4588c..83bfecc 100644 --- a/core/src/main/java/com/redis/vl/query/AggregateHybridQuery.java +++ b/core/src/main/java/com/redis/vl/query/AggregateHybridQuery.java @@ -1,17 +1,37 @@ package com.redis.vl.query; -import java.util.Set; +import com.redis.vl.utils.ArrayUtils; +import com.redis.vl.utils.FullTextQueryHelper; +import java.util.*; +import redis.clients.jedis.search.aggr.AggregationBuilder; +import redis.clients.jedis.search.aggr.SortedField; /** - * AggregateHybridQuery combines text and vector search in Redis using aggregation. + * AggregateHybridQuery combines text and vector search in Redis using FT.AGGREGATE. * - *

This is the primary name for hybrid queries in RedisVL, matching the Python implementation. It - * is a type alias for {@link HybridQuery}. - * - *

Ported from Python: redisvl/query/aggregate.py:57-315 (AggregateHybridQuery class) + *

Ported from Python: redisvl/query/aggregate.py:57-329 (AggregateHybridQuery class) * *

It allows you to perform a hybrid search using both text and vector similarity. It scores - * documents based on a weighted combination of text and vector similarity. + * documents based on a weighted combination of text and vector similarity using the formula: + * + *

+ * hybrid_score = (1 - alpha) * text_score + alpha * vector_similarity
+ * 
+ * + *

Where {@code text_score} is the BM25 score from the text search and {@code vector_similarity} + * is the normalized cosine similarity from the vector search. + * + *

Redis Version Requirements: This query uses the ADDSCORES option in + * FT.AGGREGATE to expose the internal text search score (@__score). This feature requires + * Redis 7.4.0 or later. On older Redis versions, the query will fail. + * + *

Note on Runtime Parameters: AggregateHybridQuery uses Redis FT.AGGREGATE for + * aggregation-based hybrid search. As of Redis Stack 7.2+, runtime parameters (efRuntime, epsilon, + * etc.) are NOT supported in FT.AGGREGATE queries. If you need runtime parameter support, use + * {@link VectorQuery} or {@link VectorRangeQuery} instead. + * + *

For native FT.HYBRID support with built-in score fusion (RRF, LINEAR), use {@link HybridQuery} + * instead. This requires Redis 8.4+. * *

Python equivalent: * @@ -36,7 +56,7 @@ *

Java equivalent: * *

- * HybridQuery query = AggregateHybridQuery.builder()
+ * AggregateHybridQuery query = AggregateHybridQuery.builder()
  *     .text("example text")
  *     .textFieldName("text_field")
  *     .vector(new float[]{0.1f, 0.2f, 0.3f})
@@ -47,37 +67,268 @@
  *     .dtype("float32")
  *     .numResults(10)
  *     .returnFields(List.of("field1", "field2"))
- *     .stopwords(HybridQuery.loadDefaultStopwords("english"))
+ *     .stopwords(FullTextQueryHelper.loadDefaultStopwords("english"))
  *     .dialect(2)
  *     .build();
  * List<Map<String, Object>> results = index.query(query);
  * 
* + *

This class is final to prevent finalizer attacks, as it throws exceptions in constructors for + * input validation (SEI CERT OBJ11-J). + * * @since 0.1.0 */ -public final class AggregateHybridQuery { +public final class AggregateHybridQuery extends AggregationQuery { - // Private constructor to prevent instantiation - private AggregateHybridQuery() { - throw new UnsupportedOperationException("AggregateHybridQuery is a type alias for HybridQuery"); + private static final String DISTANCE_ID = "vector_distance"; + private static final String VECTOR_PARAM = "vector"; + + private final String text; + private final String textFieldName; + private final float[] vector; + private final String vectorFieldName; + private final String textScorer; + private final Object filterExpression; + private final float alpha; + private final String dtype; + private final int numResults; + private final List returnFields; + private final Set stopwords; + private final int dialect; + + private AggregateHybridQuery(AggregateHybridQueryBuilder builder) { + this.text = builder.text; + this.textFieldName = builder.textFieldName; + this.vector = builder.vector != null ? builder.vector.clone() : null; + this.vectorFieldName = builder.vectorFieldName; + this.textScorer = builder.textScorer; + this.filterExpression = builder.filterExpression; + this.alpha = builder.alpha; + this.dtype = builder.dtype; + this.numResults = builder.numResults; + this.returnFields = + builder.returnFields != null ? List.copyOf(builder.returnFields) : List.of(); + this.stopwords = builder.stopwords != null ? Set.copyOf(builder.stopwords) : Set.of(); + this.dialect = builder.dialect; + + if (this.text == null || this.text.trim().isEmpty()) { + throw new IllegalArgumentException("text string cannot be empty"); + } + + String tokenized = FullTextQueryHelper.tokenizeAndEscapeQuery(this.text, this.stopwords); + if (tokenized.isEmpty()) { + throw new IllegalArgumentException("text string cannot be empty after removing stopwords"); + } } /** - * Create a new builder for AggregateHybridQuery (delegates to HybridQuery.builder()). + * Create a new builder for AggregateHybridQuery. * - * @return A new HybridQuery.HybridQueryBuilder instance + * @return A new AggregateHybridQueryBuilder instance */ - public static HybridQuery.HybridQueryBuilder builder() { - return HybridQuery.builder(); + public static AggregateHybridQueryBuilder builder() { + return new AggregateHybridQueryBuilder(); } /** - * Load default stopwords for a given language (delegates to HybridQuery.loadDefaultStopwords()). + * Load default stopwords for a given language. * * @param language the language (e.g., "english", "german") * @return set of stopwords */ public static Set loadDefaultStopwords(String language) { - return HybridQuery.loadDefaultStopwords(language); + return FullTextQueryHelper.loadDefaultStopwords(language); + } + + public String getText() { + return text; + } + + public String getTextFieldName() { + return textFieldName; + } + + public float[] getVector() { + return vector != null ? vector.clone() : null; + } + + public String getVectorFieldName() { + return vectorFieldName; + } + + public String getTextScorer() { + return textScorer; + } + + public Object getFilterExpression() { + return filterExpression; + } + + public float getAlpha() { + return alpha; + } + + public String getDtype() { + return dtype; + } + + public int getNumResults() { + return numResults; + } + + public List getReturnFields() { + return Collections.unmodifiableList(returnFields); + } + + public Set getStopwords() { + return Collections.unmodifiableSet(stopwords); + } + + public int getDialect() { + return dialect; + } + + /** Builder for creating AggregateHybridQuery instances with fluent API. */ + public static class AggregateHybridQueryBuilder { + private String text; + private String textFieldName; + private float[] vector; + private String vectorFieldName; + private String textScorer = "BM25STD"; + private Object filterExpression; + private float alpha = 0.7f; + private String dtype = "float32"; + private int numResults = 10; + private List returnFields = List.of(); + private Set stopwords = FullTextQueryHelper.loadDefaultStopwords("english"); + private int dialect = 2; + + AggregateHybridQueryBuilder() {} + + public AggregateHybridQueryBuilder text(String text) { + this.text = text; + return this; + } + + public AggregateHybridQueryBuilder textFieldName(String textFieldName) { + this.textFieldName = textFieldName; + return this; + } + + public AggregateHybridQueryBuilder vector(float[] vector) { + this.vector = vector != null ? vector.clone() : null; + return this; + } + + public AggregateHybridQueryBuilder vectorFieldName(String vectorFieldName) { + this.vectorFieldName = vectorFieldName; + return this; + } + + public AggregateHybridQueryBuilder textScorer(String textScorer) { + this.textScorer = textScorer; + return this; + } + + public AggregateHybridQueryBuilder filterExpression(Filter filterExpression) { + this.filterExpression = filterExpression; + return this; + } + + public AggregateHybridQueryBuilder filterExpression(String filterExpression) { + this.filterExpression = filterExpression; + return this; + } + + public AggregateHybridQueryBuilder alpha(float alpha) { + this.alpha = alpha; + return this; + } + + public AggregateHybridQueryBuilder dtype(String dtype) { + this.dtype = dtype; + return this; + } + + public AggregateHybridQueryBuilder numResults(int numResults) { + this.numResults = numResults; + return this; + } + + public AggregateHybridQueryBuilder returnFields(List returnFields) { + this.returnFields = returnFields != null ? List.copyOf(returnFields) : List.of(); + return this; + } + + public AggregateHybridQueryBuilder stopwords(Set stopwords) { + this.stopwords = stopwords != null ? Set.copyOf(stopwords) : Set.of(); + return this; + } + + public AggregateHybridQueryBuilder dialect(int dialect) { + this.dialect = dialect; + return this; + } + + public AggregateHybridQuery build() { + return new AggregateHybridQuery(this); + } + } + + @Override + public String buildQueryString() { + String filterStr = null; + if (filterExpression instanceof Filter) { + filterStr = ((Filter) filterExpression).build(); + } else if (filterExpression instanceof String) { + filterStr = (String) filterExpression; + } + + String knnQuery = + String.format( + "KNN %d @%s $%s AS %s", numResults, vectorFieldName, VECTOR_PARAM, DISTANCE_ID); + + String textQuery = + String.format( + "(~@%s:(%s)", + textFieldName, FullTextQueryHelper.tokenizeAndEscapeQuery(text, stopwords)); + + if (filterStr != null && !filterStr.equals("*")) { + textQuery += " AND " + filterStr; + } + + return String.format("%s)=>[%s]", textQuery, knnQuery); + } + + @Override + public AggregationBuilder buildRedisAggregation() { + String queryString = buildQueryString(); + AggregationBuilder aggregation = new AggregationBuilder(queryString); + + aggregation.dialect(dialect); + aggregation.addScores(); + + aggregation.apply("(2 - @" + DISTANCE_ID + ")/2", "vector_similarity"); + aggregation.apply("@__score", "text_score"); + + String hybridScoreFormula = + String.format("%f*@text_score + %f*@vector_similarity", (1 - alpha), alpha); + aggregation.apply(hybridScoreFormula, "hybrid_score"); + + aggregation.sortBy(numResults, SortedField.desc("@hybrid_score")); + + if (!returnFields.isEmpty()) { + aggregation.load(returnFields.toArray(String[]::new)); + } + + return aggregation; + } + + @Override + public Map getParams() { + byte[] vectorBytes = ArrayUtils.floatArrayToBytes(vector); + Map params = new HashMap<>(); + params.put(VECTOR_PARAM, vectorBytes); + return params; } } diff --git a/core/src/main/java/com/redis/vl/query/HybridQuery.java b/core/src/main/java/com/redis/vl/query/HybridQuery.java index c086671..7d61b76 100644 --- a/core/src/main/java/com/redis/vl/query/HybridQuery.java +++ b/core/src/main/java/com/redis/vl/query/HybridQuery.java @@ -1,39 +1,34 @@ package com.redis.vl.query; import com.redis.vl.utils.ArrayUtils; -import com.redis.vl.utils.TokenEscaper; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; +import com.redis.vl.utils.FullTextQueryHelper; import java.util.*; -import java.util.stream.Collectors; -import redis.clients.jedis.search.aggr.AggregationBuilder; -import redis.clients.jedis.search.aggr.SortedField; +import redis.clients.jedis.search.Combiner; +import redis.clients.jedis.search.Combiners; +import redis.clients.jedis.search.Limit; +import redis.clients.jedis.search.Scorer; +import redis.clients.jedis.search.Scorers; +import redis.clients.jedis.search.hybrid.FTHybridParams; +import redis.clients.jedis.search.hybrid.FTHybridPostProcessingParams; +import redis.clients.jedis.search.hybrid.FTHybridSearchParams; +import redis.clients.jedis.search.hybrid.FTHybridVectorParams; /** - * HybridQuery combines text and vector search in Redis using aggregation. + * HybridQuery combines text and vector search using the native Redis FT.HYBRID command. * - *

Ported from Python: redisvl/query/aggregate.py:57-329 (AggregateHybridQuery class) + *

Ported from Python: redisvl/query/hybrid.py (HybridQuery class) * - *

It allows you to perform a hybrid search using both text and vector similarity. It scores - * documents based on a weighted combination of text and vector similarity using the formula: + *

This query uses the native FT.HYBRID command available in Redis 8.4+ which provides built-in + * score fusion via RRF (Reciprocal Rank Fusion) or LINEAR combination methods. This is the + * recommended approach for hybrid search on Redis 8.4+. * - *

- * hybrid_score = (1 - alpha) * text_score + alpha * vector_similarity
- * 
- * - *

Where {@code text_score} is the BM25 score from the text search and {@code vector_similarity} - * is the normalized cosine similarity from the vector search. - * - *

Redis Version Requirements: This query uses the ADDSCORES option in - * FT.AGGREGATE to expose the internal text search score (@__score). This feature requires - * Redis 7.4.0 or later. On older Redis versions, the query will fail. + *

For older Redis versions (7.4+) that use FT.AGGREGATE with manual score fusion, see {@link + * AggregateHybridQuery}. * - *

Note on Runtime Parameters: HybridQuery uses Redis FT.AGGREGATE for - * aggregation-based hybrid search. As of Redis Stack 7.2+, runtime parameters (efRuntime, epsilon, - * etc.) are NOT supported in FT.AGGREGATE queries. If you need runtime parameter support, use - * {@link VectorQuery} or {@link VectorRangeQuery} instead. See Python PR #439 for details. + *

Alpha Semantics: In this class, {@code linearAlpha} represents the text + * weight in the LINEAR combiner (matching the Python convention). This differs from {@link + * AggregateHybridQuery} where {@code alpha} represents the vector weight. The different parameter + * names prevent confusion. * *

Python equivalent: * @@ -44,13 +39,8 @@ * vector=[0.1, 0.2, 0.3], * vector_field_name="vector_field", * text_scorer="BM25STD", - * filter_expression=None, - * alpha=0.7, - * dtype="float32", + * combination_method="RRF", * num_results=10, - * return_fields=["field1", "field2"], - * stopwords="english", - * dialect=2, * ) * results = index.query(query) * @@ -64,13 +54,8 @@ * .vector(new float[]{0.1f, 0.2f, 0.3f}) * .vectorFieldName("vector_field") * .textScorer("BM25STD") - * .filterExpression(null) - * .alpha(0.7f) - * .dtype("float32") + * .combinationMethod(CombinationMethod.RRF) * .numResults(10) - * .returnFields(List.of("field1", "field2")) - * .stopwords(loadDefaultStopwords("english")) - * .dialect(2) * .build(); * List<Map<String, Object>> results = index.query(query); * @@ -78,239 +63,350 @@ *

This class is final to prevent finalizer attacks, as it throws exceptions in constructors for * input validation (SEI CERT OBJ11-J). * - * @since 0.1.0 + * @since 0.2.0 */ -public final class HybridQuery extends AggregationQuery { +public final class HybridQuery { - private static final String DISTANCE_ID = "vector_distance"; - private static final String VECTOR_PARAM = "vector"; + /** Vector search method for FT.HYBRID VSIM clause. */ + public enum VectorSearchMethod { + KNN, + RANGE + } - /** The text to search for. */ - private final String text; + /** Score combination method for FT.HYBRID COMBINE clause. */ + public enum CombinationMethod { + RRF, + LINEAR + } - /** The text field name to search in. */ - private final String textFieldName; + private static final String DEFAULT_VECTOR_PARAM = "vector"; - /** The vector to perform vector similarity search. */ + private final String text; + private final String textFieldName; private final float[] vector; - - /** The vector field name to search in. */ private final String vectorFieldName; - - /** - * The text scorer to use. - * - *

Options: TFIDF, TFIDF.DOCNORM, BM25, DISMAX, DOCSCORE, BM25STD - * - *

Defaults to "BM25STD". - */ + private final String vectorParamName; private final String textScorer; - - /** - * The filter expression to use. - * - *

Can be either a Filter object or a String containing a raw Redis filter expression. - * - *

Defaults to null (no filter). - */ + private final String yieldTextScoreAs; + private final VectorSearchMethod vectorSearchMethod; + private final int knnEfRuntime; + private final Float rangeRadius; + private final float rangeEpsilon; + private final String yieldVsimScoreAs; private final Object filterExpression; - - /** - * The weight of the vector similarity. - * - *

Documents will be scored as: hybrid_score = (alpha) * vector_score + (1-alpha) * text_score - * - *

Defaults to 0.7. - */ - private final float alpha; - - /** - * The data type of the vector. - * - *

Defaults to "float32". - */ + private final CombinationMethod combinationMethod; + private final int rrfWindow; + private final int rrfConstant; + private final float linearAlpha; + private final String yieldCombinedScoreAs; private final String dtype; - - /** - * The number of results to return. - * - *

Defaults to 10. - */ private final int numResults; - - /** - * The fields to return. - * - *

Defaults to empty list (return all). - */ private final List returnFields; - - /** - * The stopwords to remove from the provided text prior to search. - * - *

If "english" "german" etc is provided then a default set of stopwords for that language will - * be used. If a set of strings is provided then those will be used as stopwords. - * - *

Defaults to English stopwords. - */ private final Set stopwords; - /** - * The Redis dialect version. - * - *

Defaults to 2. - */ - private final int dialect; - - // Private constructor for builder private HybridQuery(HybridQueryBuilder builder) { this.text = builder.text; this.textFieldName = builder.textFieldName; - // Defensive copy of vector array this.vector = builder.vector != null ? builder.vector.clone() : null; this.vectorFieldName = builder.vectorFieldName; + this.vectorParamName = builder.vectorParamName; this.textScorer = builder.textScorer; + this.yieldTextScoreAs = builder.yieldTextScoreAs; + this.vectorSearchMethod = builder.vectorSearchMethod; + this.knnEfRuntime = builder.knnEfRuntime; + this.rangeRadius = builder.rangeRadius; + this.rangeEpsilon = builder.rangeEpsilon; + this.yieldVsimScoreAs = builder.yieldVsimScoreAs; this.filterExpression = builder.filterExpression; - this.alpha = builder.alpha; + this.combinationMethod = builder.combinationMethod; + this.rrfWindow = builder.rrfWindow; + this.rrfConstant = builder.rrfConstant; + this.linearAlpha = builder.linearAlpha; + this.yieldCombinedScoreAs = builder.yieldCombinedScoreAs; this.dtype = builder.dtype; this.numResults = builder.numResults; - // Defensive copy to prevent external modification this.returnFields = builder.returnFields != null ? List.copyOf(builder.returnFields) : List.of(); this.stopwords = builder.stopwords != null ? Set.copyOf(builder.stopwords) : Set.of(); - this.dialect = builder.dialect; - // Validate text is not empty if (this.text == null || this.text.trim().isEmpty()) { throw new IllegalArgumentException("text string cannot be empty"); } - // Validate tokenized text is not empty after stopwords removal - String tokenized = tokenizeAndEscapeQuery(this.text); + String tokenized = FullTextQueryHelper.tokenizeAndEscapeQuery(this.text, this.stopwords); if (tokenized.isEmpty()) { throw new IllegalArgumentException("text string cannot be empty after removing stopwords"); } + + if (this.vectorSearchMethod == VectorSearchMethod.RANGE && this.rangeRadius == null) { + throw new IllegalArgumentException( + "rangeRadius is required when vectorSearchMethod is RANGE"); + } } - /** - * Create a new builder for HybridQuery. - * - * @return A new HybridQueryBuilder instance - */ public static HybridQueryBuilder builder() { return new HybridQueryBuilder(); } - // Getters with defensive copies for mutable fields + public static Set loadDefaultStopwords(String language) { + return FullTextQueryHelper.loadDefaultStopwords(language); + } + + // Getters - /** - * Get the text query string. - * - * @return The text to search for - */ public String getText() { return text; } - /** - * Get the text field name. - * - * @return The field name containing text data - */ public String getTextFieldName() { return textFieldName; } - /** - * Get a copy of the query vector. - * - * @return A defensive copy of the vector array - */ public float[] getVector() { return vector != null ? vector.clone() : null; } - /** - * Get the vector field name. - * - * @return The field name containing vector data - */ public String getVectorFieldName() { return vectorFieldName; } - /** - * Get the text scoring algorithm. - * - * @return The text scorer (e.g., "BM25", "TFIDF") - */ + public String getVectorParamName() { + return vectorParamName; + } + public String getTextScorer() { return textScorer; } - /** - * Get the filter expression. - * - *

Can be either a Filter object or a String containing a raw Redis filter expression. - * - * @return The filter to apply (Filter or String), or null if no filter - */ + public String getYieldTextScoreAs() { + return yieldTextScoreAs; + } + + public VectorSearchMethod getVectorSearchMethod() { + return vectorSearchMethod; + } + + public int getKnnEfRuntime() { + return knnEfRuntime; + } + + public Float getRangeRadius() { + return rangeRadius; + } + + public float getRangeEpsilon() { + return rangeEpsilon; + } + + public String getYieldVsimScoreAs() { + return yieldVsimScoreAs; + } + public Object getFilterExpression() { return filterExpression; } - /** - * Get the alpha weighting factor. - * - * @return Weight between 0.0 (vector only) and 1.0 (text only) - */ - public float getAlpha() { - return alpha; + public CombinationMethod getCombinationMethod() { + return combinationMethod; + } + + public int getRrfWindow() { + return rrfWindow; + } + + public int getRrfConstant() { + return rrfConstant; + } + + public float getLinearAlpha() { + return linearAlpha; + } + + public String getYieldCombinedScoreAs() { + return yieldCombinedScoreAs; } - /** - * Get the data type for vector storage. - * - * @return The data type (e.g., "float32", "float64") - */ public String getDtype() { return dtype; } + public int getNumResults() { + return numResults; + } + + public List getReturnFields() { + return Collections.unmodifiableList(returnFields); + } + + public Set getStopwords() { + return Collections.unmodifiableSet(stopwords); + } + /** - * Get the maximum number of results. + * Build the query string for the SEARCH clause. * - * @return The result limit + *

Tokenizes the text, removes stopwords, escapes special characters, and adds optional filter + * expression. + * + * @return the query string for FT.HYBRID SEARCH clause */ - public int getNumResults() { - return numResults; + public String buildQueryString() { + String tokenized = FullTextQueryHelper.tokenizeAndEscapeQuery(text, stopwords); + String textQuery = String.format("@%s:(%s)", textFieldName, tokenized); + + String filterStr = resolveFilterString(); + if (filterStr != null && !filterStr.equals("*")) { + return String.format("(%s %s)", textQuery, filterStr); + } + + return textQuery; } /** - * Get an unmodifiable view of the return fields. + * Build the {@link FTHybridParams} for the native FT.HYBRID command. * - * @return Unmodifiable list of return fields + * @return the configured FTHybridParams */ - public List getReturnFields() { - return Collections.unmodifiableList(returnFields); + public FTHybridParams buildFTHybridParams() { + // Build SEARCH clause + FTHybridSearchParams.Builder searchBuilder = + FTHybridSearchParams.builder().query(buildQueryString()).scorer(resolveScorer(textScorer)); + if (yieldTextScoreAs != null) { + searchBuilder.scoreAlias(yieldTextScoreAs); + } + + // Build VSIM clause + FTHybridVectorParams.Builder vsimBuilder = + FTHybridVectorParams.builder() + .field("@" + vectorFieldName) + .vector("$" + vectorParamName) + .method(buildVectorMethod()); + + String filterStr = resolveFilterString(); + if (filterStr != null && !filterStr.equals("*")) { + vsimBuilder.filter(filterStr); + } + if (yieldVsimScoreAs != null) { + vsimBuilder.scoreAlias(yieldVsimScoreAs); + } + + // Build COMBINE clause + Combiner combiner = buildCombiner(); + + // Build POST-PROCESSING clause + FTHybridPostProcessingParams.Builder postBuilder = FTHybridPostProcessingParams.builder(); + if (!returnFields.isEmpty()) { + postBuilder.load(returnFields.toArray(new String[0])); + } + postBuilder.limit(Limit.of(0, numResults)); + + // Assemble FTHybridParams + FTHybridParams.Builder paramsBuilder = + FTHybridParams.builder() + .search(searchBuilder.build()) + .vectorSearch(vsimBuilder.build()) + .combine(combiner) + .postProcessing(postBuilder.build()) + .param(vectorParamName, ArrayUtils.floatArrayToBytes(vector)); + + return paramsBuilder.build(); } /** - * Get an unmodifiable view of the stopwords. + * Convert this HybridQuery to an AggregateHybridQuery for fallback when FT.HYBRID is not + * available. + * + *

Maps the LINEAR alpha semantics: HybridQuery's linearAlpha (text weight) maps to + * AggregateHybridQuery's alpha as (1 - linearAlpha) since AggregateHybridQuery's alpha is the + * vector weight. * - * @return Unmodifiable set of stopwords + * @return an AggregateHybridQuery with equivalent parameters */ - public Set getStopwords() { - return Collections.unmodifiableSet(stopwords); + public AggregateHybridQuery toAggregateHybridQuery() { + // Map linearAlpha (text weight in FT.HYBRID) to alpha (vector weight in FT.AGGREGATE) + float aggregateAlpha = + (combinationMethod == CombinationMethod.LINEAR) ? (1.0f - linearAlpha) : 0.7f; + + AggregateHybridQuery.AggregateHybridQueryBuilder builder = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textFieldName) + .vector(vector) + .vectorFieldName(vectorFieldName) + .textScorer(textScorer) + .alpha(aggregateAlpha) + .dtype(dtype) + .numResults(numResults) + .returnFields(returnFields) + .stopwords(stopwords); + + if (filterExpression instanceof Filter f) { + builder.filterExpression(f); + } else if (filterExpression instanceof String s) { + builder.filterExpression(s); + } + + return builder.build(); } /** - * Get the query dialect version. + * Get the parameters map for the query (vector parameter). * - * @return The dialect version (default 2) + * @return parameter map with vector bytes */ - public int getDialect() { - return dialect; + public Map getParams() { + byte[] vectorBytes = ArrayUtils.floatArrayToBytes(vector); + Map params = new HashMap<>(); + params.put(vectorParamName, vectorBytes); + return params; + } + + private String resolveFilterString() { + if (filterExpression instanceof Filter) { + return ((Filter) filterExpression).build(); + } else if (filterExpression instanceof String) { + return (String) filterExpression; + } + return null; + } + + private FTHybridVectorParams.VectorMethod buildVectorMethod() { + if (vectorSearchMethod == VectorSearchMethod.RANGE) { + FTHybridVectorParams.Range range = + FTHybridVectorParams.Range.of(rangeRadius).epsilon(rangeEpsilon); + return range; + } + // Default: KNN + return FTHybridVectorParams.Knn.of(numResults).efRuntime(knnEfRuntime); + } + + private Combiner buildCombiner() { + if (combinationMethod == CombinationMethod.LINEAR) { + Combiners.Linear linear = Combiners.linear().alpha(linearAlpha).beta(1.0 - linearAlpha); + if (yieldCombinedScoreAs != null) { + return linear.as(yieldCombinedScoreAs); + } + return linear; + } + // Default: RRF + Combiners.RRF rrf = Combiners.rrf().window(rrfWindow).constant(rrfConstant); + if (yieldCombinedScoreAs != null) { + return rrf.as(yieldCombinedScoreAs); + } + return rrf; + } + + private static Scorer resolveScorer(String textScorer) { + return switch (textScorer.toUpperCase()) { + case "BM25STD" -> Scorers.bm25std(); + case "BM25STD.NORM" -> Scorers.bm25stdNorm(); + case "TFIDF" -> Scorers.tfidf(); + case "TFIDF.DOCNORM" -> Scorers.tfidfDocnorm(); + case "DISMAX" -> Scorers.dismax(); + case "DOCSCORE" -> Scorers.docscore(); + case "HAMMING" -> Scorers.hamming(); + default -> throw new IllegalArgumentException("Unknown scorer: " + textScorer); + }; } /** Builder for creating HybridQuery instances with fluent API. */ @@ -319,341 +415,144 @@ public static class HybridQueryBuilder { private String textFieldName; private float[] vector; private String vectorFieldName; + private String vectorParamName = DEFAULT_VECTOR_PARAM; private String textScorer = "BM25STD"; + private String yieldTextScoreAs; + private VectorSearchMethod vectorSearchMethod = VectorSearchMethod.KNN; + private int knnEfRuntime = 10; + private Float rangeRadius; + private float rangeEpsilon = 0.01f; + private String yieldVsimScoreAs; private Object filterExpression; - private float alpha = 0.7f; + private CombinationMethod combinationMethod = CombinationMethod.RRF; + private int rrfWindow = 20; + private int rrfConstant = 60; + private float linearAlpha = 0.3f; + private String yieldCombinedScoreAs; private String dtype = "float32"; private int numResults = 10; private List returnFields = List.of(); - private Set stopwords = loadDefaultStopwords("english"); - private int dialect = 2; + private Set stopwords = FullTextQueryHelper.loadDefaultStopwords("english"); - /** Package-private constructor used by builder() method. */ HybridQueryBuilder() {} - /** - * Set the text query string. - * - * @param text The text to search for - * @return This builder for chaining - */ public HybridQueryBuilder text(String text) { this.text = text; return this; } - /** - * Set the name of the text field to search. - * - * @param textFieldName The field name containing text data - * @return This builder for chaining - */ public HybridQueryBuilder textFieldName(String textFieldName) { this.textFieldName = textFieldName; return this; } - /** - * Set the query vector for similarity search. Makes a defensive copy. - * - * @param vector The embedding vector to search with - * @return This builder for chaining - */ public HybridQueryBuilder vector(float[] vector) { this.vector = vector != null ? vector.clone() : null; return this; } - /** - * Set the name of the vector field to search. - * - * @param vectorFieldName The field name containing vector data - * @return This builder for chaining - */ public HybridQueryBuilder vectorFieldName(String vectorFieldName) { this.vectorFieldName = vectorFieldName; return this; } - /** - * Set the scoring algorithm for text search. - * - * @param textScorer The text scorer (e.g., "BM25", "TFIDF") - * @return This builder for chaining - */ + public HybridQueryBuilder vectorParamName(String vectorParamName) { + this.vectorParamName = vectorParamName; + return this; + } + public HybridQueryBuilder textScorer(String textScorer) { this.textScorer = textScorer; return this; } - /** - * Set an additional filter expression for the query using a Filter object. - * - * @param filterExpression The filter to apply - * @return This builder for chaining - */ - public HybridQueryBuilder filterExpression(Filter filterExpression) { - this.filterExpression = filterExpression; + public HybridQueryBuilder yieldTextScoreAs(String yieldTextScoreAs) { + this.yieldTextScoreAs = yieldTextScoreAs; return this; } - /** - * Set an additional filter expression for the query using a raw Redis query string. - * - *

This allows passing custom Redis filter syntax directly, such as: - * "@category:{tech|science|engineering}" - * - *

Ported from Python PR #375 to support string filter expressions. - * - * @param filterExpression The raw Redis filter string - * @return This builder for chaining - */ - public HybridQueryBuilder filterExpression(String filterExpression) { - this.filterExpression = filterExpression; + public HybridQueryBuilder vectorSearchMethod(VectorSearchMethod vectorSearchMethod) { + this.vectorSearchMethod = vectorSearchMethod; return this; } - /** - * Set the weight for combining text and vector scores. - * - * @param alpha Weight between 0.0 (vector only) and 1.0 (text only), default 0.7 - * @return This builder for chaining - */ - public HybridQueryBuilder alpha(float alpha) { - this.alpha = alpha; + public HybridQueryBuilder knnEfRuntime(int knnEfRuntime) { + this.knnEfRuntime = knnEfRuntime; return this; } - /** - * Set the data type for vector storage. - * - * @param dtype The data type (e.g., "float32", "float64") - * @return This builder for chaining - */ - public HybridQueryBuilder dtype(String dtype) { - this.dtype = dtype; + public HybridQueryBuilder rangeRadius(float rangeRadius) { + this.rangeRadius = rangeRadius; return this; } - /** - * Set the maximum number of results to return. - * - * @param numResults The result limit - * @return This builder for chaining - */ - public HybridQueryBuilder numResults(int numResults) { - this.numResults = numResults; + public HybridQueryBuilder rangeEpsilon(float rangeEpsilon) { + this.rangeEpsilon = rangeEpsilon; return this; } - /** - * Set the fields to return in results. Makes a defensive copy. - * - * @param returnFields List of field names to return - * @return This builder for chaining - */ - public HybridQueryBuilder returnFields(List returnFields) { - this.returnFields = returnFields != null ? List.copyOf(returnFields) : List.of(); + public HybridQueryBuilder yieldVsimScoreAs(String yieldVsimScoreAs) { + this.yieldVsimScoreAs = yieldVsimScoreAs; return this; } - /** - * Set custom stopwords for text search. Makes a defensive copy. - * - * @param stopwords Set of words to exclude from text search - * @return This builder for chaining - */ - public HybridQueryBuilder stopwords(Set stopwords) { - this.stopwords = stopwords != null ? Set.copyOf(stopwords) : Set.of(); + public HybridQueryBuilder filterExpression(Filter filterExpression) { + this.filterExpression = filterExpression; return this; } - /** - * Set the query dialect version. - * - * @param dialect The dialect version (default 2) - * @return This builder for chaining - */ - public HybridQueryBuilder dialect(int dialect) { - this.dialect = dialect; + public HybridQueryBuilder filterExpression(String filterExpression) { + this.filterExpression = filterExpression; return this; } - /** - * Build the HybridQuery instance. - * - * @return The configured HybridQuery - * @throws IllegalArgumentException if required fields are missing or invalid - */ - public HybridQuery build() { - return new HybridQuery(this); + public HybridQueryBuilder combinationMethod(CombinationMethod combinationMethod) { + this.combinationMethod = combinationMethod; + return this; } - } - /** - * Load default stopwords for a given language. - * - *

Python uses nltk, we'll use a simple file-based approach. - * - * @param language the language (e.g., "english", "german") - * @return set of stopwords - */ - public static Set loadDefaultStopwords(String language) { - if (language == null || language.isEmpty()) { - return Set.of(); + public HybridQueryBuilder rrfWindow(int rrfWindow) { + this.rrfWindow = rrfWindow; + return this; } - // Try to load stopwords from resources - String resourcePath = "/stopwords/" + language + ".txt"; - java.io.InputStream inputStream = HybridQuery.class.getResourceAsStream(resourcePath); - - if (inputStream == null) { - // Fallback: common English stopwords - if ("english".equalsIgnoreCase(language)) { - return Set.of( - "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", - "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", - "these", "they", "this", "to", "was", "will", "with"); - } - return Set.of(); + public HybridQueryBuilder rrfConstant(int rrfConstant) { + this.rrfConstant = rrfConstant; + return this; } - try (BufferedReader reader = - new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { - return reader - .lines() - .map(String::trim) - .filter(line -> !line.isEmpty() && !line.startsWith("#")) - .collect(Collectors.toSet()); - } catch (IOException e) { - throw new RuntimeException("Failed to load stopwords for language: " + language, e); + public HybridQueryBuilder linearAlpha(float linearAlpha) { + this.linearAlpha = linearAlpha; + return this; } - } - - /** - * Tokenize and escape the user query. - * - *

Ported from Python: _tokenize_and_escape_query (line 185-209) - * - * @param userQuery the user query to tokenize - * @return tokenized and escaped query string joined by OR - */ - public String tokenizeAndEscapeQuery(String userQuery) { - TokenEscaper escaper = new TokenEscaper(); - - // Tokenize: split on whitespace, clean up punctuation - List tokens = - Arrays.stream(userQuery.split("\\s+")) - .map( - token -> - escaper.escape( - token - .strip() - .replaceAll("^,+|,+$", "") - .replace("\u201c", "") - .replace("\u201d", "") - .toLowerCase())) - .filter(token -> !token.isEmpty() && !stopwords.contains(token)) - .collect(Collectors.toList()); - - // Join with OR (pipe) - return String.join(" | ", tokens); - } - /** - * Build the full query string for text search with optional filtering. - * - *

Ported from Python: _build_query_string (line 211-225) - * - * @return the query string - */ - @Override - public String buildQueryString() { - // Handle both Filter objects and String filter expressions (Python port: PR #375) - String filterStr = null; - if (filterExpression instanceof Filter) { - filterStr = ((Filter) filterExpression).build(); - } else if (filterExpression instanceof String) { - filterStr = (String) filterExpression; + public HybridQueryBuilder yieldCombinedScoreAs(String yieldCombinedScoreAs) { + this.yieldCombinedScoreAs = yieldCombinedScoreAs; + return this; } - // Base KNN query - String knnQuery = - String.format( - "KNN %d @%s $%s AS %s", numResults, vectorFieldName, VECTOR_PARAM, DISTANCE_ID); - - // Text query with fuzzy matching (~) - String textQuery = String.format("(~@%s:(%s)", textFieldName, tokenizeAndEscapeQuery(text)); - - // Add filter if present and not wildcard - if (filterStr != null && !filterStr.equals("*")) { - textQuery += " AND " + filterStr; + public HybridQueryBuilder dtype(String dtype) { + this.dtype = dtype; + return this; } - // Combine: (~@text_field:(tokens) [AND filter])=>[KNN ...] - return String.format("%s)=>[%s]", textQuery, knnQuery); - } - - /** - * Build the Redis AggregationBuilder for this hybrid query. - * - *

Ported from Python __init__ method (line 103-129) - * - * @return the AggregationBuilder - */ - @Override - public AggregationBuilder buildRedisAggregation() { - String queryString = buildQueryString(); - AggregationBuilder aggregation = new AggregationBuilder(queryString); - - // Set dialect - aggregation.dialect(dialect); - - // Enable ADDSCORES to expose @__score field containing the text search score - // (Python: self.add_scores() - line 169) - // Note: Requires Redis 7.4.0+. Uses default BM25 scorer. - aggregation.addScores(); - - // Apply vector similarity calculation (Python: line 170-172) - // vector_similarity = (2 - @vector_distance) / 2 - // Normalizes cosine distance [0,2] to similarity [0,1] - aggregation.apply("(2 - @" + DISTANCE_ID + ")/2", "vector_similarity"); - - // Apply text score from @__score (the BM25/text search score exposed by ADDSCORES) - // (Python: text_score="@__score" - line 171) - aggregation.apply("@__score", "text_score"); - - // Apply hybrid score calculation (Python: line 173) - // hybrid_score = (1-alpha) * text_score + alpha * vector_similarity - String hybridScoreFormula = - String.format("%f*@text_score + %f*@vector_similarity", (1 - alpha), alpha); - aggregation.apply(hybridScoreFormula, "hybrid_score"); - - // Sort by hybrid score descending (Python: line 174) - aggregation.sortBy(numResults, SortedField.desc("@hybrid_score")); - - // Load return fields (Python: line 176-177) - if (!returnFields.isEmpty()) { - aggregation.load(returnFields.toArray(String[]::new)); + public HybridQueryBuilder numResults(int numResults) { + this.numResults = numResults; + return this; } - return aggregation; - } + public HybridQueryBuilder returnFields(List returnFields) { + this.returnFields = returnFields != null ? List.copyOf(returnFields) : List.of(); + return this; + } - /** - * Get the parameters for the aggregation query. - * - *

Ported from Python: params property (line 132-145) - * - * @return parameter map with vector - */ - @Override - public Map getParams() { - // Convert vector to bytes (Python: array_to_buffer(self._vector, dtype=self._dtype)) - byte[] vectorBytes = ArrayUtils.floatArrayToBytes(vector); + public HybridQueryBuilder stopwords(Set stopwords) { + this.stopwords = stopwords != null ? Set.copyOf(stopwords) : Set.of(); + return this; + } - Map params = new HashMap<>(); - params.put(VECTOR_PARAM, vectorBytes); - return params; + public HybridQuery build() { + return new HybridQuery(this); + } } } diff --git a/core/src/main/java/com/redis/vl/query/TextQuery.java b/core/src/main/java/com/redis/vl/query/TextQuery.java index 0b33b49..b84edab 100644 --- a/core/src/main/java/com/redis/vl/query/TextQuery.java +++ b/core/src/main/java/com/redis/vl/query/TextQuery.java @@ -55,6 +55,9 @@ public class TextQuery { /** Fields that should not be decoded from binary format */ private final List skipDecodeFields; + /** Fields to return in the search results */ + private final List returnFields; + private TextQuery(Builder builder) { this.text = builder.text; this.scorer = builder.scorer; @@ -65,6 +68,8 @@ private TextQuery(Builder builder) { this.sortDescending = builder.sortDescending; this.skipDecodeFields = builder.skipDecodeFields != null ? List.copyOf(builder.skipDecodeFields) : List.of(); + this.returnFields = + builder.returnFields != null ? List.copyOf(builder.returnFields) : List.of(); } /** @@ -189,6 +194,7 @@ public static class Builder { private String sortBy; private boolean sortDescending = false; private List skipDecodeFields = List.of(); + private List returnFields = List.of(); /** * Set the text to search for. @@ -385,6 +391,28 @@ public Builder skipDecodeFields(String... fields) { return this; } + /** + * Set the fields to return in search results. + * + * @param returnFields List of field names to return + * @return This builder + */ + public Builder returnFields(List returnFields) { + this.returnFields = returnFields != null ? List.copyOf(returnFields) : List.of(); + return this; + } + + /** + * Set the fields to return in search results (varargs). + * + * @param fields Field names to return + * @return This builder + */ + public Builder returnFields(String... fields) { + this.returnFields = fields != null ? List.of(fields) : List.of(); + return this; + } + /** * Build the TextQuery instance. * diff --git a/core/src/main/java/com/redis/vl/test/vcr/VCRContext.java b/core/src/main/java/com/redis/vl/test/vcr/VCRContext.java index 8ab0d0c..091d014 100644 --- a/core/src/main/java/com/redis/vl/test/vcr/VCRContext.java +++ b/core/src/main/java/com/redis/vl/test/vcr/VCRContext.java @@ -167,7 +167,7 @@ private void startRedis() { } private String buildRedisCommand() { - StringBuilder cmd = new StringBuilder("redis-stack-server"); + StringBuilder cmd = new StringBuilder("redis-server"); cmd.append(" --dir /data"); cmd.append(" --dbfilename dump.rdb"); diff --git a/core/src/main/java/com/redis/vl/test/vcr/VCRExtension.java b/core/src/main/java/com/redis/vl/test/vcr/VCRExtension.java index f9e03e7..6108a9d 100644 --- a/core/src/main/java/com/redis/vl/test/vcr/VCRExtension.java +++ b/core/src/main/java/com/redis/vl/test/vcr/VCRExtension.java @@ -7,10 +7,13 @@ import org.junit.jupiter.api.extension.AfterEachCallback; import org.junit.jupiter.api.extension.BeforeAllCallback; import org.junit.jupiter.api.extension.BeforeEachCallback; +import org.junit.jupiter.api.extension.ConditionEvaluationResult; +import org.junit.jupiter.api.extension.ExecutionCondition; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.api.extension.TestWatcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.testcontainers.DockerClientFactory; /** * JUnit 5 extension that provides VCR (Video Cassette Recorder) functionality for recording and @@ -58,7 +61,8 @@ public class VCRExtension AfterAllCallback, BeforeEachCallback, AfterEachCallback, - TestWatcher { + TestWatcher, + ExecutionCondition { private static final Logger LOG = LoggerFactory.getLogger(VCRExtension.class); @@ -67,6 +71,18 @@ public class VCRExtension private VCRContext context; + @Override + public ConditionEvaluationResult evaluateExecutionCondition(ExtensionContext context) { + try { + if (DockerClientFactory.instance().isDockerAvailable()) { + return ConditionEvaluationResult.enabled("Docker is available"); + } + } catch (Exception e) { + // DockerClientFactory may throw if Docker is completely absent + } + return ConditionEvaluationResult.disabled("Docker is not available, skipping VCR tests"); + } + @Override public void beforeAll(ExtensionContext extensionContext) throws Exception { // Get VCR configuration from @VCRTest annotation diff --git a/core/src/main/java/com/redis/vl/test/vcr/VCRTest.java b/core/src/main/java/com/redis/vl/test/vcr/VCRTest.java index 3464109..90895a0 100644 --- a/core/src/main/java/com/redis/vl/test/vcr/VCRTest.java +++ b/core/src/main/java/com/redis/vl/test/vcr/VCRTest.java @@ -33,8 +33,7 @@ *

    *
  • {@link #mode()} - The VCR operating mode (default: PLAYBACK_OR_RECORD) *
  • {@link #dataDir()} - Directory for storing cassettes (default: src/test/resources/vcr-data) - *
  • {@link #redisImage()} - Docker image for Redis container (default: - * redis/redis-stack:latest) + *
  • {@link #redisImage()} - Docker image for Redis container (default: redis:latest) *
* * @see VCRMode @@ -72,10 +71,9 @@ /** * The Docker image to use for the Redis container. * - *

The image should be a Redis Stack image that includes RediSearch and RedisJSON modules for - * optimal functionality. + *

Redis 8.x+ includes search and JSON modules natively. * - * @return the Redis Docker image name (default: redis/redis-stack:latest) + * @return the Redis Docker image name (default: redis:latest) */ - String redisImage() default "redis/redis-stack:latest"; + String redisImage() default "redis:latest"; } diff --git a/core/src/main/java/com/redis/vl/utils/FullTextQueryHelper.java b/core/src/main/java/com/redis/vl/utils/FullTextQueryHelper.java new file mode 100644 index 0000000..bcd9be2 --- /dev/null +++ b/core/src/main/java/com/redis/vl/utils/FullTextQueryHelper.java @@ -0,0 +1,98 @@ +package com.redis.vl.utils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Shared utility methods for full-text query processing. + * + *

Ported from Python: redisvl/utils/full_text_query_helper.py + * + *

Provides tokenization, escaping, and stopword loading used by both {@code HybridQuery} (native + * FT.HYBRID) and {@code AggregateHybridQuery} (FT.AGGREGATE-based hybrid search). + * + * @since 0.2.0 + */ +public final class FullTextQueryHelper { + + private FullTextQueryHelper() { + throw new UnsupportedOperationException("Utility class"); + } + + /** + * Load default stopwords for a given language. + * + *

Python uses nltk, we use a simple file-based approach. + * + * @param language the language (e.g., "english", "german") + * @return set of stopwords + */ + public static Set loadDefaultStopwords(String language) { + if (language == null || language.isEmpty()) { + return Set.of(); + } + + // Try to load stopwords from resources + String resourcePath = "/stopwords/" + language + ".txt"; + java.io.InputStream inputStream = FullTextQueryHelper.class.getResourceAsStream(resourcePath); + + if (inputStream == null) { + // Fallback: common English stopwords + if ("english".equalsIgnoreCase(language)) { + return Set.of( + "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", + "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", + "these", "they", "this", "to", "was", "will", "with"); + } + return Set.of(); + } + + try (BufferedReader reader = + new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { + return reader + .lines() + .map(String::trim) + .filter(line -> !line.isEmpty() && !line.startsWith("#")) + .collect(Collectors.toSet()); + } catch (IOException e) { + throw new RuntimeException("Failed to load stopwords for language: " + language, e); + } + } + + /** + * Tokenize and escape a user query, removing stopwords. + * + *

Ported from Python: _tokenize_and_escape_query + * + * @param userQuery the user query to tokenize + * @param stopwords the set of stopwords to filter out + * @return tokenized and escaped query string joined by OR (pipe) + */ + public static String tokenizeAndEscapeQuery(String userQuery, Set stopwords) { + TokenEscaper escaper = new TokenEscaper(); + + // Tokenize: split on whitespace, clean up punctuation + List tokens = + Arrays.stream(userQuery.split("\\s+")) + .map( + token -> + escaper.escape( + token + .strip() + .replaceAll("^,+|,+$", "") + .replace("\u201c", "") + .replace("\u201d", "") + .toLowerCase())) + .filter(token -> !token.isEmpty() && !stopwords.contains(token)) + .collect(Collectors.toList()); + + // Join with OR (pipe) + return String.join(" | ", tokens); + } +} diff --git a/core/src/test/java/com/redis/vl/BaseIntegrationTest.java b/core/src/test/java/com/redis/vl/BaseIntegrationTest.java index 5490786..1dcab17 100644 --- a/core/src/test/java/com/redis/vl/BaseIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/BaseIntegrationTest.java @@ -1,18 +1,21 @@ package com.redis.vl; +import com.redis.testcontainers.RedisStackContainer; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.testcontainers.containers.GenericContainer; -import org.testcontainers.utility.DockerImageName; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; import redis.clients.jedis.RedisClient; import redis.clients.jedis.UnifiedJedis; /** - * Base class for integration tests with Redis Stack container. + * Base class for integration tests with Redis container. * - *

Updated for Jedis 7.2+ API using RedisClient instead of deprecated JedisPool/JedisPooled. + *

Updated for Jedis 7.2+ API using RedisClient instead of deprecated JedisPool/JedisPooled. Uses + * Redis Stack image which includes all required modules (RediSearch, RedisJSON, etc.). */ +@Testcontainers(disabledWithoutDocker = true) public abstract class BaseIntegrationTest { @SuppressFBWarnings( @@ -25,23 +28,22 @@ public abstract class BaseIntegrationTest { justification = "Test infrastructure fields intentionally mutable for test lifecycle") protected static String redisUrl; - @SuppressFBWarnings( - value = {"MS_PKGPROTECT", "MS_CANNOT_BE_FINAL"}, - justification = "Test infrastructure fields intentionally mutable for test lifecycle") - protected static GenericContainer REDIS; + @Container + protected static final RedisStackContainer REDIS = + new RedisStackContainer(RedisStackContainer.DEFAULT_IMAGE_NAME.withTag("latest")) + .withReuse(true); private static RedisClient redisClient; @BeforeAll static void startContainer() { - // Start Redis Stack container - REDIS = - new GenericContainer<>(DockerImageName.parse("redis/redis-stack:latest")) - .withExposedPorts(6379); - REDIS.start(); + // Start container if not already started + if (!REDIS.isRunning()) { + REDIS.start(); + } String host = REDIS.getHost(); - int port = REDIS.getMappedPort(6379); + int port = REDIS.getRedisPort(); // Build Redis URL for testing URL-based constructors redisUrl = String.format("redis://%s:%d", host, port); @@ -60,9 +62,6 @@ static void stopContainer() { redisClient = null; unifiedJedis = null; } - if (REDIS != null) { - REDIS.stop(); - } } /** Get Redis URL for tests that need URL-based connections */ diff --git a/core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java b/core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java index fb8dcb6..50b6ecd 100644 --- a/core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java +++ b/core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java @@ -1,6 +1,7 @@ package com.redis.vl.extensions.cache; import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import com.redis.vl.BaseIntegrationTest; import com.redis.vl.utils.vectorize.BaseVectorizer; @@ -27,12 +28,12 @@ public class NotebookSemanticCacheTest extends BaseIntegrationTest { public void setUp() { // Cell 5: Create vectorizer using SentenceTransformersVectorizer + // Skip tests if model cannot be loaded (e.g., missing ONNX format, download failures) try { vectorizer = new SentenceTransformersVectorizer("redis/langcache-embed-v3"); } catch (Exception e) { System.err.println("Failed to initialize SentenceTransformersVectorizer: " + e.getMessage()); - e.printStackTrace(); - throw e; + assumeTrue(false, "SentenceTransformersVectorizer not available: " + e.getMessage()); } // Initialize SemanticCache using Builder pattern diff --git a/core/src/test/java/com/redis/vl/index/BatchOperationsIntegrationTest.java b/core/src/test/java/com/redis/vl/index/BatchOperationsIntegrationTest.java index 662a098..f00219d 100644 --- a/core/src/test/java/com/redis/vl/index/BatchOperationsIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/index/BatchOperationsIntegrationTest.java @@ -30,10 +30,10 @@ static void setup() { List> fields = Arrays.asList( - Map.of("name", "$.category", "type", "tag"), - Map.of("name", "$.price", "type", "numeric"), - Map.of("name", "$.description", "type", "text"), - Map.of("name", "$.rating", "type", "numeric")); + Map.of("name", "$.category", "type", "tag", "alias", "category"), + Map.of("name", "$.price", "type", "numeric", "alias", "price"), + Map.of("name", "$.description", "type", "text", "alias", "description"), + Map.of("name", "$.rating", "type", "numeric", "alias", "rating")); schemaDict.put("fields", fields); IndexSchema schema = IndexSchema.fromDict(schemaDict); @@ -86,15 +86,15 @@ private static void loadTestData() { @Order(1) @DisplayName("Test batch search with multiple batches") void testBatchSearchWithMultipleBatches() { - // Create multiple search queries (using JSONPath notation for JSON storage) + // Create multiple search queries (using alias names from schema) List queries = Arrays.asList( - "@\\$\\.category:{electronics}", - "@\\$\\.price:[10 50]", - "@\\$\\.description:(excellent)", - "@\\$\\.rating:[4 5]", - "@\\$\\.category:{books}", - "@\\$\\.price:[100 200]"); + "@category:{electronics}", + "@price:[10 50]", + "@description:(excellent)", + "@rating:[4 5]", + "@category:{books}", + "@price:[100 200]"); int batchSize = 2; // Process 2 queries at a time @@ -118,15 +118,15 @@ void testBatchSearchWithMultipleBatches() { @Order(2) @DisplayName("Test batch query with multiple batches") void testBatchQueryWithMultipleBatches() { - // Create multiple filter queries (using JSONPath for JSON storage) + // Create multiple filter queries (using alias names from schema) List queries = Arrays.asList( - Filter.tag("$.category", "electronics"), - Filter.numeric("$.price").between(10, 50), - Filter.text("$.description", "excellent"), - Filter.numeric("$.rating").gt(3), - Filter.tag("$.category", "books", "food"), - Filter.numeric("$.price").lt(30)); + Filter.tag("category", "electronics"), + Filter.numeric("price").between(10, 50), + Filter.text("description", "excellent"), + Filter.numeric("rating").gt(3), + Filter.tag("category", "books", "food"), + Filter.numeric("price").lt(30)); int batchSize = 3; // Process 3 queries at a time diff --git a/core/src/test/java/com/redis/vl/langchain4j/RedisVLChatMemoryStoreTest.java b/core/src/test/java/com/redis/vl/langchain4j/RedisVLChatMemoryStoreTest.java index 6705c81..fc7dc09 100644 --- a/core/src/test/java/com/redis/vl/langchain4j/RedisVLChatMemoryStoreTest.java +++ b/core/src/test/java/com/redis/vl/langchain4j/RedisVLChatMemoryStoreTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.*; +import com.redis.vl.BaseIntegrationTest; import dev.langchain4j.data.message.AiMessage; import dev.langchain4j.data.message.ChatMessage; import dev.langchain4j.data.message.UserMessage; @@ -10,8 +11,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import redis.clients.jedis.RedisClient; -import redis.clients.jedis.UnifiedJedis; /** * Test for RedisVLChatMemoryStore - LangChain4J integration. @@ -19,27 +18,22 @@ *

Tests the ChatMemoryStore implementation using Redis for conversation persistence. */ @Tag("integration") -class RedisVLChatMemoryStoreTest { +class RedisVLChatMemoryStoreTest extends BaseIntegrationTest { - private UnifiedJedis jedis; private RedisVLChatMemoryStore chatMemoryStore; private static final String SESSION_ID = "test-session-123"; @BeforeEach void setUp() { - jedis = RedisClient.create("localhost", 6379); - chatMemoryStore = new RedisVLChatMemoryStore(jedis, "test_chat:"); + chatMemoryStore = new RedisVLChatMemoryStore(unifiedJedis, "test_chat:"); } @AfterEach void tearDown() { // Clean up test data - if (chatMemoryStore != null && jedis != null) { + if (chatMemoryStore != null && unifiedJedis != null) { chatMemoryStore.deleteMessages(SESSION_ID); } - if (jedis != null) { - jedis.close(); - } } @Test diff --git a/core/src/test/java/com/redis/vl/langchain4j/RedisVLDocumentStoreTest.java b/core/src/test/java/com/redis/vl/langchain4j/RedisVLDocumentStoreTest.java index d63de63..05683d7 100644 --- a/core/src/test/java/com/redis/vl/langchain4j/RedisVLDocumentStoreTest.java +++ b/core/src/test/java/com/redis/vl/langchain4j/RedisVLDocumentStoreTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.*; +import com.redis.vl.BaseIntegrationTest; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -10,8 +11,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import redis.clients.jedis.RedisClient; -import redis.clients.jedis.UnifiedJedis; /** * Test for RedisVLDocumentStore - stores raw binary content (images, PDFs) for multimodal RAG. @@ -19,27 +18,22 @@ *

Tests storing and retrieving documents with metadata for use with vision LLMs. */ @Tag("integration") -class RedisVLDocumentStoreTest { +class RedisVLDocumentStoreTest extends BaseIntegrationTest { - private UnifiedJedis jedis; private RedisVLDocumentStore documentStore; private static final String KEY_PREFIX = "test_docs:"; @BeforeEach void setUp() { - jedis = RedisClient.create("localhost", 6379); - documentStore = new RedisVLDocumentStore(jedis, KEY_PREFIX); + documentStore = new RedisVLDocumentStore(unifiedJedis, KEY_PREFIX); } @AfterEach void tearDown() { // Clean up test data - if (documentStore != null && jedis != null) { + if (documentStore != null && unifiedJedis != null) { // Delete all test keys - jedis.keys(KEY_PREFIX + "*").forEach(jedis::del); - } - if (jedis != null) { - jedis.close(); + unifiedJedis.keys(KEY_PREFIX + "*").forEach(unifiedJedis::del); } } @@ -203,7 +197,7 @@ void testStoreNullId() { void testBuilderPattern() { // Given RedisVLDocumentStore built = - RedisVLDocumentStore.builder().jedis(jedis).keyPrefix("custom_prefix:").build(); + RedisVLDocumentStore.builder().jedis(unifiedJedis).keyPrefix("custom_prefix:").build(); // When built.store("test", "content".getBytes(StandardCharsets.UTF_8), Map.of()); diff --git a/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreFilterTest.java b/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreFilterTest.java index 541049d..884d7ac 100644 --- a/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreFilterTest.java +++ b/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreFilterTest.java @@ -3,6 +3,7 @@ import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey; import static org.junit.jupiter.api.Assertions.*; +import com.redis.vl.BaseIntegrationTest; import com.redis.vl.index.SearchIndex; import com.redis.vl.schema.IndexSchema; import dev.langchain4j.data.document.Metadata; @@ -17,8 +18,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import redis.clients.jedis.RedisClient; -import redis.clients.jedis.UnifiedJedis; /** * Integration tests for RedisVLEmbeddingStore with filter support. @@ -27,9 +26,8 @@ * on tests from LangChain4J community PR #183. */ @Tag("integration") -class RedisVLEmbeddingStoreFilterTest { +class RedisVLEmbeddingStoreFilterTest extends BaseIntegrationTest { - private UnifiedJedis jedis; private SearchIndex searchIndex; private RedisVLEmbeddingStore embeddingStore; private static final String INDEX_NAME = "test_lc4j_filters"; @@ -37,8 +35,6 @@ class RedisVLEmbeddingStoreFilterTest { @BeforeEach void setUp() { - jedis = RedisClient.create("localhost", 6379); - // Create schema with indexed metadata fields for filtering Map schema = Map.of( @@ -78,7 +74,7 @@ void setUp() { "attrs", Map.of("as", "tags", "separator", "|")))); - searchIndex = new SearchIndex(IndexSchema.fromDict(schema), jedis); + searchIndex = new SearchIndex(IndexSchema.fromDict(schema), unifiedJedis); try { searchIndex.create(true); } catch (Exception e) { @@ -105,10 +101,6 @@ void tearDown() { // Ignore } } - - if (jedis != null) { - jedis.close(); - } } @Test diff --git a/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreTest.java b/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreTest.java index 2c1f66b..4790d74 100644 --- a/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreTest.java +++ b/core/src/test/java/com/redis/vl/langchain4j/RedisVLEmbeddingStoreTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.*; +import com.redis.vl.BaseIntegrationTest; import com.redis.vl.index.SearchIndex; import com.redis.vl.schema.IndexSchema; import dev.langchain4j.data.document.Metadata; @@ -14,8 +15,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import redis.clients.jedis.RedisClient; -import redis.clients.jedis.UnifiedJedis; /** * Test for RedisVLEmbeddingStore - LangChain4J integration. @@ -23,9 +22,8 @@ *

Tests the EmbeddingStore implementation using RedisVL as backend. */ @Tag("integration") -class RedisVLEmbeddingStoreTest { +class RedisVLEmbeddingStoreTest extends BaseIntegrationTest { - private UnifiedJedis jedis; private SearchIndex searchIndex; private RedisVLEmbeddingStore embeddingStore; private static final String INDEX_NAME = "test_lc4j_embeddings"; @@ -33,8 +31,6 @@ class RedisVLEmbeddingStoreTest { @BeforeEach void setUp() { - jedis = RedisClient.create("localhost", 6379); - // Create schema for embeddings Map schema = Map.of( @@ -53,7 +49,7 @@ void setUp() { Map.of("dims", VECTOR_DIM, "algorithm", "flat", "distance_metric", "cosine")))); // Create search index - searchIndex = new SearchIndex(IndexSchema.fromDict(schema), jedis); + searchIndex = new SearchIndex(IndexSchema.fromDict(schema), unifiedJedis); try { searchIndex.create(true); // Overwrite if exists } catch (Exception e) { @@ -83,10 +79,6 @@ void tearDown() { // Ignore cleanup errors } } - - if (jedis != null) { - jedis.close(); - } } @Test diff --git a/core/src/test/java/com/redis/vl/notebooks/AdvancedQueriesNotebookIntegrationTest.java b/core/src/test/java/com/redis/vl/notebooks/AdvancedQueriesNotebookIntegrationTest.java index 6463dad..d4a88c0 100644 --- a/core/src/test/java/com/redis/vl/notebooks/AdvancedQueriesNotebookIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/notebooks/AdvancedQueriesNotebookIntegrationTest.java @@ -4,33 +4,26 @@ import com.redis.vl.BaseIntegrationTest; import com.redis.vl.index.SearchIndex; -import com.redis.vl.query.AggregateHybridQuery; -import com.redis.vl.query.Filter; -import com.redis.vl.query.HybridQuery; -import com.redis.vl.query.MultiVectorQuery; -import com.redis.vl.query.TextQuery; +import com.redis.vl.query.*; import com.redis.vl.schema.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import org.junit.jupiter.api.*; /** - * Port of Python notebook: docs/user_guide/11_advanced_queries.ipynb + * Integration test that mirrors notebooks/11_advanced_queries.ipynb exactly. * - *

Demonstrates advanced query types available in RedisVL: + *

Each test method corresponds to one or more code cells in the notebook. If a test fails here, + * the corresponding notebook cell will also fail. + * + *

Notebook sections: * *

    - *
  1. TextQuery: Full text search with advanced scoring algorithms (BM25, TFIDF) - *
  2. AggregateHybridQuery: Combines text and vector search for hybrid retrieval - *
  3. MultiVectorQuery: Search over multiple vector fields simultaneously + *
  4. TextQuery: Full text search with scoring, filters, weights, and returnFields + *
  5. HybridQuery (FT.HYBRID) and AggregateHybridQuery (FT.AGGREGATE): Hybrid text+vector search + *
  6. MultiVectorQuery: Multi-vector search over multiple embedding fields *
- * - *

Python reference: - * /Users/brian.sam-bodden/Code/redis/py/redis-vl-python/docs/user_guide/11_advanced_queries.ipynb */ @Tag("integration") @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -40,10 +33,7 @@ public class AdvancedQueriesNotebookIntegrationTest extends BaseIntegrationTest private static final String INDEX_NAME = "advanced_queries"; private static final String PREFIX = "products"; - /** - * Helper method to convert float array to byte array for vector fields. Matches Python: - * np.array([...], dtype=np.float32).tobytes() - */ + /** Notebook cell: helper-methods */ private byte[] floatArrayToBytes(float[] vector) { ByteBuffer buffer = ByteBuffer.allocate(vector.length * 4).order(ByteOrder.LITTLE_ENDIAN); for (float value : vector) { @@ -52,9 +42,9 @@ private byte[] floatArrayToBytes(float[] vector) { return buffer.array(); } + /** Notebook cells: schema + load-data */ @BeforeEach void setUp() { - // Python schema from cells 4-5 IndexSchema schema = IndexSchema.builder() .name(INDEX_NAME) @@ -82,19 +72,17 @@ void setUp() { index = new SearchIndex(schema, unifiedJedis); index.create(true); - - // Load sample product data from Python cell 2 loadSampleData(); } + /** Notebook cell: sample-data — identical product data */ private void loadSampleData() { - // Python sample data from cell 2 List> products = Arrays.asList( createProduct( "prod_1", "comfortable running shoes for athletes", - "Engineered with a dual-layer EVA foam midsole and FlexWeave breathable mesh upper", + "Engineered with a dual-layer EVA foam midsole and FlexWeave breathable mesh upper, these running shoes deliver responsive cushioning for long-distance runs. The anatomical footbed adapts to your stride while the carbon rubber outsole provides superior traction on varied terrain.", "footwear", 89.99, 4.5, @@ -103,7 +91,7 @@ private void loadSampleData() { createProduct( "prod_2", "lightweight running jacket with water resistance", - "Stay protected with this ultralight 2.5-layer DWR-coated shell featuring laser-cut ventilation", + "Stay protected with this ultralight 2.5-layer DWR-coated shell featuring laser-cut ventilation zones and reflective piping for low-light visibility. Packs into its own chest pocket and weighs just 4.2 oz, making it ideal for unpredictable weather conditions.", "outerwear", 129.99, 4.8, @@ -112,7 +100,7 @@ private void loadSampleData() { createProduct( "prod_3", "professional tennis racket for competitive players", - "Competition-grade racket featuring a 98 sq in head size, 16x19 string pattern", + "Competition-grade racket featuring a 98 sq in head size, 16x19 string pattern, and aerospace-grade graphite frame that delivers explosive power with pinpoint control. Tournament-approved specs include 315g weight and 68 RA stiffness rating for advanced baseline play.", "equipment", 199.99, 4.9, @@ -121,7 +109,7 @@ private void loadSampleData() { createProduct( "prod_4", "yoga mat with extra cushioning for comfort", - "Premium 8mm thick TPE yoga mat with dual-texture surface", + "Premium 8mm thick TPE yoga mat with dual-texture surface - smooth side for hot yoga flow and textured side for maximum grip during balancing poses. Closed-cell technology prevents moisture absorption while alignment markers guide proper positioning in asanas.", "accessories", 39.99, 4.3, @@ -130,7 +118,7 @@ private void loadSampleData() { createProduct( "prod_5", "basketball shoes with excellent ankle support", - "High-top basketball sneakers with Zoom Air units in forefoot and heel", + "High-top basketball sneakers with Zoom Air units in forefoot and heel, reinforced lateral sidewalls for explosive cuts, and herringbone traction pattern optimized for hardwood courts. The internal bootie construction and extended ankle collar provide lockdown support during aggressive drives.", "footwear", 139.99, 4.7, @@ -139,14 +127,13 @@ private void loadSampleData() { createProduct( "prod_6", "swimming goggles with anti-fog coating", - "Low-profile competition goggles with curved polycarbonate lenses", + "Low-profile competition goggles with curved polycarbonate lenses offering 180-degree peripheral vision and UV protection. Hydrophobic anti-fog coating lasts 10x longer than standard treatments, while the split silicone strap and interchangeable nose bridges ensure a watertight, custom fit.", "accessories", 24.99, 4.4, new float[] {0.3f, 0.1f, 0.2f}, new float[] {0.2f, 0.8f})); - // Use SearchIndex.load() for proper data loading index.load(products, "product_id"); } @@ -178,278 +165,355 @@ void tearDown() { } } - /** - * ## 1. TextQuery: Full Text Search - * - *

Python cell 8: Basic text search for "running shoes" - */ + // ===== Section 1: TextQuery ===== + + /** Notebook cell: text-query-basic */ @Test @Order(1) - @DisplayName("Basic Text Search - Python cell 8") - void testBasicTextSearch() { - // Python: TextQuery(text="running shoes", text_field_name="brief_description", ...) - TextQuery query = + @DisplayName("TextQuery basic search with returnFields") + void testTextQueryBasic() { + TextQuery textQuery = TextQuery.builder() .text("running shoes") .textField("brief_description") + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) .numResults(5) .build(); - List> results = index.query(query); + List> results = index.query(textQuery); assertThat(results).isNotEmpty(); - // Should find prod_1 (running shoes) and prod_2 (running jacket) assertThat(results).anyMatch(doc -> "prod_1".equals(doc.get("product_id"))); + // Verify returnFields works — results should contain only the requested fields (plus id) + for (Map result : results) { + assertThat(result).containsKey("product_id"); + assertThat(result).containsKey("brief_description"); + } } - /** - * ### Text Search with Different Scoring Algorithms - * - *

Python cells 10-11: Compare BM25STD and TFIDF scorers - */ + /** Notebook cell: text-query-bm25 */ @Test @Order(2) - @DisplayName("Text Search with Different Scoring Algorithms - Python cells 10-11") - void testTextSearchWithDifferentScorers() { - // Python cell 10: BM25 scoring + @DisplayName("TextQuery with BM25 scorer and returnFields") + void testTextQueryBM25() { TextQuery bm25Query = TextQuery.builder() .text("comfortable shoes") .textField("brief_description") - .scorer("BM25STD") // Python: text_scorer="BM25STD" + .scorer("BM25STD") + .returnFields(Arrays.asList("product_id", "brief_description", "price")) .numResults(3) .build(); List> bm25Results = index.query(bm25Query); assertThat(bm25Results).isNotEmpty(); + assertThat(bm25Results).anyMatch(doc -> "prod_1".equals(doc.get("product_id"))); + } - // Python cell 11: TFIDF scoring + /** Notebook cell: text-query-tfidf */ + @Test + @Order(3) + @DisplayName("TextQuery with TFIDF scorer and returnFields") + void testTextQueryTFIDF() { TextQuery tfidfQuery = TextQuery.builder() .text("comfortable shoes") .textField("brief_description") - .scorer("TFIDF") // Python: text_scorer="TFIDF" + .scorer("TFIDF") + .returnFields(Arrays.asList("product_id", "brief_description", "price")) .numResults(3) .build(); List> tfidfResults = index.query(tfidfQuery); assertThat(tfidfResults).isNotEmpty(); - - // Both scorers should return results, though potentially in different orders - assertThat(bm25Results).hasSizeGreaterThan(0); - assertThat(tfidfResults).hasSizeGreaterThan(0); } - /** - * ### Text Search with Filters - * - *

Python cells 13-14: Combine text search with tag and numeric filters - */ + /** Notebook cell: text-query-filter-tag */ @Test - @Order(3) - @DisplayName("Text Search with Filters - Python cells 13-14") - void testTextSearchWithFilters() { - // Python cell 13: Search for "shoes" only in footwear category - // filter_expression=Tag("category") == "footwear" - TextQuery categoryFilterQuery = + @Order(4) + @DisplayName("TextQuery with tag filter and returnFields") + void testTextQueryFilterTag() { + TextQuery filteredTextQuery = TextQuery.builder() .text("shoes") .textField("brief_description") .filterExpression(Filter.tag("category", "footwear")) + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) .numResults(5) .build(); - List> categoryResults = index.query(categoryFilterQuery); + List> filteredResults = index.query(filteredTextQuery); - assertThat(categoryResults).isNotEmpty(); - // Verify all results are in footwear category - assertThat(categoryResults).allMatch(doc -> "footwear".equals(doc.get("category"))); + assertThat(filteredResults).isNotEmpty(); + assertThat(filteredResults).allMatch(doc -> "footwear".equals(doc.get("category"))); + } - // Python cell 14: Search for products under $100 - // filter_expression=Num("price") < 100 - TextQuery priceFilterQuery = + /** Notebook cell: text-query-filter-numeric */ + @Test + @Order(5) + @DisplayName("TextQuery with numeric filter and returnFields") + void testTextQueryFilterNumeric() { + TextQuery priceFilteredQuery = TextQuery.builder() .text("comfortable") .textField("brief_description") .filterExpression(Filter.numeric("price").lt(100)) + .returnFields(Arrays.asList("product_id", "brief_description", "price")) .numResults(5) .build(); - List> priceResults = index.query(priceFilterQuery); + List> priceResults = index.query(priceFilteredQuery); assertThat(priceResults).isNotEmpty(); - // Verify all results are under $100 assertThat(priceResults) .allMatch( doc -> { - Object priceObj = doc.get("price"); - double price = - priceObj instanceof Number - ? ((Number) priceObj).doubleValue() - : Double.parseDouble(priceObj.toString()); + double price = Double.parseDouble(doc.get("price").toString()); return price < 100; }); } - /** - * ### Text Search with Multiple Fields and Weights - * - *

Python cell 16: Prioritize brief_description (1.0) over full_description (0.5) - */ + /** Notebook cell: text-query-weighted */ @Test - @Order(4) - @DisplayName("Text Search with Field Weights - Python cell 16") - void testTextSearchWithWeights() { - // Python: text_field_name={"brief_description": 1.0, "full_description": 0.5} - Map fieldWeights = Map.of("brief_description", 1.0, "full_description", 0.5); + @Order(6) + @DisplayName("TextQuery with weighted fields and returnFields") + void testTextQueryWeighted() { + Map fieldWeights = new HashMap<>(); + fieldWeights.put("brief_description", 1.0); + fieldWeights.put("full_description", 0.5); TextQuery weightedQuery = - TextQuery.builder().text("shoes").textFieldWeights(fieldWeights).numResults(3).build(); - - List> results = index.query(weightedQuery); + TextQuery.builder() + .text("shoes") + .textFieldWeights(fieldWeights) + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .build(); - assertThat(results).isNotEmpty(); - // Should prioritize matches in brief_description + List> weightedResults = index.query(weightedQuery); + assertThat(weightedResults).isNotEmpty(); } - /** - * ## 2. AggregateHybridQuery: Combining Text and Vector Search - * - *

Python cell 23: Basic hybrid query combining text and vector search - * - *

NOTE: Disabled due to pre-existing HybridQuery test failures (not introduced by this PR) - */ + // ===== Section 2: HybridQuery (FT.HYBRID) ===== + + /** Notebook cell: hybrid-query-basic */ @Test - @Order(5) - @DisplayName("Basic Aggregate Hybrid Query - Python cell 23") - void testBasicAggregateHybridQuery() { - // Python: AggregateHybridQuery(text="running shoes", text_field_name="brief_description", - // vector=[0.1, 0.2, 0.1], vector_field_name="text_embedding", ...) + @Order(7) + @DisplayName("HybridQuery basic with LINEAR combination") + void testHybridQueryBasic() { HybridQuery hybridQuery = + HybridQuery.builder() + .text("running shoes") + .textFieldName("brief_description") + .vector(new float[] {0.1f, 0.2f, 0.1f}) + .vectorFieldName("text_embedding") + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) + .numResults(5) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .yieldCombinedScoreAs("hybrid_score") + .build(); + + List> hybridResults = index.query(hybridQuery); + + assertThat(hybridResults).isNotEmpty(); + assertThat(hybridResults).anyMatch(doc -> "prod_1".equals(doc.get("product_id"))); + } + + /** Notebook cell: rdih4aduzml — AggregateHybridQuery fallback */ + @Test + @Order(8) + @DisplayName("AggregateHybridQuery basic (FT.AGGREGATE)") + void testAggregateHybridQueryBasic() { + AggregateHybridQuery aggHybridQuery = AggregateHybridQuery.builder() .text("running shoes") .textFieldName("brief_description") .vector(new float[] {0.1f, 0.2f, 0.1f}) .vectorFieldName("text_embedding") + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) .numResults(5) .build(); - List> results = index.query(hybridQuery); + List> aggResults = index.query(aggHybridQuery); - assertThat(results).isNotEmpty(); - // HybridQuery returns aggregation results with hybrid_score, text_score, vector_similarity - assertThat(results).allMatch(doc -> doc.containsKey("hybrid_score")); - assertThat(results).allMatch(doc -> doc.containsKey("text_score")); - assertThat(results).allMatch(doc -> doc.containsKey("vector_similarity")); + assertThat(aggResults).isNotEmpty(); + assertThat(aggResults).allMatch(doc -> doc.containsKey("hybrid_score")); + assertThat(aggResults).allMatch(doc -> doc.containsKey("text_score")); + assertThat(aggResults).allMatch(doc -> doc.containsKey("vector_similarity")); } - /** - * ### Adjusting the Alpha Parameter - * - *

Python cell 25: Emphasize vector search with alpha=0.9 (90% vector, 10% text) - */ + /** Notebook cell: hybrid-query-alpha-code — HybridQuery with linearAlpha */ @Test - @Order(6) - @DisplayName("Hybrid Query with Alpha Parameter - Python cell 25") - void testHybridQueryWithAlpha() { - // Python: alpha=0.9 (90% vector, 10% text) + @Order(9) + @DisplayName("HybridQuery with linearAlpha=0.1 (vector-heavy)") + void testHybridQueryLinearAlpha() { HybridQuery vectorHeavyQuery = - AggregateHybridQuery.builder() + HybridQuery.builder() .text("comfortable") .textFieldName("brief_description") .vector(new float[] {0.15f, 0.25f, 0.15f}) .vectorFieldName("text_embedding") - .alpha(0.9f) // 90% vector, 10% text + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .linearAlpha(0.1f) // 10% text, 90% vector + .returnFields(Arrays.asList("product_id", "brief_description")) .numResults(3) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .yieldCombinedScoreAs("hybrid_score") .build(); List> results = index.query(vectorHeavyQuery); + assertThat(results).isNotEmpty(); + } + /** Notebook cell: euln4vqwvg — AggregateHybridQuery with alpha=0.9 */ + @Test + @Order(10) + @DisplayName("AggregateHybridQuery with alpha=0.9 (vector-heavy)") + void testAggregateHybridQueryAlpha() { + AggregateHybridQuery vectorHeavyAggQuery = + AggregateHybridQuery.builder() + .text("comfortable") + .textFieldName("brief_description") + .vector(new float[] {0.15f, 0.25f, 0.15f}) + .vectorFieldName("text_embedding") + .alpha(0.9f) // 90% vector, 10% text + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .build(); + + List> results = index.query(vectorHeavyAggQuery); assertThat(results).isNotEmpty(); - // HybridQuery returns aggregation results with hybrid_score, text_score, vector_similarity assertThat(results).allMatch(doc -> doc.containsKey("hybrid_score")); - assertThat(results).allMatch(doc -> doc.containsKey("text_score")); - assertThat(results).allMatch(doc -> doc.containsKey("vector_similarity")); - // Results should prioritize vector similarity over text matching (alpha=0.9) } - /** - * ### Aggregate Hybrid Query with Filters - * - *

Python cell 27: Hybrid search with price filter - */ + /** Notebook cell: agr8k3jy1ip — HybridQuery with RRF combination */ @Test - @Order(7) - @DisplayName("Hybrid Query with Filters - Python cell 27") - void testHybridQueryWithFilters() { - // Python: filter_expression=Num("price") > 100 + @Order(11) + @DisplayName("HybridQuery with RRF combination method") + void testHybridQueryRRF() { + HybridQuery rrfQuery = + HybridQuery.builder() + .text("comfortable") + .textFieldName("brief_description") + .vector(new float[] {0.15f, 0.25f, 0.15f}) + .vectorFieldName("text_embedding") + .combinationMethod(HybridQuery.CombinationMethod.RRF) + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .yieldCombinedScoreAs("hybrid_score") + .build(); + + List> rrfResults = index.query(rrfQuery); + assertThat(rrfResults).isNotEmpty(); + } + + /** Notebook cell: hybrid-query-filter-code — HybridQuery with filter */ + @Test + @Order(12) + @DisplayName("HybridQuery with price filter") + void testHybridQueryWithFilter() { HybridQuery filteredHybridQuery = - AggregateHybridQuery.builder() + HybridQuery.builder() .text("professional equipment") .textFieldName("brief_description") .vector(new float[] {0.9f, 0.1f, 0.05f}) .vectorFieldName("text_embedding") .filterExpression(Filter.numeric("price").gt(100)) + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) .numResults(5) + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .yieldCombinedScoreAs("hybrid_score") .build(); List> results = index.query(filteredHybridQuery); + assertThat(results).isNotEmpty(); + } + + /** Notebook cell: p2yl7z26pd — AggregateHybridQuery with filter */ + @Test + @Order(13) + @DisplayName("AggregateHybridQuery with price filter") + void testAggregateHybridQueryWithFilter() { + AggregateHybridQuery filteredAggHybridQuery = + AggregateHybridQuery.builder() + .text("professional equipment") + .textFieldName("brief_description") + .vector(new float[] {0.9f, 0.1f, 0.05f}) + .vectorFieldName("text_embedding") + .filterExpression(Filter.numeric("price").gt(100)) + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) + .numResults(5) + .build(); + List> results = index.query(filteredAggHybridQuery); assertThat(results).isNotEmpty(); - // HybridQuery returns aggregation results with hybrid_score, text_score, vector_similarity assertThat(results).allMatch(doc -> doc.containsKey("hybrid_score")); - assertThat(results).allMatch(doc -> doc.containsKey("text_score")); - assertThat(results).allMatch(doc -> doc.containsKey("vector_similarity")); - // Filter ensures only products with price > $100 are included in the aggregation } - /** - * ### Using Different Text Scorers - * - *

Python cell 29: Hybrid query with TFIDF scorer - */ + /** Notebook cell: hybrid-query-scorer-code — HybridQuery with TFIDF scorer */ @Test - @Order(8) - @DisplayName("Hybrid Query with TFIDF Scorer - Python cell 29") - void testHybridQueryWithTFIDF() { - // Python: text_scorer="TFIDF" + @Order(14) + @DisplayName("HybridQuery with TFIDF scorer") + void testHybridQueryTFIDFScorer() { HybridQuery hybridTfidf = - AggregateHybridQuery.builder() + HybridQuery.builder() .text("shoes support") .textFieldName("brief_description") .vector(new float[] {0.12f, 0.18f, 0.12f}) .vectorFieldName("text_embedding") .textScorer("TFIDF") + .returnFields(Arrays.asList("product_id", "brief_description")) .numResults(3) + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .yieldCombinedScoreAs("hybrid_score") .build(); List> results = index.query(hybridTfidf); + assertThat(results).isNotEmpty(); + } + /** Notebook cell: gerdav306fh — AggregateHybridQuery with TFIDF scorer */ + @Test + @Order(15) + @DisplayName("AggregateHybridQuery with TFIDF scorer") + void testAggregateHybridQueryTFIDFScorer() { + AggregateHybridQuery aggTfidf = + AggregateHybridQuery.builder() + .text("shoes support") + .textFieldName("brief_description") + .vector(new float[] {0.12f, 0.18f, 0.12f}) + .vectorFieldName("text_embedding") + .textScorer("TFIDF") + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .build(); + + List> results = index.query(aggTfidf); assertThat(results).isNotEmpty(); - // HybridQuery returns aggregation results with hybrid_score, text_score, vector_similarity assertThat(results).allMatch(doc -> doc.containsKey("hybrid_score")); - assertThat(results).allMatch(doc -> doc.containsKey("text_score")); - assertThat(results).allMatch(doc -> doc.containsKey("vector_similarity")); - // Should use TFIDF for text scoring combined with vector similarity } - /** - * ## 3. MultiVectorQuery: Multi-Vector Search - * - *

Python cell 32: Search over multiple vector fields (text + image embeddings) - */ + // ===== Section 3: MultiVectorQuery ===== + + /** Notebook cell: multi-vector-query-basic */ @Test - @Order(9) - @DisplayName("Basic Multi-Vector Query - Python cell 32") - void testBasicMultiVectorQuery() { - // Python: - // text_vector = Vector(vector=[0.1, 0.2, 0.1], field_name="text_embedding", weight=0.7) - // image_vector = Vector(vector=[0.8, 0.1], field_name="image_embedding", weight=0.3) + @Order(16) + @DisplayName("MultiVectorQuery basic with two vector fields") + void testMultiVectorQueryBasic() { com.redis.vl.query.Vector textVector = com.redis.vl.query.Vector.builder() .vector(new float[] {0.1f, 0.2f, 0.1f}) .fieldName("text_embedding") .dtype("float32") - .weight(0.7) // 70% weight for text embedding + .weight(0.7) .build(); com.redis.vl.query.Vector imageVector = @@ -457,38 +521,35 @@ void testBasicMultiVectorQuery() { .vector(new float[] {0.8f, 0.1f}) .fieldName("image_embedding") .dtype("float32") - .weight(0.3) // 30% weight for image embedding + .weight(0.3) .build(); - MultiVectorQuery multiQuery = - MultiVectorQuery.builder().vectors(textVector, imageVector).numResults(5).build(); + MultiVectorQuery multiVectorQuery = + MultiVectorQuery.builder() + .vectors(textVector, imageVector) + .returnFields(Arrays.asList("product_id", "brief_description", "category")) + .numResults(5) + .build(); - List> results = index.query(multiQuery); + List> multiResults = index.query(multiVectorQuery); - assertThat(results).isNotEmpty(); - // MultiVectorQuery returns aggregation results with combined_score, score_0, score_1, etc. - assertThat(results).allMatch(doc -> doc.containsKey("combined_score")); - assertThat(results).allMatch(doc -> doc.containsKey("score_0")); // text_embedding score - assertThat(results).allMatch(doc -> doc.containsKey("score_1")); // image_embedding score - // Should return results ranked by combined score: 0.7 * text_score + 0.3 * image_score + assertThat(multiResults).isNotEmpty(); + assertThat(multiResults).allMatch(doc -> doc.containsKey("combined_score")); + assertThat(multiResults).allMatch(doc -> doc.containsKey("score_0")); + assertThat(multiResults).allMatch(doc -> doc.containsKey("score_1")); } - /** - * ### Adjusting Vector Weights - * - *

Python cell 34: Emphasize image similarity (80% image, 20% text) - */ + /** Notebook cell: multi-vector-query-weights */ @Test - @Order(10) - @DisplayName("Multi-Vector Query with Different Weights - Python cell 34") - void testMultiVectorQueryWithDifferentWeights() { - // Python: More emphasis on image similarity + @Order(17) + @DisplayName("MultiVectorQuery with emphasis on image similarity") + void testMultiVectorQueryImageHeavy() { com.redis.vl.query.Vector textVec = com.redis.vl.query.Vector.builder() .vector(new float[] {0.9f, 0.1f, 0.05f}) .fieldName("text_embedding") .dtype("float32") - .weight(0.2) // 20% weight + .weight(0.2) .build(); com.redis.vl.query.Vector imageVec = @@ -496,33 +557,27 @@ void testMultiVectorQueryWithDifferentWeights() { .vector(new float[] {0.1f, 0.9f}) .fieldName("image_embedding") .dtype("float32") - .weight(0.8) // 80% weight + .weight(0.8) .build(); MultiVectorQuery imageHeavyQuery = - MultiVectorQuery.builder().vectors(textVec, imageVec).numResults(3).build(); + MultiVectorQuery.builder() + .vectors(textVec, imageVec) + .returnFields(Arrays.asList("product_id", "brief_description", "category")) + .numResults(3) + .build(); List> results = index.query(imageHeavyQuery); - assertThat(results).isNotEmpty(); - // MultiVectorQuery returns aggregation results with combined_score, score_0, score_1, etc. assertThat(results).allMatch(doc -> doc.containsKey("combined_score")); - assertThat(results).allMatch(doc -> doc.containsKey("score_0")); // text_embedding score - assertThat(results).allMatch(doc -> doc.containsKey("score_1")); // image_embedding score - // Results prioritize image similarity (0.2 * text + 0.8 * image) } - /** - * ### Multi-Vector Query with Filters - * - *

Python cell 36: Combine multi-vector search with category filter - */ + /** Notebook cell: multi-vector-query-filter */ @Test - @Order(11) - @DisplayName("Multi-Vector Query with Filters - Python cell 36") - void testMultiVectorQueryWithFilters() { - // Python: filter_expression=Tag("category") == "footwear" - com.redis.vl.query.Vector textVec = + @Order(18) + @DisplayName("MultiVectorQuery with category filter") + void testMultiVectorQueryWithFilter() { + com.redis.vl.query.Vector textVecFilter = com.redis.vl.query.Vector.builder() .vector(new float[] {0.1f, 0.2f, 0.1f}) .fieldName("text_embedding") @@ -530,7 +585,7 @@ void testMultiVectorQueryWithFilters() { .weight(0.6) .build(); - com.redis.vl.query.Vector imageVec = + com.redis.vl.query.Vector imageVecFilter = com.redis.vl.query.Vector.builder() .vector(new float[] {0.8f, 0.1f}) .fieldName("image_embedding") @@ -540,38 +595,55 @@ void testMultiVectorQueryWithFilters() { MultiVectorQuery filteredMultiQuery = MultiVectorQuery.builder() - .vectors(textVec, imageVec) + .vectors(textVecFilter, imageVecFilter) .filterExpression(Filter.tag("category", "footwear")) + .returnFields(Arrays.asList("product_id", "brief_description", "category", "price")) .numResults(5) .build(); List> results = index.query(filteredMultiQuery); - assertThat(results).isNotEmpty(); - // MultiVectorQuery returns aggregation results with combined_score, score_0, score_1, etc. assertThat(results).allMatch(doc -> doc.containsKey("combined_score")); - assertThat(results).allMatch(doc -> doc.containsKey("score_0")); - assertThat(results).allMatch(doc -> doc.containsKey("score_1")); - // Filter ensures only footwear category products are included } - /** - * ## Comparing Query Types - * - *

Python cells 38-40: Side-by-side comparison of TextQuery and MultiVectorQuery - */ + // ===== Section 4: Comparing Query Types ===== + + /** Notebook cell: compare-queries */ @Test - @Order(12) - @DisplayName("Comparing Query Types - Python cells 38-40") + @Order(19) + @DisplayName("Compare all query types side by side") void testCompareQueryTypes() { - // Python cell 38: TextQuery - keyword-based search - TextQuery textQuery = - TextQuery.builder().text("shoes").textField("brief_description").numResults(3).build(); + // TextQuery + TextQuery textQ = + TextQuery.builder() + .text("shoes") + .textField("brief_description") + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .build(); - List> textResults = index.query(textQuery); + List> textResults = index.query(textQ); assertThat(textResults).isNotEmpty(); - // Python cell 40: MultiVectorQuery - searches multiple vector fields + // HybridQuery (FT.HYBRID) + HybridQuery hybridQ = + HybridQuery.builder() + .text("shoes") + .textFieldName("brief_description") + .vector(new float[] {0.1f, 0.2f, 0.1f}) + .vectorFieldName("text_embedding") + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .combinationMethod(HybridQuery.CombinationMethod.LINEAR) + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_similarity") + .yieldCombinedScoreAs("hybrid_score") + .build(); + + List> hybridResults = index.query(hybridQ); + assertThat(hybridResults).isNotEmpty(); + + // MultiVectorQuery com.redis.vl.query.Vector mvText = com.redis.vl.query.Vector.builder() .vector(new float[] {0.1f, 0.2f, 0.1f}) @@ -588,14 +660,32 @@ void testCompareQueryTypes() { .weight(0.5) .build(); - MultiVectorQuery multiQuery = - MultiVectorQuery.builder().vectors(mvText, mvImage).numResults(3).build(); + MultiVectorQuery multiQ = + MultiVectorQuery.builder() + .vectors(mvText, mvImage) + .returnFields(Arrays.asList("product_id", "brief_description")) + .numResults(3) + .build(); - List> multiResults = index.query(multiQuery); + List> multiResults = index.query(multiQ); assertThat(multiResults).isNotEmpty(); - // All query types should return results + // All three query types should produce results assertThat(textResults).hasSizeGreaterThan(0); + assertThat(hybridResults).hasSizeGreaterThan(0); assertThat(multiResults).hasSizeGreaterThan(0); } + + // ===== Section 5: Cleanup ===== + + /** Notebook cell: cleanup-code — verified via tearDown */ + @Test + @Order(20) + @DisplayName("Cleanup - delete index and verify") + void testCleanup() { + assertThat(index.exists()).isTrue(); + index.delete(true); + assertThat(index.exists()).isFalse(); + index = null; // prevent tearDown from double-deleting + } } diff --git a/core/src/test/java/com/redis/vl/notebooks/MessageHistoryNotebookIntegrationTest.java b/core/src/test/java/com/redis/vl/notebooks/MessageHistoryNotebookIntegrationTest.java index 43412c4..ee3577d 100644 --- a/core/src/test/java/com/redis/vl/notebooks/MessageHistoryNotebookIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/notebooks/MessageHistoryNotebookIntegrationTest.java @@ -119,6 +119,7 @@ public void testStorePromptResponse() { } assertThat(context).hasSize(6); + // The last two messages are the stored prompt/response pair in order. assertThat(context.get(4)).containsEntry("role", "user").containsEntry("content", prompt); assertThat(context.get(5)).containsEntry("role", "llm").containsEntry("content", response); } @@ -202,7 +203,7 @@ public void testDropMessage() { "what is the smallest country in Europe?", "Monaco is the smallest country in Europe at 0.78 square miles."); - // Get the key of the incorrect message + // Get the key of the incorrect message (the most recent one is the llm response) List> rawContext = chatHistory.getRecent(1, false, true, null); assertThat(rawContext).hasSize(1); String badKey = (String) rawContext.get(0).get("entry_id"); diff --git a/core/src/test/java/com/redis/vl/query/AggregateHybridQueryIntegrationTest.java b/core/src/test/java/com/redis/vl/query/AggregateHybridQueryIntegrationTest.java new file mode 100644 index 0000000..7f55875 --- /dev/null +++ b/core/src/test/java/com/redis/vl/query/AggregateHybridQueryIntegrationTest.java @@ -0,0 +1,369 @@ +package com.redis.vl.query; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.redis.vl.BaseIntegrationTest; +import com.redis.vl.index.SearchIndex; +import com.redis.vl.schema.IndexSchema; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** + * Integration tests for AggregateHybridQuery - the FT.AGGREGATE-based hybrid search. + * + *

Ported from Python test_aggregation.py + * + *

AggregateHybridQuery combines text and vector search using Redis aggregation to score + * documents based on a weighted combination of text and vector similarity. + */ +@Tag("integration") +@DisplayName("AggregateHybridQuery Integration Tests") +class AggregateHybridQueryIntegrationTest extends BaseIntegrationTest { + + private SearchIndex index; + + @BeforeEach + void setUp() { + String schemaYaml = + """ + version: '0.1.0' + index: + name: user-index-agg-hybrid + prefix: user-agg-hybrid + storage_type: hash + fields: + - name: user + type: tag + - name: credit_score + type: tag + - name: job + type: text + - name: description + type: text + - name: age + type: numeric + - name: last_updated + type: numeric + - name: location + type: geo + - name: user_embedding + type: vector + attrs: + dims: 3 + distance_metric: cosine + algorithm: flat + datatype: float32 + """; + + IndexSchema schema = IndexSchema.fromYaml(schemaYaml); + index = new SearchIndex(schema, unifiedJedis); + index.create(true); + + List> data = + List.of( + Map.of( + "user", + "john", + "age", + 18, + "job", + "engineer", + "credit_score", + "high", + "description", + "A talented engineer specializing in software development", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "derrick", + "age", + 14, + "job", + "doctor", + "credit_score", + "low", + "description", + "A medical professional with expertise in lung cancer", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "nancy", + "age", + 94, + "job", + "doctor", + "credit_score", + "high", + "description", + "A cardiologist with 30 years of experience in heart surgery", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "tyler", + "age", + 100, + "job", + "engineer", + "credit_score", + "high", + "description", + "An aerospace engineer working on satellite systems", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "tim", + "age", + 12, + "job", + "dermatologist", + "credit_score", + "high", + "description", + "A dermatologist focusing on skin cancer research", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "taimur", + "age", + 15, + "job", + "CEO", + "credit_score", + "low", + "description", + "Chief executive officer of a tech startup", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f}), + Map.of( + "user", + "joe", + "age", + 35, + "job", + "dentist", + "credit_score", + "medium", + "description", + "A dentist specializing in cosmetic procedures", + "location", + "-122.4194,37.7749", + "user_embedding", + new float[] {0.1f, 0.1f, 0.5f})); + + index.load(data, "user"); + } + + @AfterEach + void tearDown() { + if (index != null) { + index.delete(true); + } + } + + @Test + void testBasicAggregateHybridQuery() { + String text = "a medical professional with expertise in lung cancer"; + String textField = "description"; + float[] vector = new float[] {0.1f, 0.1f, 0.5f}; + String vectorField = "user_embedding"; + List returnFields = + List.of("user", "credit_score", "age", "job", "location", "description"); + + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .returnFields(returnFields) + .build(); + + List> results = index.query(query); + + assertThat(results).hasSize(7); + for (Map doc : results) { + assertThat(doc.get("user")).isIn("john", "derrick", "nancy", "tyler", "tim", "taimur", "joe"); + assertThat(doc).containsKeys("age", "job", "credit_score"); + } + + // Test with limited results + AggregateHybridQuery limitedQuery = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .numResults(3) + .build(); + + List> limitedResults = index.query(limitedQuery); + assertThat(limitedResults).hasSize(3); + + double firstScore = getDoubleValue(limitedResults.get(0), "hybrid_score"); + double secondScore = getDoubleValue(limitedResults.get(1), "hybrid_score"); + double thirdScore = getDoubleValue(limitedResults.get(2), "hybrid_score"); + + assertThat(firstScore).isGreaterThanOrEqualTo(secondScore); + assertThat(secondScore).isGreaterThanOrEqualTo(thirdScore); + } + + @Test + void testAggregateHybridQueryEmptyTextValidation() { + String textField = "description"; + float[] vector = new float[] {0.1f, 0.1f, 0.5f}; + String vectorField = "user_embedding"; + + assertThrows( + IllegalArgumentException.class, + () -> + AggregateHybridQuery.builder() + .text("") + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .build()); + + assertThrows( + IllegalArgumentException.class, + () -> + AggregateHybridQuery.builder() + .text("with a for but and") + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .build()); + } + + @Test + void testAggregateHybridQueryWithTagAndNumericFilter() { + String text = "a medical professional with expertise in lung cancer"; + String textField = "description"; + float[] vector = new float[] {0.1f, 0.1f, 0.5f}; + String vectorField = "user_embedding"; + List returnFields = + List.of("user", "credit_score", "age", "job", "location", "description"); + + Filter filterExpression = + Filter.and(Filter.tag("credit_score", "high"), Filter.numeric("age").gt(30)); + + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .filterExpression(filterExpression) + .returnFields(returnFields) + .build(); + + List> results = index.query(query); + + assertThat(results).hasSize(2); + + for (Map result : results) { + assertThat(result.get("credit_score")).isEqualTo("high"); + int age = getIntValue(result, "age"); + assertThat(age).isGreaterThan(30); + } + } + + @Test + void testAggregateHybridQueryAlphaParameter() { + String text = "a medical professional with expertise in lung cancer"; + String textField = "description"; + float[] vector = new float[] {0.1f, 0.1f, 0.5f}; + String vectorField = "user_embedding"; + + for (float alpha : new float[] {0.1f, 0.5f, 0.9f}) { + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .alpha(alpha) + .build(); + + List> results = index.query(query); + assertThat(results).hasSize(7); + + for (Map result : results) { + double vectorSimilarity = getDoubleValue(result, "vector_similarity"); + double textScore = getDoubleValue(result, "text_score"); + double hybridScore = getDoubleValue(result, "hybrid_score"); + + double expectedScore = alpha * vectorSimilarity + (1 - alpha) * textScore; + assertThat(Math.abs(hybridScore - expectedScore)).isLessThan(0.0001); + } + } + } + + @Test + void testAggregateHybridQueryWithStringFilterExpression() { + String text = "a medical professional with expertise in lung cancer"; + String textField = "description"; + float[] vector = new float[] {0.1f, 0.1f, 0.5f}; + String vectorField = "user_embedding"; + + String stringFilter = "(@credit_score:{high} @age:[31 +inf])"; + + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textField) + .vector(vector) + .vectorFieldName(vectorField) + .filterExpression(stringFilter) + .returnFields(List.of("user", "credit_score", "age", "job")) + .build(); + + List> results = index.query(query); + + assertThat(results).hasSize(2); + + for (Map result : results) { + assertThat(result.get("credit_score")).isEqualTo("high"); + int age = getIntValue(result, "age"); + assertThat(age).isGreaterThan(30); + } + } + + private double getDoubleValue(Map map, String key) { + Object value = map.get(key); + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + return Double.parseDouble(value.toString()); + } + + private int getIntValue(Map map, String key) { + Object value = map.get(key); + if (value instanceof Number) { + return ((Number) value).intValue(); + } + return Integer.parseInt(value.toString()); + } +} diff --git a/core/src/test/java/com/redis/vl/query/AggregateHybridQueryTest.java b/core/src/test/java/com/redis/vl/query/AggregateHybridQueryTest.java new file mode 100644 index 0000000..6ff6cf9 --- /dev/null +++ b/core/src/test/java/com/redis/vl/query/AggregateHybridQueryTest.java @@ -0,0 +1,188 @@ +package com.redis.vl.query; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for AggregateHybridQuery - ported from Python test_aggregation_types.py + * + *

Python reference: /redis-vl-python/tests/unit/test_aggregation_types.py + * + *

Tests the FT.AGGREGATE-based hybrid search (AggregateHybridQuery). + */ +@DisplayName("AggregateHybridQuery Unit Tests") +class AggregateHybridQueryTest { + + private static final float[] SAMPLE_VECTOR = new float[] {0.1f, 0.2f, 0.3f}; + + @Test + @DisplayName("Should support string filter expressions") + void testAggregateHybridQueryWithStringFilter() { + String text = "search for document 12345"; + String textFieldName = "description"; + String vectorFieldName = "embedding"; + + String stringFilter = "@category:{tech|science|engineering}"; + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textFieldName) + .vector(SAMPLE_VECTOR) + .vectorFieldName(vectorFieldName) + .filterExpression(stringFilter) + .build(); + + assertThat(query.getFilterExpression()).isEqualTo(stringFilter); + + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); + assertThat(queryString).contains("AND " + stringFilter); + } + + @Test + @DisplayName("Should support Filter objects") + void testAggregateHybridQueryWithFilterObject() { + String text = "search for document 12345"; + String textFieldName = "description"; + String vectorFieldName = "embedding"; + + Filter filterExpression = Filter.tag("category", "tech"); + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textFieldName) + .vector(SAMPLE_VECTOR) + .vectorFieldName(vectorFieldName) + .filterExpression(filterExpression) + .build(); + + assertThat(query.getFilterExpression()).isEqualTo(filterExpression); + + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); + assertThat(queryString).contains("AND @category:{tech}"); + } + + @Test + @DisplayName("Should work without filter") + void testAggregateHybridQueryNoFilter() { + String text = "search for document 12345"; + String textFieldName = "description"; + String vectorFieldName = "embedding"; + + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textFieldName) + .vector(SAMPLE_VECTOR) + .vectorFieldName(vectorFieldName) + .build(); + + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); + assertThat(queryString).doesNotContain("AND"); + } + + @Test + @DisplayName("Should handle wildcard filter") + void testAggregateHybridQueryWildcardFilter() { + String text = "search for document 12345"; + String textFieldName = "description"; + String vectorFieldName = "embedding"; + + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text(text) + .textFieldName(textFieldName) + .vector(SAMPLE_VECTOR) + .vectorFieldName(vectorFieldName) + .filterExpression("*") + .build(); + + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); + assertThat(queryString).doesNotContain("AND"); + } + + @Test + @DisplayName("Should reject empty text") + void testRejectsEmptyText() { + assertThatThrownBy( + () -> + AggregateHybridQuery.builder() + .text("") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text string cannot be empty"); + } + + @Test + @DisplayName("Should reject text that becomes empty after stopwords removal") + void testRejectsTextThatBecomesEmptyAfterStopwords() { + assertThatThrownBy( + () -> + AggregateHybridQuery.builder() + .text("with a for but and") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text string cannot be empty after removing stopwords"); + } + + @Test + @DisplayName("Should build correct query string format") + void testQueryStringFormat() { + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text("medical professional") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("user_embedding") + .numResults(5) + .build(); + + String queryString = query.buildQueryString(); + + assertThat(queryString).matches(".*\\(~@description:\\(.*\\)\\)=>\\[KNN.*\\]"); + assertThat(queryString).contains("KNN 5 @user_embedding"); + assertThat(queryString).contains("AS vector_distance"); + } + + @Test + @DisplayName("Should store alpha parameter correctly") + void testAlphaParameter() { + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text("test") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .alpha(0.3f) + .build(); + + assertThat(query.getAlpha()).isEqualTo(0.3f); + } + + @Test + @DisplayName("Should store numResults parameter correctly") + void testNumResultsParameter() { + AggregateHybridQuery query = + AggregateHybridQuery.builder() + .text("test") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .numResults(20) + .build(); + + assertThat(query.getNumResults()).isEqualTo(20); + } +} diff --git a/core/src/test/java/com/redis/vl/query/HybridQueryIntegrationTest.java b/core/src/test/java/com/redis/vl/query/HybridQueryIntegrationTest.java index c73b7d3..4c56387 100644 --- a/core/src/test/java/com/redis/vl/query/HybridQueryIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/query/HybridQueryIntegrationTest.java @@ -1,35 +1,36 @@ package com.redis.vl.query; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; import com.redis.vl.BaseIntegrationTest; import com.redis.vl.index.SearchIndex; +import com.redis.vl.query.HybridQuery.CombinationMethod; +import com.redis.vl.query.HybridQuery.VectorSearchMethod; import com.redis.vl.schema.IndexSchema; import java.util.List; import java.util.Map; -import java.util.Set; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; /** - * Integration tests for HybridQuery - ported from Python test_aggregation.py + * Integration tests for HybridQuery - the native FT.HYBRID implementation. * - *

Python reference: /redis-vl-python/tests/integration/test_aggregation.py + *

Requires Redis 8.4+ with native FT.HYBRID command support. * - *

HybridQuery combines text and vector search using Redis aggregation to score documents based - * on a weighted combination of text and vector similarity. + *

HybridQuery combines text and vector search using the native FT.HYBRID command with built-in + * score fusion (RRF or LINEAR). */ -@DisplayName("HybridQuery Integration Tests") +@Tag("integration") +@DisplayName("HybridQuery Integration Tests (FT.HYBRID)") class HybridQueryIntegrationTest extends BaseIntegrationTest { private SearchIndex index; @BeforeEach void setUp() { - // Create index matching Python fixture (test_aggregation.py:10-58) String schemaYaml = """ version: '0.1.0' @@ -65,7 +66,6 @@ void setUp() { index = new SearchIndex(schema, unifiedJedis); index.create(true); - // Load test data matching Python sample_data List> data = List.of( Map.of( @@ -184,8 +184,8 @@ void tearDown() { } } - /** Port of Python test_aggregation_query (line 60) */ @Test + @DisplayName("Basic hybrid query with text and vector search") void testBasicHybridQuery() { String text = "a medical professional with expertise in lung cancer"; String textField = "description"; @@ -205,344 +205,189 @@ void testBasicHybridQuery() { List> results = index.query(query); - // Verify results - assertThat(results).hasSize(7); - for (Map doc : results) { - assertThat(doc.get("user")).isIn("john", "derrick", "nancy", "tyler", "tim", "taimur", "joe"); - assertThat(doc).containsKeys("age", "job", "credit_score"); - } + assertThat(results).isNotEmpty(); + assertThat(results).hasSizeLessThanOrEqualTo(10); + } - // Test with limited results - HybridQuery limitedQuery = + @Test + @DisplayName("Hybrid query with RRF combination method") + void testHybridQueryWithRRF() { + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .numResults(3) + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .combinationMethod(CombinationMethod.RRF) + .rrfWindow(20) + .rrfConstant(60) + .returnFields(List.of("user", "description")) .build(); - List> limitedResults = index.query(limitedQuery); - assertThat(limitedResults).hasSize(3); - - // Verify hybrid scores are sorted descending - double firstScore = getDoubleValue(limitedResults.get(0), "hybrid_score"); - double secondScore = getDoubleValue(limitedResults.get(1), "hybrid_score"); - double thirdScore = getDoubleValue(limitedResults.get(2), "hybrid_score"); + List> results = index.query(query); - assertThat(firstScore).isGreaterThanOrEqualTo(secondScore); - assertThat(secondScore).isGreaterThanOrEqualTo(thirdScore); + assertThat(results).isNotEmpty(); } - /** Port of Python test_empty_query_string (line 112) */ @Test - void testHybridQueryEmptyTextValidation() { - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; + @DisplayName("Hybrid query with LINEAR combination method and alpha") + void testHybridQueryWithLinear() { + HybridQuery query = + HybridQuery.builder() + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .combinationMethod(CombinationMethod.LINEAR) + .linearAlpha(0.5f) + .returnFields(List.of("user", "description")) + .build(); - // Test if text is empty - assertThrows( - IllegalArgumentException.class, - () -> - HybridQuery.builder() - .text("") - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .build()); - - // Test if text becomes empty after stopwords are removed - // "with a for but and" will all be removed as default English stopwords - assertThrows( - IllegalArgumentException.class, - () -> - HybridQuery.builder() - .text("with a for but and") - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .build()); + List> results = index.query(query); + + assertThat(results).isNotEmpty(); } - /** Port of Python test_aggregation_query_with_filter (line 139) */ @Test - void testHybridQueryWithTagAndNumericFilter() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - List returnFields = - List.of("user", "credit_score", "age", "job", "location", "description"); - - Filter filterExpression = - Filter.and(Filter.tag("credit_score", "high"), Filter.numeric("age").gt(30)); - + @DisplayName("Hybrid query with KNN vector search") + void testHybridQueryWithKnn() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .filterExpression(filterExpression) - .returnFields(returnFields) + .text("engineer software") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .vectorSearchMethod(VectorSearchMethod.KNN) + .numResults(3) + .returnFields(List.of("user", "job")) .build(); List> results = index.query(query); - // Should return only high credit_score users with age > 30 - assertThat(results).hasSize(2); // nancy and tyler - - for (Map result : results) { - assertThat(result.get("credit_score")).isEqualTo("high"); - int age = getIntValue(result, "age"); - assertThat(age).isGreaterThan(30); - } + assertThat(results).isNotEmpty(); + assertThat(results).hasSizeLessThanOrEqualTo(3); } - /** Port of Python test_aggregation_query_with_geo_filter (line 165) */ @Test - void testHybridQueryWithGeoFilter() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - List returnFields = - List.of("user", "credit_score", "age", "job", "location", "description"); - - // GeoRadius: longitude, latitude, radius, unit - Filter filterExpression = - Filter.geo("location").radius(-122.4194, 37.7749, 1000, Filter.GeoUnit.M); - + @DisplayName("Hybrid query with tag filter expression") + void testHybridQueryWithTagFilter() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .filterExpression(filterExpression) - .returnFields(returnFields) + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .filterExpression(Filter.tag("credit_score", "high")) + .returnFields(List.of("user", "credit_score", "description")) .build(); List> results = index.query(query); - // Python test expects 3 results - assertThat(results).hasSizeGreaterThanOrEqualTo(3); + assertThat(results).isNotEmpty(); for (Map result : results) { - assertThat(result.get("location")).isNotNull(); - } - } - - /** Port of Python test_aggregate_query_alpha (line 190) */ - @Test - void testHybridQueryAlphaParameter() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - - // Test different alpha values - for (float alpha : new float[] {0.1f, 0.5f, 0.9f}) { - HybridQuery query = - HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .alpha(alpha) - .build(); - - List> results = index.query(query); - assertThat(results).hasSize(7); - - // Verify score calculation: hybrid_score = alpha * vector_similarity + (1-alpha) * text_score - for (Map result : results) { - double vectorSimilarity = getDoubleValue(result, "vector_similarity"); - double textScore = getDoubleValue(result, "text_score"); - double hybridScore = getDoubleValue(result, "hybrid_score"); - - double expectedScore = alpha * vectorSimilarity + (1 - alpha) * textScore; - - // Allow for small floating point error - assertThat(Math.abs(hybridScore - expectedScore)).isLessThan(0.0001); - } + assertThat(result.get("credit_score")).isEqualTo("high"); } } - /** Port of Python test_aggregate_query_stopwords (line 218) */ @Test - void testHybridQueryCustomStopwords() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - float alpha = 0.5f; - - // Custom stopwords - remove "medical" and "expertise" - Set customStopwords = Set.of("medical", "expertise"); - + @DisplayName("Hybrid query with numeric filter expression") + void testHybridQueryWithNumericFilter() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .alpha(alpha) - .stopwords(customStopwords) + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .filterExpression(Filter.numeric("age").gt(30)) + .returnFields(List.of("user", "age", "description")) .build(); - // Verify stopwords were applied by checking query string - String queryString = query.buildQueryString(); - assertThat(queryString).doesNotContain("medical"); - assertThat(queryString).doesNotContain("expertise"); - List> results = index.query(query); - assertThat(results).hasSize(7); - // Verify score calculation still works + assertThat(results).isNotEmpty(); for (Map result : results) { - double vectorSimilarity = getDoubleValue(result, "vector_similarity"); - double textScore = getDoubleValue(result, "text_score"); - double hybridScore = getDoubleValue(result, "hybrid_score"); - - double expectedScore = alpha * vectorSimilarity + (1 - alpha) * textScore; - assertThat(Math.abs(hybridScore - expectedScore)).isLessThan(0.0001); + int age = getIntValue(result, "age"); + assertThat(age).isGreaterThan(30); } } - /** Port of Python test_aggregate_query_with_text_filter (line 252) */ @Test - void testHybridQueryWithTextFilter() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - - // Filter: text field must contain "medical" - Filter filterExpression = Filter.text(textField, "medical"); - + @DisplayName("Hybrid query with string filter expression") + void testHybridQueryWithStringFilter() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .alpha(0.5f) - .filterExpression(filterExpression) - .returnFields(List.of("job", "description")) + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .filterExpression("(@credit_score:{high} @age:[31 +inf])") + .returnFields(List.of("user", "credit_score", "age")) .build(); List> results = index.query(query); - // Note: Only derrick has "medical" in description - // Python test expects 2, but only 1 document actually contains "medical" - assertThat(results).hasSizeGreaterThanOrEqualTo(1); + assertThat(results).isNotEmpty(); for (Map result : results) { - String description = result.get(textField).toString().toLowerCase(); - assertThat(description).contains("medical"); + assertThat(result.get("credit_score")).isEqualTo("high"); + int age = getIntValue(result, "age"); + assertThat(age).isGreaterThan(30); } + } - // Test with NOT filter: contains "medical" but NOT "research" - Filter complexFilter = - Filter.and(Filter.text(textField, "medical"), Filter.textNot(textField, "research")); - - HybridQuery complexQuery = + @Test + @DisplayName("Hybrid query with return fields") + void testHybridQueryWithReturnFields() { + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .alpha(0.5f) - .filterExpression(complexFilter) - .returnFields(List.of("description")) + .text("engineer") + .textFieldName("job") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .returnFields(List.of("user", "job")) + .numResults(5) .build(); - List> complexResults = index.query(complexQuery); - assertThat(complexResults).hasSizeGreaterThanOrEqualTo(1); + List> results = index.query(query); - for (Map result : complexResults) { - String description = result.get(textField).toString().toLowerCase(); - assertThat(description).contains("medical"); - assertThat(description).doesNotContain("research"); - } + assertThat(results).isNotEmpty(); } - /** - * Integration test for string filter expressions - port of Python PR #375 - * - *

Tests that raw Redis filter strings work correctly in HybridQuery - */ @Test - void testHybridQueryWithStringFilterExpression() { - String text = "a medical professional with expertise in lung cancer"; - String textField = "description"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - - // Use raw Redis filter string: credit_score is "high" AND age > 30 - String stringFilter = "(@credit_score:{high} @age:[31 +inf])"; - + @DisplayName("Hybrid query result count verification") + void testHybridQueryResultCount() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .filterExpression(stringFilter) - .returnFields(List.of("user", "credit_score", "age", "job")) + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .numResults(3) + .returnFields(List.of("user")) .build(); List> results = index.query(query); - // Should return only high credit_score users with age > 30 - assertThat(results).hasSize(2); // nancy (94) and tyler (100) - - for (Map result : results) { - assertThat(result.get("credit_score")).isEqualTo("high"); - int age = getIntValue(result, "age"); - assertThat(age).isGreaterThan(30); - } + assertThat(results).hasSizeLessThanOrEqualTo(3); } - /** - * Integration test for wildcard string filter - port of Python test - * - *

Tests that wildcard filter "*" doesn't add an AND clause - */ @Test - void testHybridQueryWithWildcardStringFilter() { - String text = "engineer"; - String textField = "job"; - float[] vector = new float[] {0.1f, 0.1f, 0.5f}; - String vectorField = "user_embedding"; - - // Wildcard filter should match all documents + @DisplayName("Hybrid query with score aliases") + void testHybridQueryWithScoreAliases() { HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textField) - .vector(vector) - .vectorFieldName(vectorField) - .filterExpression("*") + .text("medical professional cancer") + .textFieldName("description") + .vector(new float[] {0.1f, 0.1f, 0.5f}) + .vectorFieldName("user_embedding") + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_score") + .yieldCombinedScoreAs("combined_score") + .returnFields(List.of("user", "description")) .build(); List> results = index.query(query); - // Should return all documents (wildcard doesn't filter) - assertThat(results).hasSize(7); - - // Verify query string doesn't contain "AND *" - String queryString = query.buildQueryString(); - assertThat(queryString).doesNotContain("AND *"); - } - - // Helper methods for type conversion (Hash storage returns strings) - private double getDoubleValue(Map map, String key) { - Object value = map.get(key); - if (value instanceof Number) { - return ((Number) value).doubleValue(); - } - return Double.parseDouble(value.toString()); + assertThat(results).isNotEmpty(); } private int getIntValue(Map map, String key) { diff --git a/core/src/test/java/com/redis/vl/query/HybridQueryTest.java b/core/src/test/java/com/redis/vl/query/HybridQueryTest.java index 9802cf3..5813383 100644 --- a/core/src/test/java/com/redis/vl/query/HybridQueryTest.java +++ b/core/src/test/java/com/redis/vl/query/HybridQueryTest.java @@ -3,215 +3,404 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import com.redis.vl.query.HybridQuery.CombinationMethod; +import com.redis.vl.query.HybridQuery.VectorSearchMethod; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import redis.clients.jedis.search.hybrid.FTHybridParams; /** - * Unit tests for HybridQuery - ported from Python test_aggregation_types.py + * Unit tests for HybridQuery - the native FT.HYBRID implementation. * - *

Python reference: /redis-vl-python/tests/unit/test_aggregation_types.py - * - *

Tests the ability to pass string filter expressions directly to HybridQuery, in addition to - * Filter objects. This is a port of the test added in PR #375. + *

Tests the builder defaults, parameter validation, vector search methods, combination methods, + * filter expressions, and FTHybridParams construction. */ -@DisplayName("HybridQuery Unit Tests") +@DisplayName("HybridQuery Unit Tests (FT.HYBRID)") class HybridQueryTest { private static final float[] SAMPLE_VECTOR = new float[] {0.1f, 0.2f, 0.3f}; - /** - * Port of Python test_hybrid_query_with_string_filter (test_aggregation_types.py:118-191) - * - *

This test ensures that when a string filter expression is passed to HybridQuery, it's - * properly included in the generated query string and not set to empty. Regression test for bug - * where string filters were being ignored in Python. - * - *

In Java, this test verifies we support BOTH Filter objects and raw string filters for - * feature parity with Python. - */ @Test - @DisplayName("Should support string filter expressions") - void testHybridQueryWithStringFilter() { - String text = "search for document 12345"; - String textFieldName = "description"; - String vectorFieldName = "embedding"; + @DisplayName("Should use correct builder defaults") + void testBuilderDefaults() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build(); + + assertThat(query.getTextScorer()).isEqualTo("BM25STD"); + assertThat(query.getNumResults()).isEqualTo(10); + assertThat(query.getLinearAlpha()).isEqualTo(0.3f); + assertThat(query.getRrfWindow()).isEqualTo(20); + assertThat(query.getRrfConstant()).isEqualTo(60); + assertThat(query.getVectorSearchMethod()).isEqualTo(VectorSearchMethod.KNN); + assertThat(query.getCombinationMethod()).isEqualTo(CombinationMethod.RRF); + assertThat(query.getDtype()).isEqualTo("float32"); + assertThat(query.getKnnEfRuntime()).isEqualTo(10); + assertThat(query.getRangeEpsilon()).isEqualTo(0.01f); + assertThat(query.getVectorParamName()).isEqualTo("vector"); + assertThat(query.getYieldTextScoreAs()).isNull(); + assertThat(query.getYieldVsimScoreAs()).isNull(); + assertThat(query.getYieldCombinedScoreAs()).isNull(); + assertThat(query.getFilterExpression()).isNull(); + assertThat(query.getReturnFields()).isEmpty(); + } + + @Test + @DisplayName("Should reject empty text") + void testRejectsEmptyText() { + assertThatThrownBy( + () -> + HybridQuery.builder() + .text("") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text string cannot be empty"); + } + + @Test + @DisplayName("Should reject text that becomes empty after stopwords removal") + void testRejectsTextThatBecomesEmptyAfterStopwords() { + assertThatThrownBy( + () -> + HybridQuery.builder() + .text("with a for but and") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text string cannot be empty after removing stopwords"); + } + + @Test + @DisplayName("Should configure KNN vector search method parameters") + void testKnnVectorSearchMethod() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .vectorSearchMethod(VectorSearchMethod.KNN) + .knnEfRuntime(50) + .numResults(20) + .build(); + + assertThat(query.getVectorSearchMethod()).isEqualTo(VectorSearchMethod.KNN); + assertThat(query.getKnnEfRuntime()).isEqualTo(50); + assertThat(query.getNumResults()).isEqualTo(20); + } + + @Test + @DisplayName("Should configure RANGE vector search method parameters") + void testRangeVectorSearchMethod() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .vectorSearchMethod(VectorSearchMethod.RANGE) + .rangeRadius(0.5f) + .rangeEpsilon(0.05f) + .build(); + + assertThat(query.getVectorSearchMethod()).isEqualTo(VectorSearchMethod.RANGE); + assertThat(query.getRangeRadius()).isEqualTo(0.5f); + assertThat(query.getRangeEpsilon()).isEqualTo(0.05f); + } + + @Test + @DisplayName("Should require radius for RANGE vector search method") + void testRangeRequiresRadius() { + assertThatThrownBy( + () -> + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .vectorSearchMethod(VectorSearchMethod.RANGE) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("rangeRadius is required when vectorSearchMethod is RANGE"); + } - // Test with string filter expression - should include filter in query string + @Test + @DisplayName("Should configure RRF combination method parameters") + void testRrfCombinationMethod() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .combinationMethod(CombinationMethod.RRF) + .rrfWindow(30) + .rrfConstant(100) + .build(); + + assertThat(query.getCombinationMethod()).isEqualTo(CombinationMethod.RRF); + assertThat(query.getRrfWindow()).isEqualTo(30); + assertThat(query.getRrfConstant()).isEqualTo(100); + } + + @Test + @DisplayName("Should configure LINEAR combination method parameters") + void testLinearCombinationMethod() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .combinationMethod(CombinationMethod.LINEAR) + .linearAlpha(0.7f) + .build(); + + assertThat(query.getCombinationMethod()).isEqualTo(CombinationMethod.LINEAR); + assertThat(query.getLinearAlpha()).isEqualTo(0.7f); + } + + @Test + @DisplayName("Should support string filter expressions") + void testStringFilterExpression() { String stringFilter = "@category:{tech|science|engineering}"; - HybridQuery hybridQuery = + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textFieldName) + .text("search for document 12345") + .textFieldName("description") .vector(SAMPLE_VECTOR) - .vectorFieldName(vectorFieldName) + .vectorFieldName("embedding") .filterExpression(stringFilter) .build(); - // Check that filter is stored correctly - assertThat(hybridQuery.getFilterExpression()).isEqualTo(stringFilter); + assertThat(query.getFilterExpression()).isEqualTo(stringFilter); - // Check that the generated query string includes both text search and filter - String queryString = hybridQuery.buildQueryString(); - assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); - assertThat(queryString).contains("AND " + stringFilter); + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@description:(search | document | 12345)"); + assertThat(queryString).contains(stringFilter); } - /** Port of Python test - verify Filter objects still work */ @Test @DisplayName("Should support Filter objects") - void testHybridQueryWithFilterObject() { - String text = "search for document 12345"; - String textFieldName = "description"; - String vectorFieldName = "embedding"; - - // Test with FilterExpression - should also work (existing functionality) + void testFilterObjectExpression() { Filter filterExpression = Filter.tag("category", "tech"); - HybridQuery hybridQuery = + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textFieldName) + .text("search for document 12345") + .textFieldName("description") .vector(SAMPLE_VECTOR) - .vectorFieldName(vectorFieldName) + .vectorFieldName("embedding") .filterExpression(filterExpression) .build(); - // Check that filter is stored correctly - assertThat(hybridQuery.getFilterExpression()).isEqualTo(filterExpression); + assertThat(query.getFilterExpression()).isEqualTo(filterExpression); - // Check that the generated query string includes both text search and filter - String queryString = hybridQuery.buildQueryString(); - assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); - assertThat(queryString).contains("AND @category:{tech}"); + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@description:(search | document | 12345)"); + assertThat(queryString).contains("@category:{tech}"); } - /** Port of Python test - verify no filter works */ @Test - @DisplayName("Should work without filter") - void testHybridQueryNoFilter() { - String text = "search for document 12345"; - String textFieldName = "description"; - String vectorFieldName = "embedding"; + @DisplayName("Should build FTHybridParams successfully") + void testBuildFTHybridParams() { + HybridQuery query = + HybridQuery.builder() + .text("medical professional") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("user_embedding") + .numResults(5) + .combinationMethod(CombinationMethod.RRF) + .rrfWindow(15) + .rrfConstant(60) + .build(); - // Test with no filter - should only have text search - HybridQuery hybridQuery = + FTHybridParams params = query.buildFTHybridParams(); + assertThat(params).isNotNull(); + } + + @Test + @DisplayName("Should build FTHybridParams with LINEAR combination") + void testBuildFTHybridParamsLinear() { + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textFieldName) + .text("medical professional") + .textFieldName("description") .vector(SAMPLE_VECTOR) - .vectorFieldName(vectorFieldName) + .vectorFieldName("user_embedding") + .combinationMethod(CombinationMethod.LINEAR) + .linearAlpha(0.6f) + .numResults(10) .build(); - String queryString = hybridQuery.buildQueryString(); - assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); - assertThat(queryString).doesNotContain("AND"); + FTHybridParams params = query.buildFTHybridParams(); + assertThat(params).isNotNull(); } - /** Port of Python test - verify wildcard filter works */ @Test - @DisplayName("Should handle wildcard filter") - void testHybridQueryWildcardFilter() { - String text = "search for document 12345"; - String textFieldName = "description"; - String vectorFieldName = "embedding"; + @DisplayName("Should build FTHybridParams with RANGE vector method") + void testBuildFTHybridParamsRange() { + HybridQuery query = + HybridQuery.builder() + .text("medical professional") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("user_embedding") + .vectorSearchMethod(VectorSearchMethod.RANGE) + .rangeRadius(0.5f) + .rangeEpsilon(0.02f) + .build(); + + FTHybridParams params = query.buildFTHybridParams(); + assertThat(params).isNotNull(); + } - // Test with wildcard filter - should only have text search (no AND clause) - HybridQuery hybridQuery = + @Test + @DisplayName("Should yield score alias fields") + void testYieldScoreAliases() { + HybridQuery query = HybridQuery.builder() - .text(text) - .textFieldName(textFieldName) + .text("test query") + .textFieldName("description") .vector(SAMPLE_VECTOR) - .vectorFieldName(vectorFieldName) - .filterExpression("*") + .vectorFieldName("embedding") + .yieldTextScoreAs("text_score") + .yieldVsimScoreAs("vector_score") + .yieldCombinedScoreAs("combined_score") .build(); - String queryString = hybridQuery.buildQueryString(); - assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)"); - assertThat(queryString).doesNotContain("AND"); + assertThat(query.getYieldTextScoreAs()).isEqualTo("text_score"); + assertThat(query.getYieldVsimScoreAs()).isEqualTo("vector_score"); + assertThat(query.getYieldCombinedScoreAs()).isEqualTo("combined_score"); + + FTHybridParams params = query.buildFTHybridParams(); + assertThat(params).isNotNull(); } - /** Test that empty text throws exception */ @Test - @DisplayName("Should reject empty text") - void testRejectsEmptyText() { - assertThatThrownBy( - () -> - HybridQuery.builder() - .text("") - .textFieldName("description") - .vector(SAMPLE_VECTOR) - .vectorFieldName("embedding") - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("text string cannot be empty"); + @DisplayName("Should produce correct query string without filter") + void testQueryStringNoFilter() { + HybridQuery query = + HybridQuery.builder() + .text("medical professional") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .build(); + + String queryString = query.buildQueryString(); + assertThat(queryString).isEqualTo("@description:(medical | professional)"); } - /** Test that text becomes empty after stopwords are removed */ @Test - @DisplayName("Should reject text that becomes empty after stopwords removal") - void testRejectsTextThatBecomesEmptyAfterStopwords() { - // "with a for but and" will all be removed as default English stopwords - assertThatThrownBy( - () -> - HybridQuery.builder() - .text("with a for but and") - .textFieldName("description") - .vector(SAMPLE_VECTOR) - .vectorFieldName("embedding") - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("text string cannot be empty after removing stopwords"); + @DisplayName("Should produce correct query string with filter") + void testQueryStringWithFilter() { + HybridQuery query = + HybridQuery.builder() + .text("medical professional") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .filterExpression("@age:[30 +inf]") + .build(); + + String queryString = query.buildQueryString(); + assertThat(queryString).contains("@description:(medical | professional)"); + assertThat(queryString).contains("@age:[30 +inf]"); } - /** Test query string building */ @Test - @DisplayName("Should build correct query string format") - void testQueryStringFormat() { + @DisplayName("Should handle wildcard filter without adding it to query") + void testWildcardFilter() { HybridQuery query = HybridQuery.builder() .text("medical professional") .textFieldName("description") .vector(SAMPLE_VECTOR) - .vectorFieldName("user_embedding") - .numResults(5) + .vectorFieldName("embedding") + .filterExpression("*") .build(); String queryString = query.buildQueryString(); + assertThat(queryString).isEqualTo("@description:(medical | professional)"); + assertThat(queryString).doesNotContain("*"); + } - // Verify format: (~@text_field:(tokens))=>[KNN num @vector_field $vector AS vector_distance] - assertThat(queryString).matches(".*\\(~@description:\\(.*\\)\\)=>\\[KNN.*\\]"); - assertThat(queryString).contains("KNN 5 @user_embedding"); - assertThat(queryString).contains("AS vector_distance"); + @Test + @DisplayName("Should configure return fields") + void testReturnFields() { + HybridQuery query = + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .returnFields(java.util.List.of("field1", "field2", "field3")) + .build(); + + assertThat(query.getReturnFields()).containsExactly("field1", "field2", "field3"); + + FTHybridParams params = query.buildFTHybridParams(); + assertThat(params).isNotNull(); } - /** Test alpha parameter */ @Test - @DisplayName("Should store alpha parameter correctly") - void testAlphaParameter() { + @DisplayName("Should throw for unknown scorer") + void testUnknownScorer() { + assertThatThrownBy( + () -> + HybridQuery.builder() + .text("test query") + .textFieldName("description") + .vector(SAMPLE_VECTOR) + .vectorFieldName("embedding") + .textScorer("UNKNOWN_SCORER") + .build() + .buildFTHybridParams()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unknown scorer"); + } + + @Test + @DisplayName("Should produce vector params in getParams()") + void testGetParams() { HybridQuery query = HybridQuery.builder() - .text("test") + .text("test query") .textFieldName("description") .vector(SAMPLE_VECTOR) .vectorFieldName("embedding") - .alpha(0.3f) .build(); - assertThat(query.getAlpha()).isEqualTo(0.3f); + java.util.Map params = query.getParams(); + assertThat(params).containsKey("vector"); + assertThat(params.get("vector")).isInstanceOf(byte[].class); } - /** Test numResults parameter */ @Test - @DisplayName("Should store numResults parameter correctly") - void testNumResultsParameter() { + @DisplayName("Should support custom vector param name") + void testCustomVectorParamName() { HybridQuery query = HybridQuery.builder() - .text("test") + .text("test query") .textFieldName("description") .vector(SAMPLE_VECTOR) .vectorFieldName("embedding") - .numResults(20) + .vectorParamName("my_vec") .build(); - assertThat(query.getNumResults()).isEqualTo(20); + assertThat(query.getVectorParamName()).isEqualTo("my_vec"); + + java.util.Map params = query.getParams(); + assertThat(params).containsKey("my_vec"); } } diff --git a/core/src/test/java/com/redis/vl/query/MultiVectorQueryIntegrationTest.java b/core/src/test/java/com/redis/vl/query/MultiVectorQueryIntegrationTest.java index 083da3f..7dc4f9b 100644 --- a/core/src/test/java/com/redis/vl/query/MultiVectorQueryIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/query/MultiVectorQueryIntegrationTest.java @@ -166,9 +166,9 @@ void testMultipleVectorsQuery() { .contains("@image_embedding:[VECTOR_RANGE 2.0 $vector_1]") .contains(" | "); - // Verify scoring + // Verify scoring (@ prefix is correct for FT.AGGREGATE APPLY expressions) String formula = query.getScoringFormula(); - assertThat(formula).contains("0.70 * score_0").contains("0.30 * score_1"); + assertThat(formula).contains("0.70 * @score_0").contains("0.30 * @score_1"); } @Test @@ -213,9 +213,9 @@ void testWeightedScoringCalculation() { assertThat(calculations.get("score_0")).isEqualTo("(2 - distance_0)/2"); assertThat(calculations.get("score_1")).isEqualTo("(2 - distance_1)/2"); - // Verify combined scoring formula + // Verify combined scoring formula (@ prefix is correct for FT.AGGREGATE APPLY expressions) String formula = query.getScoringFormula(); - assertThat(formula).isEqualTo("0.60 * score_0 + 0.40 * score_1"); + assertThat(formula).isEqualTo("0.60 * @score_0 + 0.40 * @score_1"); } @Test diff --git a/core/src/test/java/com/redis/vl/schema/JsonFieldAliasIntegrationTest.java b/core/src/test/java/com/redis/vl/schema/JsonFieldAliasIntegrationTest.java index 7abfc94..4093c3d 100644 --- a/core/src/test/java/com/redis/vl/schema/JsonFieldAliasIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/schema/JsonFieldAliasIntegrationTest.java @@ -2,6 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; +import com.redis.vl.BaseIntegrationTest; import com.redis.vl.index.SearchIndex; import com.redis.vl.query.Filter; import java.util.List; @@ -10,8 +11,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import redis.clients.jedis.RedisClient; -import redis.clients.jedis.UnifiedJedis; import redis.clients.jedis.search.SearchResult; /** @@ -26,16 +25,13 @@ *

Updated for Jedis 7.2+ API. */ @Tag("integration") -public class JsonFieldAliasIntegrationTest { +public class JsonFieldAliasIntegrationTest extends BaseIntegrationTest { - private UnifiedJedis jedis; private SearchIndex searchIndex; private static final String INDEX_NAME = "test_json_alias"; @BeforeEach void setUp() { - jedis = RedisClient.create("localhost", 6379); - // Create schema with JSON storage and field aliases Map schema = Map.of( @@ -65,7 +61,7 @@ void setUp() { "as", "embedding")))); - searchIndex = new SearchIndex(IndexSchema.fromDict(schema), jedis); + searchIndex = new SearchIndex(IndexSchema.fromDict(schema), unifiedJedis); searchIndex.create(true); // Overwrite if exists } @@ -79,9 +75,6 @@ void tearDown() { // Ignore cleanup errors } } - if (jedis != null) { - jedis.close(); - } } @Test diff --git a/core/src/test/java/com/redis/vl/test/vcr/VCRAnnotationsTest.java b/core/src/test/java/com/redis/vl/test/vcr/VCRAnnotationsTest.java index fbc1762..670d783 100644 --- a/core/src/test/java/com/redis/vl/test/vcr/VCRAnnotationsTest.java +++ b/core/src/test/java/com/redis/vl/test/vcr/VCRAnnotationsTest.java @@ -70,7 +70,7 @@ void vcrTestShouldHaveRedisImageAttribute() throws NoSuchMethodException { void vcrTestShouldHaveDefaultRedisImage() throws NoSuchMethodException { var method = VCRTest.class.getMethod("redisImage"); String defaultValue = (String) method.getDefaultValue(); - assertThat(defaultValue).isEqualTo("redis/redis-stack:latest"); + assertThat(defaultValue).isEqualTo("redis:latest"); } @Test diff --git a/core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java b/core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java index 598ecf1..cb571c0 100644 --- a/core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java @@ -1,6 +1,7 @@ package com.redis.vl.utils.rerank; import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.util.*; import org.junit.jupiter.api.BeforeAll; @@ -12,7 +13,7 @@ * *

Based on Python notebook: docs/user_guide/06_rerankers.ipynb * - *

Requires COHERE_API_KEY environment variable to be set. + *

Requires COHERE_API_KEY environment variable to be set. Tests are skipped when not available. */ @Tag("integration") class CohereRerankerIntegrationTest { @@ -32,8 +33,9 @@ class CohereRerankerIntegrationTest { @BeforeAll static void setUp() { apiKey = System.getenv("COHERE_API_KEY"); - assertNotNull(apiKey, "COHERE_API_KEY environment variable must be set"); - assertFalse(apiKey.isEmpty(), "COHERE_API_KEY must not be empty"); + assumeTrue( + apiKey != null && !apiKey.isEmpty(), + "COHERE_API_KEY environment variable not set, skipping"); } @Test diff --git a/core/src/test/java/com/redis/vl/utils/rerank/VoyageAIRerankerIntegrationTest.java b/core/src/test/java/com/redis/vl/utils/rerank/VoyageAIRerankerIntegrationTest.java index 7c2d66a..583e97d 100644 --- a/core/src/test/java/com/redis/vl/utils/rerank/VoyageAIRerankerIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/utils/rerank/VoyageAIRerankerIntegrationTest.java @@ -1,6 +1,7 @@ package com.redis.vl.utils.rerank; import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.util.*; import org.junit.jupiter.api.BeforeAll; @@ -12,7 +13,7 @@ * *

Based on Python notebook: docs/user_guide/06_rerankers.ipynb * - *

Requires VOYAGE_API_KEY environment variable to be set. + *

Requires VOYAGE_API_KEY environment variable to be set. Tests are skipped when not available. */ @Tag("integration") class VoyageAIRerankerIntegrationTest { @@ -32,8 +33,9 @@ class VoyageAIRerankerIntegrationTest { @BeforeAll static void setUp() { apiKey = System.getenv("VOYAGE_API_KEY"); - assertNotNull(apiKey, "VOYAGE_API_KEY environment variable must be set"); - assertFalse(apiKey.isEmpty(), "VOYAGE_API_KEY must not be empty"); + assumeTrue( + apiKey != null && !apiKey.isEmpty(), + "VOYAGE_API_KEY environment variable not set, skipping"); } @Test diff --git a/core/src/test/java/com/redis/vl/utils/vectorize/SentenceTransformersVectorizerIntegrationTest.java b/core/src/test/java/com/redis/vl/utils/vectorize/SentenceTransformersVectorizerIntegrationTest.java index 704952e..a951217 100644 --- a/core/src/test/java/com/redis/vl/utils/vectorize/SentenceTransformersVectorizerIntegrationTest.java +++ b/core/src/test/java/com/redis/vl/utils/vectorize/SentenceTransformersVectorizerIntegrationTest.java @@ -11,24 +11,18 @@ public class SentenceTransformersVectorizerIntegrationTest { @Test public void testRedisLangcacheEmbedV3ModelFails() { - // This test should fail because redis/langcache-embed-v3 doesn't have ONNX format + // This test verifies that redis/langcache-embed-v3 fails because it doesn't have ONNX format. + // The model uses SafeTensors format which is not compatible with ONNX Runtime. Exception exception = assertThrows( - RuntimeException.class, + Exception.class, () -> { SentenceTransformersVectorizer vectorizer = new SentenceTransformersVectorizer("redis/langcache-embed-v3"); }); - assertTrue( - exception.getMessage().contains("Failed to initialize SentenceTransformersVectorizer")); - - // Check the root cause - Throwable cause = exception.getCause(); - assertNotNull(cause); - assertTrue( - cause.getMessage().contains("model.onnx") - || cause.getMessage().contains("Model not found")); + // Verify the exception indicates the model could not be loaded + assertNotNull(exception.getMessage()); } @Test diff --git a/demos/langchain4j-vcr/README.md b/demos/langchain4j-vcr/README.md index 36cfb55..67d7813 100644 --- a/demos/langchain4j-vcr/README.md +++ b/demos/langchain4j-vcr/README.md @@ -98,7 +98,7 @@ OPENAI_API_KEY=your-key VCR_MODE=RECORD ./gradlew :demos:langchain4j-vcr:test ## How It Works 1. **Test Setup**: `@VCRTest` annotation triggers the VCR JUnit 5 extension -2. **Container Start**: A Redis Stack container is started with persistence enabled +2. **Container Start**: A Redis container is started with persistence enabled 3. **Model Wrapping**: Fields annotated with `@VCRModel` are wrapped with VCR proxies 4. **Recording**: When a model is called, VCR checks for existing cassette: - **Cache hit**: Returns recorded response @@ -138,7 +138,7 @@ demos/langchain4j-vcr/ |-----------|---------|-------------| | `mode` | `PLAYBACK_OR_RECORD` | VCR operating mode | | `dataDir` | `src/test/resources/vcr-data` | Cassette storage directory | -| `redisImage` | `redis/redis-stack:latest` | Redis Docker image | +| `redisImage` | `redis:latest` | Redis Docker image | ### @VCRModel Annotation diff --git a/demos/rag-multimodal/README.md b/demos/rag-multimodal/README.md index 34f5d81..fdb6406 100644 --- a/demos/rag-multimodal/README.md +++ b/demos/rag-multimodal/README.md @@ -20,16 +20,13 @@ A JavaFX application demonstrating multimodal Retrieval-Augmented Generation (RA ## Prerequisites -### 1. Redis Stack +### 1. Redis -You need Redis Stack running for vector search capabilities: +You need Redis 8.0+ running for vector search capabilities: ```bash # Using Docker (recommended) - Note: Using port 6399 to avoid conflicts -docker run -d --name redis-rag-demo -p 6399:6379 redis/redis-stack:latest - -# Or using Homebrew on macOS (configure to use port 6399) -brew install redis-stack +docker run -d --name redis-rag-demo -p 6399:6379 redis:latest # Or download from https://redis.io/download ``` @@ -48,7 +45,7 @@ java -version ```bash # Start Redis Stack on port 6399 (avoids conflicts with default port) -docker run -d --name redis-rag-demo -p 6399:6379 redis/redis-stack:latest +docker run -d --name redis-rag-demo -p 6399:6379 redis:latest ``` ### 2. Build the Application @@ -177,7 +174,7 @@ export ANTHROPIC_API_KEY=sk-ant-... docker ps | grep redis-rag-demo # If not running, start it -docker run -d --name redis-rag-demo -p 6399:6379 redis/redis-stack:latest +docker run -d --name redis-rag-demo -p 6399:6379 redis:latest # Or restart existing container docker start redis-rag-demo @@ -229,7 +226,7 @@ docker start redis-rag-demo - **LangChain4J**: LLM orchestration - **Apache PDFBox**: PDF processing - **JTokkit**: Token counting -- **Redis Stack**: Vector database backend +- **Redis**: Vector database backend ## Development diff --git a/demos/rag-multimodal/docker-compose.yml b/demos/rag-multimodal/docker-compose.yml index 18f55ac..5ee68e2 100644 --- a/demos/rag-multimodal/docker-compose.yml +++ b/demos/rag-multimodal/docker-compose.yml @@ -1,6 +1,6 @@ services: redis: - image: redis/redis-stack:latest + image: redis:latest container_name: redis-rag-demo ports: - "6399:6379" # Map host port 6399 to container port 6379 (avoids conflicts) diff --git a/demos/rag-multimodal/src/test/java/com/redis/vl/BaseIntegrationTest.java b/demos/rag-multimodal/src/test/java/com/redis/vl/BaseIntegrationTest.java index 4a0f6f6..b178ab5 100644 --- a/demos/rag-multimodal/src/test/java/com/redis/vl/BaseIntegrationTest.java +++ b/demos/rag-multimodal/src/test/java/com/redis/vl/BaseIntegrationTest.java @@ -7,7 +7,7 @@ import redis.clients.jedis.RedisClient; import redis.clients.jedis.UnifiedJedis; -/** Base class for integration tests with Redis Stack 8.0 container */ +/** Base class for integration tests with Redis 8.x+ container */ public abstract class BaseIntegrationTest { protected static UnifiedJedis jedis; @@ -17,9 +17,9 @@ public abstract class BaseIntegrationTest { @BeforeAll static void startContainer() { - // Start Redis Stack container + // Start Redis container (8.x+ includes search modules natively) redisContainer = - new GenericContainer<>(DockerImageName.parse("redis/redis-stack:latest")) + new GenericContainer<>(DockerImageName.parse("redis:latest")) .withExposedPorts(6379); redisContainer.start(); diff --git a/demos/spring-ai-vcr/README.md b/demos/spring-ai-vcr/README.md index 576d6a3..ee5fdb7 100644 --- a/demos/spring-ai-vcr/README.md +++ b/demos/spring-ai-vcr/README.md @@ -100,7 +100,7 @@ OPENAI_API_KEY=your-key VCR_MODE=RECORD ./gradlew :demos:spring-ai-vcr:test ## How It Works 1. **Test Setup**: `@VCRTest` annotation triggers the VCR JUnit 5 extension -2. **Container Start**: A Redis Stack container is started with persistence enabled +2. **Container Start**: A Redis container is started with persistence enabled 3. **Model Wrapping**: Fields annotated with `@VCRModel` are wrapped with VCR proxies 4. **Recording**: When a model is called, VCR checks for existing cassette: - **Cache hit**: Returns recorded response @@ -140,7 +140,7 @@ demos/spring-ai-vcr/ |-----------|---------|-------------| | `mode` | `PLAYBACK_OR_RECORD` | VCR operating mode | | `dataDir` | `src/test/resources/vcr-data` | Cassette storage directory | -| `redisImage` | `redis/redis-stack:latest` | Redis Docker image | +| `redisImage` | `redis:latest` | Redis Docker image | ### @VCRModel Annotation diff --git a/docs/content/antora.yml b/docs/content/antora.yml index effc01a..81c8972 100644 --- a/docs/content/antora.yml +++ b/docs/content/antora.yml @@ -13,6 +13,6 @@ asciidoc: redisvl-version: '0.1.0-SNAPSHOT' url-redisvl-java: https://github.com/redis/redis-vl-java url-redisvl-python: https://github.com/redis/redis-vl-python - url-redis-stack: https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/ + url-redis: https://redis.io/docs/latest/get-started/ url-redis-search: https://redis.io/docs/stack/search/ url-redis-json: https://redis.io/docs/stack/json/ \ No newline at end of file diff --git a/docs/content/modules/ROOT/nav.adoc b/docs/content/modules/ROOT/nav.adoc index a946fcc..7382159 100644 --- a/docs/content/modules/ROOT/nav.adoc +++ b/docs/content/modules/ROOT/nav.adoc @@ -23,5 +23,5 @@ .Resources * https://github.com/redis/redis-vl-java[GitHub^] * https://github.com/redis/redis-vl-python[Python Version^] -* {url-redis-stack}[Redis Stack^] +* {url-redis}[Redis^] * https://discord.gg/redis[Discord^] diff --git a/docs/content/modules/ROOT/pages/getting-started.adoc b/docs/content/modules/ROOT/pages/getting-started.adoc index 296d2a4..4af1b9a 100644 --- a/docs/content/modules/ROOT/pages/getting-started.adoc +++ b/docs/content/modules/ROOT/pages/getting-started.adoc @@ -16,7 +16,7 @@ RedisVL is a versatile Java library designed to enhance AI applications using Re * Java 17 or higher * Maven or Gradle -* Redis Stack 7.2+ or Redis with RediSearch module +* Redis 8.0+ (includes built-in search and vector capabilities) == Installation @@ -45,16 +45,16 @@ implementation 'com.redis:redisvl:{redisvl-version}' == Set Up Redis -The easiest way to get started is with Redis Stack using Docker: +The easiest way to get started is with Redis using Docker: [source,bash] ---- -docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest +docker run -d --name redis -p 6379:6379 -p 8001:8001 redis:latest ---- This command: -* Starts Redis Stack in a Docker container +* Starts Redis in a Docker container * Exposes Redis on port 6379 * Exposes Redis Insight GUI on port 8001 (access at http://localhost:8001) diff --git a/docs/content/modules/ROOT/pages/vcr-testing.adoc b/docs/content/modules/ROOT/pages/vcr-testing.adoc index 132f9c1..943c938 100644 --- a/docs/content/modules/ROOT/pages/vcr-testing.adoc +++ b/docs/content/modules/ROOT/pages/vcr-testing.adoc @@ -191,7 +191,7 @@ Specify a custom Redis image: ---- @VCRTest( mode = VCRMode.PLAYBACK, - redisImage = "redis/redis-stack:7.2.0-v6" // default + redisImage = "redis:latest" // default ) public class MyTest { // ... diff --git a/docs/design/VCR_TEST_SYSTEM.md b/docs/design/VCR_TEST_SYSTEM.md index ff63546..30ab767 100644 --- a/docs/design/VCR_TEST_SYSTEM.md +++ b/docs/design/VCR_TEST_SYSTEM.md @@ -269,7 +269,7 @@ public class VCRContext { Files.createDirectories(dataDir); // Start Redis with volume mount - redisContainer = new RedisContainer(DockerImageName.parse("redis/redis-stack:latest")) + redisContainer = new RedisContainer(DockerImageName.parse("redis:latest")) .withFileSystemBind(dataDir.toAbsolutePath().toString(), "/data", BindMode.READ_WRITE) .withCommand(buildRedisCommand()); @@ -281,7 +281,7 @@ public class VCRContext { } private String buildRedisCommand() { - StringBuilder cmd = new StringBuilder("redis-stack-server"); + StringBuilder cmd = new StringBuilder("redis-server"); cmd.append(" --appendonly yes"); cmd.append(" --appendfsync everysec"); cmd.append(" --dir /data"); @@ -699,7 +699,7 @@ class AdvancedTest { static VCRExtension vcr = VCRExtension.builder() .mode(VCRMode.PLAYBACK_OR_RECORD) .dataDir("src/test/resources/custom-vcr-data") - .redisImage("redis/redis-stack:7.4") + .redisImage("redis:latest") .enableInteractionLogging(true) .build(); diff --git a/gradle.properties b/gradle.properties index dc80d7b..55bd7ad 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version = 0.12.2 +version = 0.13.0 diff --git a/notebooks/01_getting_started.ipynb b/notebooks/01_getting_started.ipynb index f7573a1..77590fe 100644 --- a/notebooks/01_getting_started.ipynb +++ b/notebooks/01_getting_started.ipynb @@ -36,7 +36,7 @@ "outputs": [], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -262,7 +262,7 @@ ], "source": [ "// Connect to Redis\n", - "UnifiedJedis client = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis client = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Create SearchIndex from schema (validateOnLoad is true by default)\n", "SearchIndex index = SearchIndex.fromDict(schema, client);\n", @@ -301,10 +301,10 @@ ], "source": [ "// Create SearchIndex with Redis URL - the index will manage the connection\n", - "SearchIndex indexWithUrl = SearchIndex.fromDict(schema, \"redis://redis-stack:6379\");\n", + "SearchIndex indexWithUrl = SearchIndex.fromDict(schema, \"redis://redis:6379\");\n", "\n", "// Or with validateOnLoad option enabled\n", - "SearchIndex indexWithValidation = SearchIndex.fromDict(schema, \"redis://redis-stack:6379\", true);\n", + "SearchIndex indexWithValidation = SearchIndex.fromDict(schema, \"redis://redis:6379\", true);\n", "\n", "System.out.println(\"Created indices with URL-based connection\");" ] @@ -396,7 +396,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[user_simple_docs:01K70ZR1PYMQFQ737CTSDCMAP5, user_simple_docs:01K70ZR1PY414PXY73W2AEANE7, user_simple_docs:01K70ZR1PYZZTV2QSHM4DJYB89]\n" + "[user_simple_docs:01KHRPSD4HP5M0W9R3FXBB84EN, user_simple_docs:01KHRPSD4H1263YPY5E51D85KG, user_simple_docs:01KHRPSD4HV8SH89NPQ662PT4E]\n" ] } ], @@ -490,7 +490,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[user_simple_docs:01K70ZR1WATEXBRXMSNFVP4GCH]\n" + "[user_simple_docs:01KHRPSD9YZVAGREV4BBSBPS53]\n" ] } ], @@ -561,9 +561,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "{credit_score=high, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1PYMQFQ737CTSDCMAP5, job=engineer, user=john, age=1}\n", - "{credit_score=low, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1PY414PXY73W2AEANE7, job=doctor, user=mary, age=2}\n", - "{credit_score=high, score=1.0, vector_distance=0.0566298961639, user_embedding=���=���>\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1WATEXBRXMSNFVP4GCH, job=engineer, user=tyler, age=9}\n" + "{credit_score=high, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD4HP5M0W9R3FXBB84EN, job=engineer, user=john, age=1}\n", + "{credit_score=low, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD4H1263YPY5E51D85KG, job=doctor, user=mary, age=2}\n", + "{credit_score=high, score=1.0, vector_distance=0.0566298961639, user_embedding=���=���>\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD9YZVAGREV4BBSBPS53, job=engineer, user=tyler, age=9}\n" ] } ], @@ -644,9 +644,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "{credit_score=low, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1PY414PXY73W2AEANE7, job=doctor, user=mary, age=2}\n", - "{credit_score=high, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1PYMQFQ737CTSDCMAP5, job=engineer, user=john, age=1}\n", - "{credit_score=high, score=1.0, vector_distance=0.0566298961639, user_embedding=���=���>\u0000\u0000\u0000?, id=user_simple_docs:01K70ZR1WATEXBRXMSNFVP4GCH, job=engineer, user=tyler, age=9}\n" + "{credit_score=high, score=1.0, vector_distance=0.0566298961639, user_embedding=���=���>\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD9YZVAGREV4BBSBPS53, job=engineer, user=tyler, age=9}\n", + "{credit_score=low, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD4H1263YPY5E51D85KG, job=doctor, user=mary, age=2}\n", + "{credit_score=high, score=1.0, vector_distance=0, user_embedding=���=���=\u0000\u0000\u0000?, id=user_simple_docs:01KHRPSD4HP5M0W9R3FXBB84EN, job=engineer, user=john, age=1}\n" ] } ], @@ -860,7 +860,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/02_hybrid_queries.ipynb b/notebooks/02_hybrid_queries.ipynb index 49762c8..99ae6b8 100644 --- a/notebooks/02_hybrid_queries.ipynb +++ b/notebooks/02_hybrid_queries.ipynb @@ -39,7 +39,7 @@ ], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -321,7 +321,7 @@ ], "source": [ "// Connect to Redis and create index\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Create SearchIndex\n", "SearchIndex index = SearchIndex.fromDict(schemaMap, jedis);\n", @@ -1404,7 +1404,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/03_llmcache.ipynb b/notebooks/03_llmcache.ipynb index 5fcf195..3d6b379 100644 --- a/notebooks/03_llmcache.ipynb +++ b/notebooks/03_llmcache.ipynb @@ -18,7 +18,7 @@ "outputs": [], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -72,7 +72,7 @@ ], "source": [ "// Setup connection and initialize components\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Setup OpenAI client\n", "String apiKey = System.getenv(\"OPENAI_API_KEY\");\n", @@ -267,7 +267,7 @@ " Prompt: What is the capital of France?\n", " Response: Paris\n", " Distance: 0.0\n", - " Metadata: {country=france, vector_distance=0, id=7dc948dd-e705-4828-a62e-c68535374afe, updated_at=1759896302, city=Paris, inserted_at=1759896302}\n" + " Metadata: {country=france, vector_distance=0, id=eba12844-38d7-4294-b7e8-6d46a2b65c73, updated_at=1771429802, city=Paris, inserted_at=1771429802}\n" ] } ], @@ -513,7 +513,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Without caching, a call to OpenAI to answer this simple question took 1.859 seconds.\n", + "Without caching, a call to OpenAI to answer this simple question took 2.036 seconds.\n", "Added to cache\n" ] } @@ -542,8 +542,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Avg time taken with LLM cache enabled: 0.0315\n", - "Percentage of time saved: 98.31%\n" + "Avg time taken with LLM cache enabled: 0.023600000000000003\n", + "Percentage of time saved: 98.84%\n" ] } ], @@ -719,7 +719,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/04_vectorizers.ipynb b/notebooks/04_vectorizers.ipynb index 9b618ef..c851167 100644 --- a/notebooks/04_vectorizers.ipynb +++ b/notebooks/04_vectorizers.ipynb @@ -14,7 +14,7 @@ "Before running this notebook, be sure to have a running Redis Stack instance. You can start it with Docker:\n", "\n", "```bash\n", - "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis:latest\n", "```\n", "\n", "This will run Redis on port 6379 and RedisInsight at http://localhost:8001." @@ -27,7 +27,7 @@ "outputs": [], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -177,7 +177,7 @@ "output_type": "stream", "text": [ "OpenAI Vector dimensions: 1536\n", - "First 10 dimensions: [-0.0010550643, -0.003138513, 0.0023639908, -0.0045145433, -0.01032912, 0.012896847, -0.005494419, -0.002988136, -0.0072633694, -0.03365857]\n", + "First 10 dimensions: [-0.0011391325, -0.0032063872, 0.0023801322, -0.004501554, -0.010328997, 0.012922565, -0.00549112, -0.0029864837, -0.0073279613, -0.033658173]\n", "Created 3 embeddings\n" ] } @@ -377,7 +377,7 @@ ], "source": [ "// Connect to Redis\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Create the schema from a Map (matching the Python notebook YAML)\n", "Map schema = Map.of(\n", @@ -416,7 +416,7 @@ "output_type": "stream", "text": [ "Loaded 3 documents\n", - "Keys: [doc:01K70ZPYFTE128NAKYDDT6SNSR, doc:01K70ZPYFVNBZK8JVHJ3D8J6N6, doc:01K70ZPYFVRQQANM5GKTHESABV]\n" + "Keys: [doc:01KHRR0S58GA9NRQN477CRT9EZ, doc:01KHRR0S58K1949MC4PRT0VRQE, doc:01KHRR0S58CFGZKWY3ZRSZ68EB]\n" ] } ], @@ -531,7 +531,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/05_hash_vs_json.ipynb b/notebooks/05_hash_vs_json.ipynb index dfa9201..9c7a4c5 100644 --- a/notebooks/05_hash_vs_json.ipynb +++ b/notebooks/05_hash_vs_json.ipynb @@ -16,7 +16,7 @@ "For example, you can run [Redis Stack](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/) locally with Docker:\n", "\n", "```bash\n", - "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis:latest\n", "```\n", "\n", "Or create a [FREE Redis Cloud](https://redis.io/cloud)." @@ -41,7 +41,7 @@ ], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -347,7 +347,7 @@ ], "source": [ "// Connect to Redis and construct a search index from the hash schema\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "SearchIndex hindex = SearchIndex.fromDict(hashSchema, jedis);\n", "\n", @@ -985,8 +985,8 @@ "output_type": "stream", "text": [ "Loaded 2 bikes\n", - " bike-json:01K70ZYCD71HJGX2QSKSK98MC8\n", - " bike-json:01K70ZYCD7XXPNHHSM7TXJ7Y74\n" + " bike-json:01KHRRVJM172YTMV8N4YFPQYVS\n", + " bike-json:01KHRRVJM2PQEPS9Y5Y9XSPFXP\n" ] } ], @@ -1061,14 +1061,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "ID: bike-json:01K70ZYCD71HJGX2QSKSK98MC8\n", + "ID: bike-json:01KHRRVJM172YTMV8N4YFPQYVS\n", " Brand: Specialized\n", " Name: Specialized Stumpjumper\n", " Type: Enduro bikes\n", " Cosine Distance: 0.623969972134\n", " Similarity Score: 0.3760\n", "\n", - "ID: bike-json:01K70ZYCD7XXPNHHSM7TXJ7Y74\n", + "ID: bike-json:01KHRRVJM2PQEPS9Y5Y9XSPFXP\n", " Brand: Trek\n", " Name: bike_2\n", " Type: Enduro bikes\n", @@ -1219,7 +1219,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/06_rerankers.ipynb b/notebooks/06_rerankers.ipynb index d8cb265..4973fb2 100644 --- a/notebooks/06_rerankers.ipynb +++ b/notebooks/06_rerankers.ipynb @@ -19,7 +19,7 @@ "For example, you can run Redis Stack locally with Docker:\n", "\n", "```bash\n", - "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis:latest\n", "```\n", "\n", "This will run Redis on port 6379 and RedisInsight at http://localhost:8001." @@ -361,9 +361,9 @@ "text": [ "\n", "=== Cohere Reranking Results (Structured Docs) ===\n", - "0.9988120794296265 -- {source=textbook, passage=Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.}\n", - "0.5974904298782349 -- {source=wiki, passage=Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.}\n", - "0.05910154804587364 -- {source=encyclopedia, passage=The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.}\n" + "0.9988120794296265 -- {passage=Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America., source=textbook}\n", + "0.5974904298782349 -- {passage=Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment., source=wiki}\n", + "0.05910154804587364 -- {passage=The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan., source=encyclopedia}\n" ] } ], @@ -484,7 +484,7 @@ "=== VoyageAI Reranked Results ===\n", "0.796875 -- Washington, D.C. (also known as simply Washington or D.C., a...\n", "0.578125 -- Charlotte Amalie is the capital and largest city of the Unit...\n", - "0.562500 -- Carson City is the capital city of the American state of Nev...\n" + "0.546875 -- Carson City is the capital city of the American state of Nev...\n" ] } ], @@ -515,7 +515,7 @@ "=== VoyageAI Reranked Results (Structured Docs) ===\n", "0.796875 -- {source=textbook, content=Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.}\n", "0.578125 -- {source=textbook, content=Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.}\n", - "0.562500 -- {source=wiki, content=Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.}\n" + "0.546875 -- {source=wiki, content=Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.}\n" ] } ], @@ -571,7 +571,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/07_message_history.ipynb b/notebooks/07_message_history.ipynb index 2ec90ef..78bff0c 100644 --- a/notebooks/07_message_history.ipynb +++ b/notebooks/07_message_history.ipynb @@ -22,7 +22,7 @@ "outputs": [], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -57,7 +57,7 @@ ], "source": [ "// Connect to Redis\n", - "UnifiedJedis client = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));" + "UnifiedJedis client = new UnifiedJedis(new HostAndPort(\"redis\", 6379));" ] }, { @@ -352,7 +352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Bad message key: 01K713B8XBCP1V3XX3M625YTG7:1.7598999199724438E9\n", + "Bad message key: 01KHRTAMA7HRJFYWZB0XZCTAGN:1.7714331863112757E9:874ef56b\n", "\n", "Corrected context:\n", "{role=user, content=What is the population of Great Britain?}\n", @@ -441,7 +441,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/08_semantic_router.ipynb b/notebooks/08_semantic_router.ipynb index 69b0ca3..f11e331 100644 --- a/notebooks/08_semantic_router.ipynb +++ b/notebooks/08_semantic_router.ipynb @@ -29,7 +29,7 @@ ], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.microsoft.onnxruntime:onnxruntime:1.16.3\n", "%maven com.squareup.okhttp3:okhttp:4.12.0\n", @@ -162,7 +162,7 @@ ], "source": [ "// Connect to Redis\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Create vectorizer using SentenceTransformersVectorizer to download and run model locally\n", "BaseVectorizer vectorizer = new SentenceTransformersVectorizer(\"sentence-transformers/all-mpnet-base-v2\");\n", @@ -549,25 +549,25 @@ "output_type": "stream", "text": [ "Found 5 references for technology route:\n", - " Reference: new tech gadgets\n", - " Reference ID: 7e4bca5853c1c329\n", - " Route: technology\n", - "\n", - " Reference: latest AI trends\n", - " Reference ID: f243fb2d073774e8\n", + " Reference: what are the latest advancements in AI?\n", + " Reference ID: 851f51cce5a9ccfb\n", " Route: technology\n", "\n", " Reference: what's trending in tech?\n", " Reference ID: 149a9c9919c58534\n", " Route: technology\n", "\n", - " Reference: what are the latest advancements in AI?\n", - " Reference ID: 851f51cce5a9ccfb\n", + " Reference: latest AI trends\n", + " Reference ID: f243fb2d073774e8\n", " Route: technology\n", "\n", " Reference: tell me about the newest gadgets\n", " Reference ID: 85cc73a1437df27c\n", " Route: technology\n", + "\n", + " Reference: new tech gadgets\n", + " Reference ID: 7e4bca5853c1c329\n", + " Route: technology\n", "\n" ] } @@ -595,8 +595,8 @@ "output_type": "stream", "text": [ "Retrieved reference by ID:\n", - " Reference: new tech gadgets\n", - " Reference ID: 7e4bca5853c1c329\n" + " Reference: what are the latest advancements in AI?\n", + " Reference ID: 851f51cce5a9ccfb\n" ] } ], @@ -739,7 +739,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/10_embeddings_cache.ipynb b/notebooks/10_embeddings_cache.ipynb index ff12991..f41b110 100644 --- a/notebooks/10_embeddings_cache.ipynb +++ b/notebooks/10_embeddings_cache.ipynb @@ -31,7 +31,7 @@ "outputs": [], "source": [ "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", + "%maven redis.clients:jedis:7.3.0\n", "%maven org.slf4j:slf4j-nop:2.0.16\n", "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", @@ -123,7 +123,7 @@ ], "source": [ "// Create Redis connection\n", - "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n", "\n", "// Initialize the embeddings cache\n", "EmbeddingsCache cache = new EmbeddingsCache(\n", @@ -606,17 +606,17 @@ "output_type": "stream", "text": [ "Benchmarking without caching:\n", - "Time taken without caching: 0.079 seconds\n", - "Average time per embedding: 0.0079 seconds\n", + "Time taken without caching: 0.09 seconds\n", + "Average time per embedding: 0.009 seconds\n", "\n", "Benchmarking with caching:\n", - "Time taken with caching: 0.028 seconds\n", - "Average time per embedding: 0.0028 seconds\n", + "Time taken with caching: 0.018 seconds\n", + "Average time per embedding: 0.0018 seconds\n", "\n", "Performance comparison:\n", - "Speedup with caching: 2.82x faster\n", - "Time saved: 0.051 seconds (64.6%)\n", - "Latency reduction: 0.0051 seconds per query\n" + "Speedup with caching: 5.00x faster\n", + "Time saved: 0.072 seconds (80.0%)\n", + "Latency reduction: 0.0072 seconds per query\n" ] } ], @@ -751,7 +751,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "21.0.8+9-Ubuntu-0ubuntu124.04.1" + "version": "21.0.10+7-Ubuntu-124.04" } }, "nbformat": 4, diff --git a/notebooks/11_advanced_queries.ipynb b/notebooks/11_advanced_queries.ipynb index 45321b0..8975077 100644 --- a/notebooks/11_advanced_queries.ipynb +++ b/notebooks/11_advanced_queries.ipynb @@ -1,877 +1,1258 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "intro", - "metadata": {}, - "source": [ - "# Advanced Query Types\n", - "\n", - "In this notebook, we will explore advanced query types available in RedisVL:\n", - "\n", - "1. **`TextQuery`**: Full text search with advanced scoring\n", - "2. **`AggregateHybridQuery`**: Combines text and vector search for hybrid retrieval\n", - "3. **`MultiVectorQuery`**: Search over multiple vector fields simultaneously\n", - "\n", - "These query types are powerful tools for building sophisticated search applications that go beyond simple vector similarity search.\n", - "\n", - "Prerequisites:\n", - "- Ensure RedisVL4J is available in your Java environment.\n", - "- Have a running instance of [Redis Stack](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/) or [Redis Cloud](https://redis.io/cloud)." - ] - }, - { - "cell_type": "markdown", - "id": "setup", - "metadata": {}, - "source": [ - "## Setup and Data Preparation\n", - "\n", - "First, let's create a schema and prepare sample data that includes text fields, numeric fields, and vector fields." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dependencies", - "metadata": {}, - "outputs": [], - "source": [ - "// Load Maven dependencies\n", - "%maven redis.clients:jedis:7.0.0\n", - "%maven org.slf4j:slf4j-nop:2.0.16\n", - "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", - "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", - "\n", - "// Import RedisVL classes\n", - "import com.redis.vl.index.SearchIndex;\n", - "import com.redis.vl.schema.*;\n", - "import com.redis.vl.query.*;\n", - "\n", - "// Import Redis client\n", - "import redis.clients.jedis.UnifiedJedis;\n", - "import redis.clients.jedis.HostAndPort;\n", - "\n", - "// Import Java standard libraries\n", - "import java.util.*;\n", - "import java.nio.*;" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "helper-methods", - "metadata": {}, - "outputs": [], - "source": [ - "// Helper method to convert float arrays to byte arrays for vector fields\n", - "byte[] floatArrayToBytes(float[] vector) {\n", - " ByteBuffer buffer = ByteBuffer.allocate(vector.length * 4).order(ByteOrder.LITTLE_ENDIAN);\n", - " for (float value : vector) {\n", - " buffer.putFloat(value);\n", - " }\n", - " return buffer.array();\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "sample-data-intro", - "metadata": {}, - "source": [ - "## Sample Data with Text Descriptions and Vectors\n", - "\n", - "We'll create sample product data with text descriptions, categories, prices, and two types of embeddings:\n", - "- **text_embedding**: 3-dimensional vectors representing product text\n", - "- **image_embedding**: 2-dimensional vectors representing product images" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "sample-data", - "metadata": {}, - "outputs": [], - "source": [ - "// Sample data with text descriptions, categories, and vectors (matching Python exactly)\n", - "List> data = Arrays.asList(\n", - " Map.of(\n", - " \"product_id\", \"prod_1\",\n", - " \"brief_description\", \"comfortable running shoes for athletes\",\n", - " \"full_description\", \"Engineered with a dual-layer EVA foam midsole and FlexWeave breathable mesh upper, these running shoes deliver responsive cushioning for long-distance runs. The anatomical footbed adapts to your stride while the carbon rubber outsole provides superior traction on varied terrain.\",\n", - " \"category\", \"footwear\",\n", - " \"price\", \"89.99\",\n", - " \"rating\", \"4.5\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.1f, 0.2f, 0.1f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.8f, 0.1f})\n", - " ),\n", - " Map.of(\n", - " \"product_id\", \"prod_2\",\n", - " \"brief_description\", \"lightweight running jacket with water resistance\",\n", - " \"full_description\", \"Stay protected with this ultralight 2.5-layer DWR-coated shell featuring laser-cut ventilation zones and reflective piping for low-light visibility. Packs into its own chest pocket and weighs just 4.2 oz, making it ideal for unpredictable weather conditions.\",\n", - " \"category\", \"outerwear\",\n", - " \"price\", \"129.99\",\n", - " \"rating\", \"4.8\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.2f, 0.3f, 0.2f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.7f, 0.2f})\n", - " ),\n", - " Map.of(\n", - " \"product_id\", \"prod_3\",\n", - " \"brief_description\", \"professional tennis racket for competitive players\",\n", - " \"full_description\", \"Competition-grade racket featuring a 98 sq in head size, 16x19 string pattern, and aerospace-grade graphite frame that delivers explosive power with pinpoint control. Tournament-approved specs include 315g weight and 68 RA stiffness rating for advanced baseline play.\",\n", - " \"category\", \"equipment\",\n", - " \"price\", \"199.99\",\n", - " \"rating\", \"4.9\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.9f, 0.1f, 0.05f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.1f, 0.9f})\n", - " ),\n", - " Map.of(\n", - " \"product_id\", \"prod_4\",\n", - " \"brief_description\", \"yoga mat with extra cushioning for comfort\",\n", - " \"full_description\", \"Premium 8mm thick TPE yoga mat with dual-texture surface - smooth side for hot yoga flow and textured side for maximum grip during balancing poses. Closed-cell technology prevents moisture absorption while alignment markers guide proper positioning in asanas.\",\n", - " \"category\", \"accessories\",\n", - " \"price\", \"39.99\",\n", - " \"rating\", \"4.3\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.15f, 0.25f, 0.15f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.5f, 0.5f})\n", - " ),\n", - " Map.of(\n", - " \"product_id\", \"prod_5\",\n", - " \"brief_description\", \"basketball shoes with excellent ankle support\",\n", - " \"full_description\", \"High-top basketball sneakers with Zoom Air units in forefoot and heel, reinforced lateral sidewalls for explosive cuts, and herringbone traction pattern optimized for hardwood courts. The internal bootie construction and extended ankle collar provide lockdown support during aggressive drives.\",\n", - " \"category\", \"footwear\",\n", - " \"price\", \"139.99\",\n", - " \"rating\", \"4.7\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.12f, 0.18f, 0.12f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.75f, 0.15f})\n", - " ),\n", - " Map.of(\n", - " \"product_id\", \"prod_6\",\n", - " \"brief_description\", \"swimming goggles with anti-fog coating\",\n", - " \"full_description\", \"Low-profile competition goggles with curved polycarbonate lenses offering 180-degree peripheral vision and UV protection. Hydrophobic anti-fog coating lasts 10x longer than standard treatments, while the split silicone strap and interchangeable nose bridges ensure a watertight, custom fit.\",\n", - " \"category\", \"accessories\",\n", - " \"price\", \"24.99\",\n", - " \"rating\", \"4.4\",\n", - " \"text_embedding\", floatArrayToBytes(new float[]{0.3f, 0.1f, 0.2f}),\n", - " \"image_embedding\", floatArrayToBytes(new float[]{0.2f, 0.8f})\n", - " )\n", - ");\n", - "\n", - "System.out.println(\"Created \" + data.size() + \" sample products\");" - ] - }, - { - "cell_type": "markdown", - "id": "schema-intro", - "metadata": {}, - "source": [ - "## Define the Schema\n", - "\n", - "Our schema includes:\n", - "- **Tag fields**: `product_id`, `category`\n", - "- **Text fields**: `brief_description` and `full_description` for full-text search\n", - "- **Numeric fields**: `price`, `rating`\n", - "- **Vector fields**: `text_embedding` (3 dimensions) and `image_embedding` (2 dimensions) for semantic search" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "schema", - "metadata": {}, - "outputs": [], - "source": [ - "IndexSchema schema = IndexSchema.builder()\n", - " .name(\"advanced_queries\")\n", - " .prefix(\"products\")\n", - " .storageType(IndexSchema.StorageType.HASH)\n", - " .field(TagField.builder().name(\"product_id\").build())\n", - " .field(TagField.builder().name(\"category\").build())\n", - " .field(TextField.builder().name(\"brief_description\").build())\n", - " .field(TextField.builder().name(\"full_description\").build())\n", - " .field(NumericField.builder().name(\"price\").build())\n", - " .field(NumericField.builder().name(\"rating\").build())\n", - " .field(\n", - " VectorField.builder()\n", - " .name(\"text_embedding\")\n", - " .dimensions(3)\n", - " .distanceMetric(VectorField.DistanceMetric.COSINE)\n", - " .build())\n", - " .field(\n", - " VectorField.builder()\n", - " .name(\"image_embedding\")\n", - " .dimensions(2)\n", - " .distanceMetric(VectorField.DistanceMetric.COSINE)\n", - " .build())\n", - " .build();\n", - "\n", - "System.out.println(\"Schema created\");" - ] - }, - { - "cell_type": "markdown", - "id": "create-index", - "metadata": {}, - "source": [ - "## Create Index and Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "load-data", - "metadata": {}, - "outputs": [], - "source": [ - "// Connect to Redis\n", - "UnifiedJedis client = new UnifiedJedis(new HostAndPort(\"localhost\", 6379));\n", - "\n", - "// Create the search index\n", - "SearchIndex index = new SearchIndex(schema, client);\n", - "index.create(true);\n", - "\n", - "// Load data\n", - "List keys = new ArrayList<>();\n", - "for (Map product : data) {\n", - " String key = \"products:\" + product.get(\"product_id\");\n", - " keys.add(key);\n", - " Map fields = new HashMap<>();\n", - " product.forEach((k, v) -> {\n", - " if (v instanceof byte[]) {\n", - " fields.put(k, new String((byte[]) v, java.nio.charset.StandardCharsets.ISO_8859_1));\n", - " } else {\n", - " fields.put(k, String.valueOf(v));\n", - " }\n", - " });\n", - " client.hset(key, fields);\n", - "}\n", - "\n", - "System.out.println(\"Loaded \" + keys.size() + \" products into the index\");" - ] - }, - { - "cell_type": "markdown", - "id": "text-query-intro", - "metadata": {}, - "source": [ - "## 1. TextQuery: Full Text Search\n", - "\n", - "The `TextQuery` class enables full text search with advanced scoring algorithms. It's ideal for keyword-based search with relevance ranking.\n", - "\n", - "### Basic Text Search\n", - "\n", - "Let's search for products related to \"running shoes\":" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-basic", - "metadata": {}, - "outputs": [], - "source": [ - "TextQuery textQuery = TextQuery.builder()\n", - " .text(\"running shoes\")\n", - " .textField(\"brief_description\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> results = index.query(textQuery);\n", - "\n", - "System.out.println(\"Found \" + results.size() + \" results:\");\n", - "for (Map result : results) {\n", - " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "text-query-scorers", - "metadata": {}, - "source": [ - "### Text Search with Different Scoring Algorithms\n", - "\n", - "RedisVL supports multiple text scoring algorithms. Let's compare `BM25STD` and `TFIDF`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-bm25", - "metadata": {}, - "outputs": [], - "source": [ - "// BM25 standard scoring (default)\n", - "TextQuery bm25Query = TextQuery.builder()\n", - " .text(\"comfortable shoes\")\n", - " .textField(\"brief_description\")\n", - " .scorer(\"BM25STD\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"Results with BM25 scoring:\");\n", - "List> bm25Results = index.query(bm25Query);\n", - "for (Map result : bm25Results) {\n", - " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-tfidf", - "metadata": {}, - "outputs": [], - "source": [ - "// TFIDF scoring\n", - "TextQuery tfidfQuery = TextQuery.builder()\n", - " .text(\"comfortable shoes\")\n", - " .textField(\"brief_description\")\n", - " .scorer(\"TFIDF\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"Results with TFIDF scoring:\");\n", - "List> tfidfResults = index.query(tfidfQuery);\n", - "for (Map result : tfidfResults) {\n", - " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "text-query-filters", - "metadata": {}, - "source": [ - "### Text Search with Filters\n", - "\n", - "Combine text search with filters to narrow results:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-filter-tag", - "metadata": {}, - "outputs": [], - "source": [ - "// Search for \"shoes\" only in the footwear category\n", - "TextQuery filteredTextQuery = TextQuery.builder()\n", - " .text(\"shoes\")\n", - " .textField(\"brief_description\")\n", - " .filterExpression(Filter.tag(\"category\", \"footwear\"))\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> filteredResults = index.query(filteredTextQuery);\n", - "System.out.println(\"Filtered results (footwear category):\");\n", - "for (Map result : filteredResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-filter-numeric", - "metadata": {}, - "outputs": [], - "source": [ - "// Search for products under $100\n", - "TextQuery priceFilteredQuery = TextQuery.builder()\n", - " .text(\"comfortable\")\n", - " .textField(\"brief_description\")\n", - " .filterExpression(Filter.numeric(\"price\").lt(100))\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> priceResults = index.query(priceFilteredQuery);\n", - "System.out.println(\"Price filtered results (under $100):\");\n", - "for (Map result : priceResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\") + \" - $\" + result.get(\"price\"));\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "text-query-weights", - "metadata": {}, - "source": [ - "### Text Search with Multiple Fields and Weights\n", - "\n", - "You can search across multiple text fields with different weights to prioritize certain fields.\n", - "Here we'll prioritize the `brief_description` field and make text similarity in that field twice as important as text similarity in `full_description`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "text-query-weighted", - "metadata": {}, - "outputs": [], - "source": [ - "Map fieldWeights = new HashMap<>();\n", - "fieldWeights.put(\"brief_description\", 1.0);\n", - "fieldWeights.put(\"full_description\", 0.5);\n", - "\n", - "TextQuery weightedQuery = TextQuery.builder()\n", - " .text(\"shoes\")\n", - " .textFieldWeights(fieldWeights)\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "List> weightedResults = index.query(weightedQuery);\n", - "System.out.println(\"Weighted field search results:\");\n", - "for (Map result : weightedResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "hybrid-query-intro", - "metadata": {}, - "source": [ - "## 2. AggregateHybridQuery: Combining Text and Vector Search\n", - "\n", - "The `AggregateHybridQuery` combines text search and vector similarity to provide the best of both worlds:\n", - "- **Text search**: Finds exact keyword matches\n", - "- **Vector search**: Captures semantic similarity\n", - "\n", - "Results are scored using a weighted combination:\n", - "\n", - "```\n", - "hybrid_score = (alpha) * vector_score + (1 - alpha) * text_score\n", - "```\n", - "\n", - "Where `alpha` controls the balance between vector and text search (default: 0.7)." - ] - }, - { - "cell_type": "markdown", - "id": "hybrid-query-basic-intro", - "metadata": {}, - "source": [ - "### Basic Aggregate Hybrid Query\n", - "\n", - "Let's search for \"running shoes\" with both text and semantic search:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "hybrid-query-basic", - "metadata": {}, - "outputs": [], - "source": [ - "AggregateHybridQuery hybridQuery = AggregateHybridQuery.builder()\n", - " .text(\"running shoes\")\n", - " .textFieldName(\"brief_description\")\n", - " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", - " .vectorFieldName(\"text_embedding\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> hybridResults = index.query(hybridQuery);\n", - "\n", - "System.out.println(\"Hybrid search results:\");\n", - "for (Map result : hybridResults) {\n", - " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", - "}" - ] - }, + "cells": [ + { + "cell_type": "markdown", + "id": "intro", + "metadata": {}, + "source": [ + "# Advanced Query Types\n", + "\n", + "In this notebook, we will explore advanced query types available in RedisVL:\n", + "\n", + "1. **`TextQuery`**: Full text search with advanced scoring\n", + "2. **`AggregateHybridQuery` and `HybridQuery`**: Combines text and vector search for hybrid retrieval\n", + "3. **`MultiVectorQuery`**: Search over multiple vector fields simultaneously\n", + "\n", + "These query types are powerful tools for building sophisticated search applications that go beyond simple vector similarity search.\n", + "\n", + "Prerequisites:\n", + "- Ensure RedisVL4J is available in your Java environment.\n", + "- Have a running instance of [Redis](https://redis.io) or [Redis Cloud](https://redis.io/cloud).\n", + "- For `HybridQuery`, we will need Redis >= 8.4.0 and Jedis >= 7.3.0." + ] + }, + { + "cell_type": "markdown", + "id": "setup", + "metadata": {}, + "source": [ + "## Setup and Data Preparation\n", + "\n", + "First, let's create a schema and prepare sample data that includes text fields, numeric fields, and vector fields." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dependencies", + "metadata": {}, + "outputs": [], + "source": [ + "// Load Maven dependencies\n", + "%maven redis.clients:jedis:7.3.0\n", + "%maven org.slf4j:slf4j-nop:2.0.16\n", + "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", + "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", + "\n", + "// Import RedisVL classes\n", + "import com.redis.vl.index.SearchIndex;\n", + "import com.redis.vl.schema.*;\n", + "import com.redis.vl.query.*;\n", + "\n", + "// Import Redis client\n", + "import redis.clients.jedis.UnifiedJedis;\n", + "import redis.clients.jedis.HostAndPort;\n", + "\n", + "// Import Java standard libraries\n", + "import java.util.*;\n", + "import java.nio.*;" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "helper-methods", + "metadata": {}, + "outputs": [], + "source": [ + "// Helper method to convert float arrays to byte arrays for vector fields\n", + "byte[] floatArrayToBytes(float[] vector) {\n", + " ByteBuffer buffer = ByteBuffer.allocate(vector.length * 4).order(ByteOrder.LITTLE_ENDIAN);\n", + " for (float value : vector) {\n", + " buffer.putFloat(value);\n", + " }\n", + " return buffer.array();\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "sample-data-intro", + "metadata": {}, + "source": [ + "## Sample Data with Text Descriptions and Vectors\n", + "\n", + "We'll create sample product data with text descriptions, categories, prices, and two types of embeddings:\n", + "- **text_embedding**: 3-dimensional vectors representing product text\n", + "- **image_embedding**: 2-dimensional vectors representing product images" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "sample-data", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "hybrid-query-alpha", - "metadata": {}, - "source": [ - "### Adjusting the Alpha Parameter\n", - "\n", - "The `alpha` parameter controls the weight between vector and text search:\n", - "- `alpha=1.0`: Pure vector search\n", - "- `alpha=0.0`: Pure text search\n", - "- `alpha=0.7` (default): 70% vector, 30% text" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Created 6 sample products\n" + ] + } + ], + "source": [ + "// Sample data with text descriptions, categories, and vectors (matching Python exactly)\n", + "List> data = Arrays.asList(\n", + " Map.of(\n", + " \"product_id\", \"prod_1\",\n", + " \"brief_description\", \"comfortable running shoes for athletes\",\n", + " \"full_description\", \"Engineered with a dual-layer EVA foam midsole and FlexWeave breathable mesh upper, these running shoes deliver responsive cushioning for long-distance runs. The anatomical footbed adapts to your stride while the carbon rubber outsole provides superior traction on varied terrain.\",\n", + " \"category\", \"footwear\",\n", + " \"price\", \"89.99\",\n", + " \"rating\", \"4.5\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.1f, 0.2f, 0.1f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.8f, 0.1f})\n", + " ),\n", + " Map.of(\n", + " \"product_id\", \"prod_2\",\n", + " \"brief_description\", \"lightweight running jacket with water resistance\",\n", + " \"full_description\", \"Stay protected with this ultralight 2.5-layer DWR-coated shell featuring laser-cut ventilation zones and reflective piping for low-light visibility. Packs into its own chest pocket and weighs just 4.2 oz, making it ideal for unpredictable weather conditions.\",\n", + " \"category\", \"outerwear\",\n", + " \"price\", \"129.99\",\n", + " \"rating\", \"4.8\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.2f, 0.3f, 0.2f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.7f, 0.2f})\n", + " ),\n", + " Map.of(\n", + " \"product_id\", \"prod_3\",\n", + " \"brief_description\", \"professional tennis racket for competitive players\",\n", + " \"full_description\", \"Competition-grade racket featuring a 98 sq in head size, 16x19 string pattern, and aerospace-grade graphite frame that delivers explosive power with pinpoint control. Tournament-approved specs include 315g weight and 68 RA stiffness rating for advanced baseline play.\",\n", + " \"category\", \"equipment\",\n", + " \"price\", \"199.99\",\n", + " \"rating\", \"4.9\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.9f, 0.1f, 0.05f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.1f, 0.9f})\n", + " ),\n", + " Map.of(\n", + " \"product_id\", \"prod_4\",\n", + " \"brief_description\", \"yoga mat with extra cushioning for comfort\",\n", + " \"full_description\", \"Premium 8mm thick TPE yoga mat with dual-texture surface - smooth side for hot yoga flow and textured side for maximum grip during balancing poses. Closed-cell technology prevents moisture absorption while alignment markers guide proper positioning in asanas.\",\n", + " \"category\", \"accessories\",\n", + " \"price\", \"39.99\",\n", + " \"rating\", \"4.3\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.15f, 0.25f, 0.15f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.5f, 0.5f})\n", + " ),\n", + " Map.of(\n", + " \"product_id\", \"prod_5\",\n", + " \"brief_description\", \"basketball shoes with excellent ankle support\",\n", + " \"full_description\", \"High-top basketball sneakers with Zoom Air units in forefoot and heel, reinforced lateral sidewalls for explosive cuts, and herringbone traction pattern optimized for hardwood courts. The internal bootie construction and extended ankle collar provide lockdown support during aggressive drives.\",\n", + " \"category\", \"footwear\",\n", + " \"price\", \"139.99\",\n", + " \"rating\", \"4.7\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.12f, 0.18f, 0.12f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.75f, 0.15f})\n", + " ),\n", + " Map.of(\n", + " \"product_id\", \"prod_6\",\n", + " \"brief_description\", \"swimming goggles with anti-fog coating\",\n", + " \"full_description\", \"Low-profile competition goggles with curved polycarbonate lenses offering 180-degree peripheral vision and UV protection. Hydrophobic anti-fog coating lasts 10x longer than standard treatments, while the split silicone strap and interchangeable nose bridges ensure a watertight, custom fit.\",\n", + " \"category\", \"accessories\",\n", + " \"price\", \"24.99\",\n", + " \"rating\", \"4.4\",\n", + " \"text_embedding\", floatArrayToBytes(new float[]{0.3f, 0.1f, 0.2f}),\n", + " \"image_embedding\", floatArrayToBytes(new float[]{0.2f, 0.8f})\n", + " )\n", + ");\n", + "\n", + "System.out.println(\"Created \" + data.size() + \" sample products\");" + ] + }, + { + "cell_type": "markdown", + "id": "schema-intro", + "metadata": {}, + "source": [ + "## Define the Schema\n", + "\n", + "Our schema includes:\n", + "- **Tag fields**: `product_id`, `category`\n", + "- **Text fields**: `brief_description` and `full_description` for full-text search\n", + "- **Numeric fields**: `price`, `rating`\n", + "- **Vector fields**: `text_embedding` (3 dimensions) and `image_embedding` (2 dimensions) for semantic search" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "schema", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "hybrid-query-alpha-code", - "metadata": {}, - "outputs": [], - "source": [ - "// More emphasis on vector search (alpha=0.9)\n", - "AggregateHybridQuery vectorHeavyQuery = AggregateHybridQuery.builder()\n", - " .text(\"comfortable\")\n", - " .textFieldName(\"brief_description\")\n", - " .vector(new float[]{0.15f, 0.25f, 0.15f})\n", - " .vectorFieldName(\"text_embedding\")\n", - " .alpha(0.9f) // 90% vector, 10% text\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"Results with alpha=0.9 (vector-heavy):\");\n", - "List> vectorHeavyResults = index.query(vectorHeavyQuery);\n", - "for (Map result : vectorHeavyResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "SLF4J: Failed to load class \"org.slf4j.impl.StaticLoggerBinder\".\n", + "SLF4J: Defaulting to no-operation (NOP) logger implementation\n", + "SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.\n" + ] }, { - "cell_type": "markdown", - "id": "hybrid-query-filter", - "metadata": {}, - "source": [ - "### Aggregate Hybrid Query with Filters\n", - "\n", - "You can also combine hybrid search with filters:" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Schema created\n" + ] + } + ], + "source": [ + "IndexSchema schema = IndexSchema.builder()\n", + " .name(\"advanced_queries\")\n", + " .prefix(\"products\")\n", + " .storageType(IndexSchema.StorageType.HASH)\n", + " .field(TagField.builder().name(\"product_id\").build())\n", + " .field(TagField.builder().name(\"category\").build())\n", + " .field(TextField.builder().name(\"brief_description\").build())\n", + " .field(TextField.builder().name(\"full_description\").build())\n", + " .field(NumericField.builder().name(\"price\").build())\n", + " .field(NumericField.builder().name(\"rating\").build())\n", + " .field(\n", + " VectorField.builder()\n", + " .name(\"text_embedding\")\n", + " .dimensions(3)\n", + " .distanceMetric(VectorField.DistanceMetric.COSINE)\n", + " .build())\n", + " .field(\n", + " VectorField.builder()\n", + " .name(\"image_embedding\")\n", + " .dimensions(2)\n", + " .distanceMetric(VectorField.DistanceMetric.COSINE)\n", + " .build())\n", + " .build();\n", + "\n", + "System.out.println(\"Schema created\");" + ] + }, + { + "cell_type": "markdown", + "id": "create-index", + "metadata": {}, + "source": [ + "## Create Index and Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "load-data", + "metadata": {}, + "outputs": [], + "source": "// Connect to Redis\nUnifiedJedis client = new UnifiedJedis(new HostAndPort(\"redis\", 6379));\n\n// Create the search index\nSearchIndex index = new SearchIndex(schema, client);\nindex.create(true);\n\n// Load data\nList keys = new ArrayList<>();\nfor (Map product : data) {\n String key = \"products:\" + product.get(\"product_id\");\n keys.add(key);\n Map fields = new HashMap<>();\n product.forEach((k, v) -> {\n if (v instanceof byte[]) {\n fields.put(k, new String((byte[]) v, java.nio.charset.StandardCharsets.ISO_8859_1));\n } else {\n fields.put(k, String.valueOf(v));\n }\n });\n client.hset(key, fields);\n}\n\nSystem.out.println(\"Loaded \" + keys.size() + \" products into the index\");" + }, + { + "cell_type": "markdown", + "id": "text-query-intro", + "metadata": {}, + "source": [ + "## 1. TextQuery: Full Text Search\n", + "\n", + "The `TextQuery` class enables full text search with advanced scoring algorithms. It's ideal for keyword-based search with relevance ranking.\n", + "\n", + "### Basic Text Search\n", + "\n", + "Let's search for products related to \"running shoes\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-basic", + "metadata": {}, + "outputs": [], + "source": "TextQuery textQuery = TextQuery.builder()\n .text(\"running shoes\")\n .textField(\"brief_description\")\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n .numResults(5)\n .build();\n\nList> results = index.query(textQuery);\n\nSystem.out.println(\"Found \" + results.size() + \" results:\");\nfor (Map result : results) {\n System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "markdown", + "id": "text-query-scorers", + "metadata": {}, + "source": [ + "### Text Search with Different Scoring Algorithms\n", + "\n", + "RedisVL supports multiple text scoring algorithms. Let's compare `BM25STD` and `TFIDF`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-bm25", + "metadata": {}, + "outputs": [], + "source": "// BM25 standard scoring (default)\nTextQuery bm25Query = TextQuery.builder()\n .text(\"comfortable shoes\")\n .textField(\"brief_description\")\n .scorer(\"BM25STD\")\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n .numResults(3)\n .build();\n\nSystem.out.println(\"Results with BM25 scoring:\");\nList> bm25Results = index.query(bm25Query);\nfor (Map result : bm25Results) {\n System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-tfidf", + "metadata": {}, + "outputs": [], + "source": "// TFIDF scoring\nTextQuery tfidfQuery = TextQuery.builder()\n .text(\"comfortable shoes\")\n .textField(\"brief_description\")\n .scorer(\"TFIDF\")\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n .numResults(3)\n .build();\n\nSystem.out.println(\"Results with TFIDF scoring:\");\nList> tfidfResults = index.query(tfidfQuery);\nfor (Map result : tfidfResults) {\n System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "markdown", + "id": "text-query-filters", + "metadata": {}, + "source": [ + "### Text Search with Filters\n", + "\n", + "Combine text search with filters to narrow results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-filter-tag", + "metadata": {}, + "outputs": [], + "source": "// Search for \"shoes\" only in the footwear category\nTextQuery filteredTextQuery = TextQuery.builder()\n .text(\"shoes\")\n .textField(\"brief_description\")\n .filterExpression(Filter.tag(\"category\", \"footwear\"))\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n .numResults(5)\n .build();\n\nList> filteredResults = index.query(filteredTextQuery);\nSystem.out.println(\"Filtered results (footwear category):\");\nfor (Map result : filteredResults) {\n System.out.println(\" \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-filter-numeric", + "metadata": {}, + "outputs": [], + "source": "// Search for products under $100\nTextQuery priceFilteredQuery = TextQuery.builder()\n .text(\"comfortable\")\n .textField(\"brief_description\")\n .filterExpression(Filter.numeric(\"price\").lt(100))\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"price\"))\n .numResults(5)\n .build();\n\nList> priceResults = index.query(priceFilteredQuery);\nSystem.out.println(\"Price filtered results (under $100):\");\nfor (Map result : priceResults) {\n System.out.println(\" \" + result.get(\"brief_description\") + \" - $\" + result.get(\"price\"));\n}" + }, + { + "cell_type": "markdown", + "id": "text-query-weights", + "metadata": {}, + "source": [ + "### Text Search with Multiple Fields and Weights\n", + "\n", + "You can search across multiple text fields with different weights to prioritize certain fields.\n", + "Here we'll prioritize the `brief_description` field and make text similarity in that field twice as important as text similarity in `full_description`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "text-query-weighted", + "metadata": {}, + "outputs": [], + "source": "Map fieldWeights = new HashMap<>();\nfieldWeights.put(\"brief_description\", 1.0);\nfieldWeights.put(\"full_description\", 0.5);\n\nTextQuery weightedQuery = TextQuery.builder()\n .text(\"shoes\")\n .textFieldWeights(fieldWeights)\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n .numResults(3)\n .build();\n\nList> weightedResults = index.query(weightedQuery);\nSystem.out.println(\"Weighted field search results:\");\nfor (Map result : weightedResults) {\n System.out.println(\" \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "markdown", + "id": "hybrid-query-intro", + "metadata": {}, + "source": [ + "## 2. Hybrid Queries: Combining Text and Vector Search\n", + "\n", + "Hybrid queries combine text search and vector similarity to provide the best of both worlds:\n", + "- **Text search**: Finds exact keyword matches\n", + "- **Vector search**: Captures semantic similarity\n", + "\n", + "As of Redis 8.4.0, Redis natively supports a [`FT.HYBRID`](https://redis.io/docs/latest/commands/ft.hybrid) search command. RedisVL provides a `HybridQuery` class that makes it easy to construct and execute hybrid queries. For earlier versions of Redis, RedisVL provides an `AggregateHybridQuery` class that uses Redis aggregation to achieve similar results." + ] + }, + { + "cell_type": "markdown", + "id": "hybrid-query-basic-intro", + "metadata": {}, + "source": [ + "### Basic Hybrid Query\n", + "\n", + "> NOTE: `HybridQuery` requires Redis >= 8.4.0 and Jedis >= 7.3.0.\n", + "\n", + "Let's search for \"running shoes\" with both text and semantic search, combining the results' scores using a linear combination:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "hybrid-query-basic", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "hybrid-query-filter-code", - "metadata": {}, - "outputs": [], - "source": [ - "// Hybrid search with a price filter\n", - "AggregateHybridQuery filteredHybridQuery = AggregateHybridQuery.builder()\n", - " .text(\"professional equipment\")\n", - " .textFieldName(\"brief_description\")\n", - " .vector(new float[]{0.9f, 0.1f, 0.05f})\n", - " .vectorFieldName(\"text_embedding\")\n", - " .filterExpression(Filter.numeric(\"price\").gt(100))\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> filteredHybridResults = index.query(filteredHybridQuery);\n", - "System.out.println(\"Filtered hybrid results (price > $100):\");\n", - "for (Map result : filteredHybridResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\") + \" - $\" + result.get(\"price\"));\n", - "}" - ] - }, + "ename": "EvalException", + "evalue": "Failed to execute hybrid query: Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mjava.lang.RuntimeException: Failed to execute hybrid query: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1537)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#48:1)\u001b[0m" + ] + } + ], + "source": [ + "// Create a native hybrid query (FT.HYBRID - Redis 8.4+)\n", + "HybridQuery hybridQuery = HybridQuery.builder()\n", + " .text(\"running shoes\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", + " .numResults(5)\n", + " .yieldTextScoreAs(\"text_score\")\n", + " .yieldVsimScoreAs(\"vector_similarity\")\n", + " .combinationMethod(HybridQuery.CombinationMethod.LINEAR)\n", + " .yieldCombinedScoreAs(\"hybrid_score\")\n", + " .build();\n", + "\n", + "List> hybridResults = index.query(hybridQuery);\n", + "\n", + "System.out.println(\"HybridQuery results (native FT.HYBRID):\");\n", + "for (Map result : hybridResults) {\n", + " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "5s8wmcpogwv", + "metadata": {}, + "source": [ + "For earlier versions of Redis, you can use `AggregateHybridQuery` instead:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "rdih4aduzml", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "hybrid-query-scorers", - "metadata": {}, - "source": [ - "### Using Different Text Scorers\n", - "\n", - "AggregateHybridQuery supports the same text scoring algorithms as TextQuery:" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#50:1)\u001b[0m" + ] + } + ], + "source": [ + "// AggregateHybridQuery (FT.AGGREGATE - works on all Redis versions with search)\n", + "AggregateHybridQuery aggHybridQuery = AggregateHybridQuery.builder()\n", + " .text(\"running shoes\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", + " .numResults(5)\n", + " .build();\n", + "\n", + "List> aggResults = index.query(aggHybridQuery);\n", + "\n", + "System.out.println(\"AggregateHybridQuery results (FT.AGGREGATE):\");\n", + "for (Map result : aggResults) {\n", + " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "hybrid-query-alpha", + "metadata": {}, + "source": [ + "### Adjusting the Alpha Parameter\n", + "\n", + "Results are scored using a weighted combination:\n", + "\n", + "```\n", + "hybrid_score = (alpha) * text_score + (1 - alpha) * vector_score\n", + "```\n", + "\n", + "Where `alpha` controls the balance between text and vector search (default: 0.3 for `HybridQuery` and 0.7 for `AggregateHybridQuery`). Note that `AggregateHybridQuery` reverses the definition of `alpha` to be the weight of the vector score.\n", + "\n", + "The `alpha` parameter controls the weight between text and vector search:\n", + "- `alpha=1.0`: Pure text search (or pure vector search for `AggregateHybridQuery`)\n", + "- `alpha=0.0`: Pure vector search (or pure text search for `AggregateHybridQuery`)\n", + "- `alpha=0.3` (default - `HybridQuery`): 30% text, 70% vector" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "hybrid-query-alpha-code", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "hybrid-query-scorer-code", - "metadata": {}, - "outputs": [], - "source": [ - "// Aggregate Hybrid query with TFIDF scorer\n", - "AggregateHybridQuery hybridTfidf = AggregateHybridQuery.builder()\n", - " .text(\"shoes support\")\n", - " .textFieldName(\"brief_description\")\n", - " .vector(new float[]{0.12f, 0.18f, 0.12f})\n", - " .vectorFieldName(\"text_embedding\")\n", - " .textScorer(\"TFIDF\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "List> hybridTfidfResults = index.query(hybridTfidf);\n", - "System.out.println(\"Hybrid query with TFIDF scorer:\");\n", - "for (Map result : hybridTfidfResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with linearAlpha=0.1 (vector-heavy):\n" + ] }, { - "cell_type": "markdown", - "id": "multi-vector-intro", - "metadata": {}, - "source": [ - "## 3. MultiVectorQuery: Multi-Vector Search\n", - "\n", - "The `MultiVectorQuery` allows you to search over multiple vector fields simultaneously. This is useful when you have different types of embeddings (e.g., text and image embeddings) and want to find results that match across multiple modalities.\n", - "\n", - "The final score is calculated as a weighted combination:\n", - "\n", - "```\n", - "combined_score = w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ...\n", - "```" - ] - }, + "ename": "EvalException", + "evalue": "Failed to execute hybrid query: Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mjava.lang.RuntimeException: Failed to execute hybrid query: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1537)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#53:1)\u001b[0m" + ] + } + ], + "source": [ + "// HybridQuery with linear alpha = 0.1 (10% text, 90% vector)\n", + "HybridQuery vectorHeavyQuery = HybridQuery.builder()\n", + " .text(\"comfortable\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.15f, 0.25f, 0.15f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .combinationMethod(HybridQuery.CombinationMethod.LINEAR)\n", + " .linearAlpha(0.1f) // 10% text, 90% vector\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", + " .numResults(3)\n", + " .yieldTextScoreAs(\"text_score\")\n", + " .yieldVsimScoreAs(\"vector_similarity\")\n", + " .yieldCombinedScoreAs(\"hybrid_score\")\n", + " .build();\n", + "\n", + "System.out.println(\"Results with linearAlpha=0.1 (vector-heavy):\");\n", + "List> vectorHeavyResults = index.query(vectorHeavyQuery);\n", + "for (Map result : vectorHeavyResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "euln4vqwvg", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "multi-vector-basic", - "metadata": {}, - "source": [ - "### Basic Multi-Vector Query\n", - "\n", - "First, we need to import the `Vector` class to define our query vectors:" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with alpha=0.9 (vector-heavy):\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "id": "multi-vector-query-basic", - "metadata": {}, - "outputs": [], - "source": [ - "// Define multiple vectors for the query\n", - "com.redis.vl.query.Vector textVector = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", - " .fieldName(\"text_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.7) // 70% weight for text embedding\n", - " .build();\n", - "\n", - "com.redis.vl.query.Vector imageVector = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.8f, 0.1f})\n", - " .fieldName(\"image_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.3) // 30% weight for image embedding\n", - " .build();\n", - "\n", - "// Create a multi-vector query\n", - "MultiVectorQuery multiVectorQuery = MultiVectorQuery.builder()\n", - " .vectors(textVector, imageVector)\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> multiResults = index.query(multiVectorQuery);\n", - "System.out.println(\"Multi-vector search results:\");\n", - "for (Map result : multiResults) {\n", - " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", - "}" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#56:1)\u001b[0m" + ] + } + ], + "source": [ + "// AggregateHybridQuery with alpha=0.9 (90% vector, 10% text)\n", + "AggregateHybridQuery vectorHeavyAggQuery = AggregateHybridQuery.builder()\n", + " .text(\"comfortable\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.15f, 0.25f, 0.15f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .alpha(0.9f) // 90% vector, 10% text\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", + " .numResults(3)\n", + " .build();\n", + "\n", + "System.out.println(\"Results with alpha=0.9 (vector-heavy):\");\n", + "List> vectorHeavyAggResults = index.query(vectorHeavyAggQuery);\n", + "for (Map result : vectorHeavyAggResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "8oy9pi3cqq2", + "metadata": {}, + "source": [ + "### Reciprocal Rank Fusion (RRF)\n", + "\n", + "In addition to combining scores using a linear combination, `HybridQuery` also supports reciprocal rank fusion (RRF) for combining scores. This method is useful when you want to combine scores giving more weight to the top results from each query.\n", + "\n", + "`HybridQuery` allows for the following parameters to be specified for RRF:\n", + "- `rrfWindow`: The window size to use for the RRF combination method. Limits the fusion scope.\n", + "- `rrfConstant`: The constant to use for the RRF combination method. Controls the decay of rank influence.\n", + "\n", + "`AggregateHybridQuery` does not support RRF, and only supports a linear combination of scores." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "agr8k3jy1ip", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "multi-vector-weights", - "metadata": {}, - "source": [ - "### Adjusting Vector Weights\n", - "\n", - "You can adjust the weights to prioritize different vector fields:" - ] - }, + "ename": "EvalException", + "evalue": "Failed to execute hybrid query: Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mjava.lang.RuntimeException: Failed to execute hybrid query: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1537)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#58:1)\u001b[0m" + ] + } + ], + "source": [ + "// HybridQuery with RRF combination method\n", + "HybridQuery rrfQuery = HybridQuery.builder()\n", + " .text(\"comfortable\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.15f, 0.25f, 0.15f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .combinationMethod(HybridQuery.CombinationMethod.RRF)\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", + " .numResults(3)\n", + " .yieldTextScoreAs(\"text_score\")\n", + " .yieldVsimScoreAs(\"vector_similarity\")\n", + " .yieldCombinedScoreAs(\"hybrid_score\")\n", + " .build();\n", + "\n", + "List> rrfResults = index.query(rrfQuery);\n", + "System.out.println(\"Results with RRF combination:\");\n", + "for (Map result : rrfResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "hybrid-query-filter", + "metadata": {}, + "source": [ + "### Hybrid Query with Filters\n", + "\n", + "You can also combine hybrid search with filters:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "hybrid-query-filter-code", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "multi-vector-query-weights", - "metadata": {}, - "outputs": [], - "source": [ - "// More emphasis on image similarity\n", - "com.redis.vl.query.Vector textVec = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.9f, 0.1f, 0.05f})\n", - " .fieldName(\"text_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.2) // 20% weight\n", - " .build();\n", - "\n", - "com.redis.vl.query.Vector imageVec = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.1f, 0.9f})\n", - " .fieldName(\"image_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.8) // 80% weight\n", - " .build();\n", - "\n", - "MultiVectorQuery imageHeavyQuery = MultiVectorQuery.builder()\n", - " .vectors(textVec, imageVec)\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"Results with emphasis on image similarity:\");\n", - "List> imageHeavyResults = index.query(imageHeavyQuery);\n", - "for (Map result : imageHeavyResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] - }, + "ename": "EvalException", + "evalue": "Failed to execute hybrid query: Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mjava.lang.RuntimeException: Failed to execute hybrid query: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1537)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#60:1)\u001b[0m" + ] + } + ], + "source": [ + "// HybridQuery with a price filter (FT.HYBRID)\n", + "HybridQuery filteredHybridQuery = HybridQuery.builder()\n", + " .text(\"professional equipment\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.9f, 0.1f, 0.05f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .filterExpression(Filter.numeric(\"price\").gt(100))\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", + " .numResults(5)\n", + " .combinationMethod(HybridQuery.CombinationMethod.LINEAR)\n", + " .yieldTextScoreAs(\"text_score\")\n", + " .yieldVsimScoreAs(\"vector_similarity\")\n", + " .yieldCombinedScoreAs(\"hybrid_score\")\n", + " .build();\n", + "\n", + "List> filteredHybridResults = index.query(filteredHybridQuery);\n", + "System.out.println(\"HybridQuery filtered results (price > $100):\");\n", + "for (Map result : filteredHybridResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\") + \" - $\" + result.get(\"price\"));\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "p2yl7z26pd", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "multi-vector-filters", - "metadata": {}, - "source": [ - "### Multi-Vector Query with Filters\n", - "\n", - "Combine multi-vector search with filters to narrow results:" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#62:1)\u001b[0m" + ] + } + ], + "source": [ + "// AggregateHybridQuery with a price filter (FT.AGGREGATE)\n", + "AggregateHybridQuery filteredAggHybridQuery = AggregateHybridQuery.builder()\n", + " .text(\"professional equipment\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.9f, 0.1f, 0.05f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .filterExpression(Filter.numeric(\"price\").gt(100))\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", + " .numResults(5)\n", + " .build();\n", + "\n", + "List> filteredAggResults = index.query(filteredAggHybridQuery);\n", + "System.out.println(\"AggregateHybridQuery filtered results (price > $100):\");\n", + "for (Map result : filteredAggResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\") + \" - $\" + result.get(\"price\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "hybrid-query-scorers", + "metadata": {}, + "source": [ + "### Using Different Text Scorers\n", + "\n", + "AggregateHybridQuery supports the same text scoring algorithms as TextQuery:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "hybrid-query-scorer-code", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "multi-vector-query-filter", - "metadata": {}, - "outputs": [], - "source": [ - "// Multi-vector search with category filter\n", - "com.redis.vl.query.Vector textVecFilter = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", - " .fieldName(\"text_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.6)\n", - " .build();\n", - "\n", - "com.redis.vl.query.Vector imageVecFilter = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.8f, 0.1f})\n", - " .fieldName(\"image_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.4)\n", - " .build();\n", - "\n", - "MultiVectorQuery filteredMultiQuery = MultiVectorQuery.builder()\n", - " .vectors(textVecFilter, imageVecFilter)\n", - " .filterExpression(Filter.tag(\"category\", \"footwear\"))\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", - " .numResults(5)\n", - " .build();\n", - "\n", - "List> filteredMultiResults = index.query(filteredMultiQuery);\n", - "System.out.println(\"Filtered multi-vector results (footwear only):\");\n", - "for (Map result : filteredMultiResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\") + \" (\" + result.get(\"category\") + \")\");\n", - "}" - ] - }, + "ename": "EvalException", + "evalue": "Failed to execute hybrid query: Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mjava.lang.RuntimeException: Failed to execute hybrid query: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1537)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#64:1)\u001b[0m" + ] + } + ], + "source": [ + "// HybridQuery with TFIDF scorer (FT.HYBRID)\n", + "HybridQuery hybridTfidf = HybridQuery.builder()\n", + " .text(\"shoes support\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.12f, 0.18f, 0.12f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .textScorer(\"TFIDF\")\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", + " .numResults(3)\n", + " .combinationMethod(HybridQuery.CombinationMethod.LINEAR)\n", + " .yieldTextScoreAs(\"text_score\")\n", + " .yieldVsimScoreAs(\"vector_similarity\")\n", + " .yieldCombinedScoreAs(\"hybrid_score\")\n", + " .build();\n", + "\n", + "List> hybridTfidfResults = index.query(hybridTfidf);\n", + "System.out.println(\"HybridQuery with TFIDF scorer:\");\n", + "for (Map result : hybridTfidfResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "gerdav306fh", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "comparison", - "metadata": {}, - "source": [ - "## Comparing Query Types\n", - "\n", - "Let's compare the two query types side by side:" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#66:1)\u001b[0m" + ] + } + ], + "source": [ + "// AggregateHybridQuery with TFIDF scorer (FT.AGGREGATE)\n", + "AggregateHybridQuery aggTfidf = AggregateHybridQuery.builder()\n", + " .text(\"shoes support\")\n", + " .textFieldName(\"brief_description\")\n", + " .vector(new float[]{0.12f, 0.18f, 0.12f})\n", + " .vectorFieldName(\"text_embedding\")\n", + " .textScorer(\"TFIDF\")\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", + " .numResults(3)\n", + " .build();\n", + "\n", + "List> aggTfidfResults = index.query(aggTfidf);\n", + "System.out.println(\"AggregateHybridQuery with TFIDF scorer:\");\n", + "for (Map result : aggTfidfResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "a5k1r2yt0r", + "metadata": {}, + "source": [ + "### Runtime Parameters for Vector Search Tuning\n", + "\n", + "**Important:** `AggregateHybridQuery` uses FT.AGGREGATE commands which do NOT support runtime parameters.\n", + "\n", + "Runtime parameters (such as `ef_runtime` for HNSW indexes) are only supported with FT.SEARCH (and partially FT.HYBRID) commands.\n", + "\n", + "**For runtime parameter support, use `HybridQuery`, `VectorQuery`, or `VectorRangeQuery` instead:**\n", + "\n", + "- `HybridQuery`: Supports `knnEfRuntime` for HNSW indexes\n", + "- `VectorQuery`: Supports all runtime parameters\n", + "- `VectorRangeQuery`: Supports all runtime parameters\n", + "- `AggregateHybridQuery`: Does NOT support runtime parameters (uses FT.AGGREGATE)" + ] + }, + { + "cell_type": "markdown", + "id": "multi-vector-intro", + "metadata": {}, + "source": [ + "## 3. MultiVectorQuery: Multi-Vector Search\n", + "\n", + "The `MultiVectorQuery` allows you to search over multiple vector fields simultaneously. This is useful when you have different types of embeddings (e.g., text and image embeddings) and want to find results that match across multiple modalities.\n", + "\n", + "The final score is calculated as a weighted combination:\n", + "\n", + "```\n", + "combined_score = w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ...\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "multi-vector-basic", + "metadata": {}, + "source": [ + "### Basic Multi-Vector Query\n", + "\n", + "First, we need to import the `Vector` class to define our query vectors:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "multi-vector-query-basic", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "compare-queries", - "metadata": {}, - "outputs": [], - "source": [ - "// TextQuery - keyword-based search\n", - "TextQuery textQ = TextQuery.builder()\n", - " .text(\"shoes\")\n", - " .textField(\"brief_description\")\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"TextQuery Results (keyword-based):\");\n", - "List> textResults = index.query(textQ);\n", - "for (Map result : textResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}\n", - "System.out.println();\n", - "\n", - "// MultiVectorQuery - searches multiple vector fields\n", - "com.redis.vl.query.Vector mvText = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", - " .fieldName(\"text_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.5)\n", - " .build();\n", - "\n", - "com.redis.vl.query.Vector mvImage = com.redis.vl.query.Vector.builder()\n", - " .vector(new float[]{0.8f, 0.1f})\n", - " .fieldName(\"image_embedding\")\n", - " .dtype(\"float32\")\n", - " .weight(0.5)\n", - " .build();\n", - "\n", - "MultiVectorQuery multiQ = MultiVectorQuery.builder()\n", - " .vectors(mvText, mvImage)\n", - " .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n", - " .numResults(3)\n", - " .build();\n", - "\n", - "System.out.println(\"MultiVectorQuery Results (multiple vectors):\");\n", - "List> mvResults = index.query(multiQ);\n", - "for (Map result : mvResults) {\n", - " System.out.println(\" \" + result.get(\"brief_description\"));\n", - "}" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#70:1)\u001b[0m" + ] + } + ], + "source": [ + "// Define multiple vectors for the query\n", + "com.redis.vl.query.Vector textVector = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", + " .fieldName(\"text_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.7) // 70% weight for text embedding\n", + " .build();\n", + "\n", + "com.redis.vl.query.Vector imageVector = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.8f, 0.1f})\n", + " .fieldName(\"image_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.3) // 30% weight for image embedding\n", + " .build();\n", + "\n", + "// Create a multi-vector query\n", + "MultiVectorQuery multiVectorQuery = MultiVectorQuery.builder()\n", + " .vectors(textVector, imageVector)\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\"))\n", + " .numResults(5)\n", + " .build();\n", + "\n", + "List> multiResults = index.query(multiVectorQuery);\n", + "System.out.println(\"Multi-vector search results:\");\n", + "for (Map result : multiResults) {\n", + " System.out.println(\" \" + result.get(\"product_id\") + \": \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "multi-vector-weights", + "metadata": {}, + "source": [ + "### Adjusting Vector Weights\n", + "\n", + "You can adjust the weights to prioritize different vector fields:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "multi-vector-query-weights", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "best-practices", - "metadata": {}, - "source": [ - "## Best Practices\n", - "\n", - "### When to Use Each Query Type:\n", - "\n", - "1. **`TextQuery`**:\n", - " - When you need precise keyword matching\n", - " - For traditional search engine functionality\n", - " - When text relevance scoring is important\n", - " - Example: Product search, document retrieval\n", - "\n", - "2. **`AggregateHybridQuery`**:\n", - " - When you want to combine keyword and semantic search\n", - " - For improved search quality over pure text or vector search\n", - " - When you have both text and vector representations of your data\n", - " - Example: E-commerce search, content recommendation\n", - "\n", - "3. **`MultiVectorQuery`**:\n", - " - When you have multiple types of embeddings (text, image, audio, etc.)\n", - " - For multi-modal search applications\n", - " - When you want to balance multiple semantic signals\n", - " - Example: Image-text search, cross-modal retrieval" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with emphasis on image similarity:\n" + ] }, { - "cell_type": "markdown", - "id": "cleanup", - "metadata": {}, - "source": [ - "## Cleanup" - ] - }, + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#75:1)\u001b[0m" + ] + } + ], + "source": [ + "// More emphasis on image similarity\n", + "com.redis.vl.query.Vector textVec = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.9f, 0.1f, 0.05f})\n", + " .fieldName(\"text_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.2) // 20% weight\n", + " .build();\n", + "\n", + "com.redis.vl.query.Vector imageVec = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.1f, 0.9f})\n", + " .fieldName(\"image_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.8) // 80% weight\n", + " .build();\n", + "\n", + "MultiVectorQuery imageHeavyQuery = MultiVectorQuery.builder()\n", + " .vectors(textVec, imageVec)\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\"))\n", + " .numResults(3)\n", + " .build();\n", + "\n", + "System.out.println(\"Results with emphasis on image similarity:\");\n", + "List> imageHeavyResults = index.query(imageHeavyQuery);\n", + "for (Map result : imageHeavyResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\"));\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "multi-vector-filters", + "metadata": {}, + "source": [ + "### Multi-Vector Query with Filters\n", + "\n", + "Combine multi-vector search with filters to narrow results:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "multi-vector-query-filter", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "cleanup-code", - "metadata": {}, - "outputs": [], - "source": [ - "// Delete the index and all data\n", - "index.delete(true);\n", - "\n", - "// Close the Redis connection\n", - "client.close();\n", - "\n", - "System.out.println(\"Cleanup complete\");" - ] + "ename": "EvalException", + "evalue": "Failed to connect to localhost:6379.", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mredis.clients.jedis.exceptions.JedisConnectionException: Failed to connect to localhost:6379.\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.connectToFirstSuccessfulHost(DefaultJedisSocketFactory.java:68)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.DefaultJedisSocketFactory.createSocket(DefaultJedisSocketFactory.java:94)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.connect(Connection.java:269)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:507)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection.initializeFromClientConfig(Connection.java:499)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.Connection$Builder.build(Connection.java:60)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.build(ConnectionFactory.java:154)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.lambda$initAuthXManager$0(ConnectionFactory.java:144)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionFactory.makeObject(ConnectionFactory.java:177)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:557)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:299)\u001b[0m", + "\u001b[1m\u001b[31m\tat org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:231)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.util.Pool.getResource(Pool.java:38)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:55)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.ConnectionPool.getResource(ConnectionPool.java:13)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.providers.PooledConnectionProvider.getConnection(PooledConnectionProvider.java:83)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.executors.DefaultCommandExecutor.executeCommand(DefaultCommandExecutor.java:23)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.executeCommand(UnifiedJedis.java:317)\u001b[0m", + "\u001b[1m\u001b[31m\tat redis.clients.jedis.UnifiedJedis.ftAggregate(UnifiedJedis.java:4547)\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.query(SearchIndex.java:1552)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#79:1)\u001b[0m" + ] } - ], - "metadata": { - "kernelspec": { - "display_name": "Java", - "language": "java", - "name": "java" - }, - "language_info": { - "name": "java", - "version": "11" + ], + "source": [ + "// Multi-vector search with category filter\n", + "com.redis.vl.query.Vector textVecFilter = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.1f, 0.2f, 0.1f})\n", + " .fieldName(\"text_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.6)\n", + " .build();\n", + "\n", + "com.redis.vl.query.Vector imageVecFilter = com.redis.vl.query.Vector.builder()\n", + " .vector(new float[]{0.8f, 0.1f})\n", + " .fieldName(\"image_embedding\")\n", + " .dtype(\"float32\")\n", + " .weight(0.4)\n", + " .build();\n", + "\n", + "MultiVectorQuery filteredMultiQuery = MultiVectorQuery.builder()\n", + " .vectors(textVecFilter, imageVecFilter)\n", + " .filterExpression(Filter.tag(\"category\", \"footwear\"))\n", + " .returnFields(Arrays.asList(\"product_id\", \"brief_description\", \"category\", \"price\"))\n", + " .numResults(5)\n", + " .build();\n", + "\n", + "List> filteredMultiResults = index.query(filteredMultiQuery);\n", + "System.out.println(\"Filtered multi-vector results (footwear only):\");\n", + "for (Map result : filteredMultiResults) {\n", + " System.out.println(\" \" + result.get(\"brief_description\") + \" (\" + result.get(\"category\") + \")\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "comparison", + "metadata": {}, + "source": [ + "## Comparing Query Types\n", + "\n", + "Let's compare the three query types side by side:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "compare-queries", + "metadata": {}, + "outputs": [], + "source": "// TextQuery - keyword-based search\nTextQuery textQ = TextQuery.builder()\n .text(\"shoes\")\n .textField(\"brief_description\")\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n .numResults(3)\n .build();\n\nSystem.out.println(\"TextQuery Results (keyword-based):\");\nList> textResults = index.query(textQ);\nfor (Map result : textResults) {\n System.out.println(\" \" + result.get(\"brief_description\"));\n}\nSystem.out.println();\n\n// HybridQuery - combines text and vector search (native FT.HYBRID)\nHybridQuery hybridQ = HybridQuery.builder()\n .text(\"shoes\")\n .textFieldName(\"brief_description\")\n .vector(new float[]{0.1f, 0.2f, 0.1f})\n .vectorFieldName(\"text_embedding\")\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n .numResults(3)\n .combinationMethod(HybridQuery.CombinationMethod.LINEAR)\n .yieldTextScoreAs(\"text_score\")\n .yieldVsimScoreAs(\"vector_similarity\")\n .yieldCombinedScoreAs(\"hybrid_score\")\n .build();\n\nSystem.out.println(\"HybridQuery Results (text + vector):\");\nList> hybridQResults = index.query(hybridQ);\nfor (Map result : hybridQResults) {\n System.out.println(\" \" + result.get(\"brief_description\"));\n}\nSystem.out.println();\n\n// MultiVectorQuery - searches multiple vector fields\ncom.redis.vl.query.Vector mvText = com.redis.vl.query.Vector.builder()\n .vector(new float[]{0.1f, 0.2f, 0.1f})\n .fieldName(\"text_embedding\")\n .dtype(\"float32\")\n .weight(0.7)\n .build();\n\ncom.redis.vl.query.Vector mvImage = com.redis.vl.query.Vector.builder()\n .vector(new float[]{0.8f, 0.1f})\n .fieldName(\"image_embedding\")\n .dtype(\"float32\")\n .weight(0.3)\n .build();\n\nMultiVectorQuery multiQ = MultiVectorQuery.builder()\n .vectors(mvText, mvImage)\n .returnFields(Arrays.asList(\"product_id\", \"brief_description\"))\n .numResults(3)\n .build();\n\nSystem.out.println(\"MultiVectorQuery Results (multiple vectors):\");\nList> mvResults = index.query(multiQ);\nfor (Map result : mvResults) {\n System.out.println(\" \" + result.get(\"brief_description\"));\n}" + }, + { + "cell_type": "markdown", + "id": "best-practices", + "metadata": {}, + "source": [ + "## Best Practices\n", + "\n", + "### When to Use Each Query Type:\n", + "\n", + "1. **`TextQuery`**:\n", + " - When you need precise keyword matching\n", + " - For traditional search engine functionality\n", + " - When text relevance scoring is important\n", + " - Example: Product search, document retrieval\n", + "\n", + "2. **`HybridQuery`** (Redis 8.4+):\n", + " - When you want to combine keyword and semantic search using native FT.HYBRID\n", + " - Supports both LINEAR and RRF score combination methods\n", + " - Supports runtime parameters for HNSW indexes\n", + " - Falls back automatically to AggregateHybridQuery on older Redis versions\n", + " - Example: E-commerce search, content recommendation\n", + "\n", + "3. **`AggregateHybridQuery`** (all Redis versions with search):\n", + " - Backward-compatible hybrid search using FT.AGGREGATE\n", + " - Only supports LINEAR score combination\n", + " - Does not support runtime parameters\n", + " - Example: Hybrid search on Redis versions before 8.4\n", + "\n", + "4. **`MultiVectorQuery`**:\n", + " - When you have multiple types of embeddings (text, image, audio, etc.)\n", + " - For multi-modal search applications\n", + " - When you want to balance multiple semantic signals\n", + " - Example: Image-text search, cross-modal retrieval" + ] + }, + { + "cell_type": "markdown", + "id": "cleanup", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "cleanup-code", + "metadata": {}, + "outputs": [ + { + "ename": "EvalException", + "evalue": "Index advanced_queries does not exist", + "output_type": "error", + "traceback": [ + "\u001b[1m\u001b[31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1m\u001b[31mcom.redis.vl.exceptions.RedisVLException: Index advanced_queries does not exist\u001b[0m", + "\u001b[1m\u001b[31m\tat com.redis.vl.index.SearchIndex.delete(SearchIndex.java:971)\u001b[0m", + "\u001b[1m\u001b[31m\tat .(#81:2)\u001b[0m" + ] } + ], + "source": [ + "// Delete the index and all data\n", + "index.delete(true);\n", + "\n", + "// Close the Redis connection\n", + "client.close();\n", + "\n", + "System.out.println(\"Cleanup complete\");" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Java", + "language": "java", + "name": "java" }, - "nbformat": 4, - "nbformat_minor": 5 -} + "language_info": { + "codemirror_mode": "java", + "file_extension": ".jshell", + "mimetype": "text/x-java-source", + "name": "Java", + "pygments_lexer": "java", + "version": "21.0.10+7-Ubuntu-124.04" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/notebooks/README.md b/notebooks/README.md index 55762c3..5ec5a10 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -25,7 +25,7 @@ This project uses Docker Compose to set up a complete environment for running Java-based AI applications with RedisVL. The environment includes: - A Jupyter Notebook server with Java kernel support -- Redis Stack (includes Redis and RedisInsight) +- Redis (includes built-in search, vector, and JSON capabilities) - RedisVL library built from source - Pre-installed dependencies for AI/ML workloads @@ -67,7 +67,7 @@ OPENAI_API_KEY=your_openai_api_key_here 4. Access RedisInsight (optional): - Open your browser and navigate to [http://localhost:8001](http://localhost:8001) - Connect to Redis using the following details: - - Host: redis-stack + - Host: redis - Port: 6379 - No password (unless configured) @@ -84,6 +84,7 @@ OPENAI_API_KEY=your_openai_api_key_here | [01_getting_started.ipynb](./01_getting_started.ipynb) | Introduction to RedisVL basic concepts and usage | ✅ | | [02_hybrid_queries.ipynb](./02_hybrid_queries.ipynb) | Demonstrates hybrid search capabilities combining vector and text queries | ✅ | | [05_hash_vs_json.ipynb](./05_hash_vs_json.ipynb) | Comparison of Redis Hash vs JSON storage types for vector data | ✅ | +| [11_advanced_queries.ipynb](./11_advanced_queries.ipynb) | TextQuery, HybridQuery, AggregateHybridQuery, and MultiVectorQuery | ✅ | ## Project Structure @@ -135,7 +136,7 @@ The Docker setup includes: - Includes Python environment for utilities 2. **Redis Container**: - - Uses Redis Stack image with Vector Search capabilities + - Uses Redis image with built-in search and vector capabilities - Persists data using Docker volumes - Exposes Redis on port 6379 and RedisInsight on port 8001 diff --git a/notebooks/docker-compose.yml b/notebooks/docker-compose.yml index f30e1d6..19e5f67 100644 --- a/notebooks/docker-compose.yml +++ b/notebooks/docker-compose.yml @@ -15,9 +15,9 @@ services: - ./resources:/home/jovyan/resources - ../core/build/libs:/home/jovyan/libs depends_on: - - redis-stack - redis-stack: - image: redis/redis-stack:latest + - redis + redis: + image: redis:latest ports: - "6379:6379" # Redis database port - "8001:8001" # RedisInsight port diff --git a/notebooks/jupyter/Dockerfile b/notebooks/jupyter/Dockerfile index eed2d5a..d332c49 100644 --- a/notebooks/jupyter/Dockerfile +++ b/notebooks/jupyter/Dockerfile @@ -26,6 +26,7 @@ COPY ./gradlew /home/jovyan/redisvl-src/ COPY ./gradlew.bat /home/jovyan/redisvl-src/ COPY ./build.gradle.kts /home/jovyan/redisvl-src/ COPY ./settings.gradle.kts /home/jovyan/redisvl-src/ +COPY ./gradle.properties /home/jovyan/redisvl-src/ COPY ./spotbugs-exclude.xml /home/jovyan/redisvl-src/ # Build RedisVL from source @@ -36,7 +37,7 @@ RUN chmod +x gradlew && ./gradlew clean build publishToMavenLocal -x test WORKDIR /home/jovyan/java # Copy the built JAR to a known location (excluding javadoc and sources JARs) -RUN cp /home/jovyan/redisvl-src/core/build/libs/redisvl-0.12.2.jar /home/jovyan/java/redisvl-core.jar +RUN cp $(ls /home/jovyan/redisvl-src/core/build/libs/redisvl-*.jar | grep -v javadoc | grep -v sources) /home/jovyan/java/redisvl-core.jar # Download all dependencies including Jedis and its transitive dependencies RUN mvn dependency:copy-dependencies -DoutputDirectory=./lib diff --git a/notebooks/jupyter/java/pom.xml b/notebooks/jupyter/java/pom.xml index 3e8aa00..a0cf948 100644 --- a/notebooks/jupyter/java/pom.xml +++ b/notebooks/jupyter/java/pom.xml @@ -26,7 +26,7 @@ redis.clients jedis - 5.2.0 + 7.3.0