diff --git a/snooty.toml b/snooty.toml index c0d688f0..f8b6d380 100644 --- a/snooty.toml +++ b/snooty.toml @@ -30,6 +30,7 @@ toc_landing_pages = [ "/security", "/data-formats", "/upgrade", + "/aggregation" ] sharedinclude_root = "https://raw.githubusercontent.com/10gen/docs-shared/main/" diff --git a/source/aggregation.txt b/source/aggregation.txt index 96e7f63e..527a9626 100644 --- a/source/aggregation.txt +++ b/source/aggregation.txt @@ -18,12 +18,12 @@ Transform Your Data with Aggregation :depth: 2 :class: singlecol -.. TODO: - .. toctree:: - :titlesonly: - :maxdepth: 1 +.. toctree:: + :titlesonly: + :maxdepth: 1 - /aggregation/aggregation-tutorials + Atlas Search </aggregation/atlas-search> + Atlas Vector Search </aggregation/vector-search> Overview -------- @@ -88,7 +88,7 @@ Aggregation Example ------------------- .. note:: - + The examples in this guide use the ``restaurants`` collection in the ``sample_restaurants`` database from the :atlas:`Atlas sample datasets `. To learn how to create a free MongoDB Atlas cluster and load the sample datasets, see the :atlas:`Get Started with Atlas @@ -110,7 +110,7 @@ of New York. To do so, it uses an aggregation pipeline that contains the followi .. io-code-block:: :copyable: - .. input:: /includes/aggregation.php + .. input:: /includes/aggregation/aggregation.php :start-after: start-match-group :end-before: end-match-group :language: php @@ -146,13 +146,13 @@ from the preceding :ref:`php-aggregation-example`: .. io-code-block:: :copyable: - .. input:: /includes/aggregation.php + .. input:: /includes/aggregation/aggregation.php :start-after: start-explain :end-before: end-explain :language: php :dedent: - .. output:: + .. output:: :visible: false {"explainVersion":"2","queryPlanner":{"namespace":"sample_restaurants.restaurants", @@ -188,6 +188,15 @@ pages in the {+mdb-server+} manual: :manual:`Explain Output ` and :manual:`Query Plans `. +Atlas Search and Vector Search +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can perform full-text searches by using the Atlas Search feature.
To +learn more, see the :ref:`php-atlas-search` guide. + +You can perform similarity searches on vector embeddings by using the +Atlas Vector Search feature. To learn more, see the :ref:`php-vector-search` guide. + .. TODO: Aggregation Tutorials ~~~~~~~~~~~~~~~~~~~~~ diff --git a/source/aggregation/atlas-search.txt b/source/aggregation/atlas-search.txt new file mode 100644 index 00000000..d47ac750 --- /dev/null +++ b/source/aggregation/atlas-search.txt @@ -0,0 +1,256 @@ +.. _php-atlas-search: + +============ +Atlas Search +============ + +.. facet:: + :name: genre + :values: reference + +.. meta:: + :keywords: code example, semantic, text + +.. contents:: On this page + :local: + :backlinks: none + :depth: 2 + :class: singlecol + +Overview +-------- + +In this guide, you can learn how to perform searches on your documents +by using the Atlas Search feature. The {+library-short+} allows you to +perform Atlas Search queries by using the :ref:`Aggregation Builder API +`. + +.. note:: Deployment Compatibility + + You can use the Atlas Search feature only when + you connect to MongoDB Atlas clusters. This feature is not + available for self-managed deployments. + +To learn more about Atlas Search, see the :atlas:`Atlas Search Overview +`. The Atlas Search implementation +for the {+library-short+} internally uses the ``$search`` aggregation operator +to perform queries. To learn more about this operator, see the +:atlas:`$search ` reference in +the Atlas documentation. + +.. note:: Atlas Vector Search + + To perform searches on vector embeddings in MongoDB, you can use the + Atlas Vector Search API. To learn about this feature, see + the :ref:`php-vector-search` guide. + +Atlas Search Index +~~~~~~~~~~~~~~~~~~ + +Before you can perform Atlas Search queries, you must create an Atlas +Search index on your collection. To learn more about creating this index +type, see the :ref:`php-atlas-search-index` guide. 
+ +Search Aggregation Stage +------------------------ + +Import the following classes into your application to perform Atlas +Search queries by using the Aggregation Builder: + +.. literalinclude:: /includes/aggregation/atlas-search.php + :language: php + :dedent: + :start-after: start-imports + :end-before: end-imports + +To create a ``$search`` stage in your aggregation pipeline, perform the +following actions: + +1. Create an array to store the pipeline stages + +#. Call the ``Stage::search()`` method to create the Atlas Search stage + +#. Within the body of the ``search()`` method, use methods from the + ``Search`` builder class to construct your Search query criteria + +The following code demonstrates the template for constructing basic Atlas Search +queries: + +.. code-block:: php + + $pipeline = [ + Stage::search( + /* Atlas Search query specifications + Search::compound(...) */ + ), + ]; + +Atlas Search Query Examples +--------------------------- + +In this section, you can learn how to perform different types of Atlas +Search queries by using the Aggregation Builder. The examples in this +section use sample data from the ``sample_restaurants.restaurants`` +collection. + +Compound Query with Filter +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the ``Search::compound()`` method to combine two or more operators +into a single query. This method takes named arguments for your clauses, +such as ``must`` and ``filter``. In each clause, use the +``Search::text()`` method to specify the strings to look for when +performing the full-text search. + +This example performs an Atlas Search query that has the following +specifications: + +- Includes a ``must`` clause to search the ``name`` field for the string + ``"kitchen"`` +- Includes a ``should`` clause to rank documents higher if the + ``cuisine`` field includes ``"american"`` +- Includes a ``filter`` clause to return only documents in which the + ``borough`` value is ``"Queens"`` + +.. 
io-code-block:: + :copyable: true + + .. input:: /includes/aggregation/atlas-search.php + :language: php + :dedent: + :start-after: start-compound-search-query + :end-before: end-compound-search-query + + .. output:: + :language: json + :visible: false + + {"_id":...,"borough":"Queens","cuisine":"American","name":"Kitchen Door"} + {"_id":...,"borough":"Queens","cuisine":"American","name":"Cc Kitchen"} + {"_id":...,"borough":"Queens","cuisine":"American","name":"Suite Kitchen"} + // Results truncated + +Autocomplete Query +~~~~~~~~~~~~~~~~~~ + +The {+library-short+} provides the ``Search::autocomplete()`` method to run +autocomplete searches on documents in your collections. + +To learn more about this type of Atlas Search query, see the +:atlas:`autocomplete ` reference in the +Atlas documentation. + +.. note:: + + Your Atlas Search index must be configured for autocomplete queries. + To learn more, see :atlas:`How to Index Fields for Autocompletion + ` in the Atlas + documentation. + +The following code performs an Atlas Search autocomplete query for the +string ``"Lucy"`` on the ``name`` field: + +.. io-code-block:: + :copyable: true + + .. input:: /includes/aggregation/atlas-search.php + :language: php + :dedent: + :start-after: start-autocomplete-search-query + :end-before: end-autocomplete-search-query + + .. output:: + :language: json + :visible: false + + {"name":"Juicy Lucy"} + {"name":"Lucy'S Vietnamese Kitchen"} + {"name":"Lucy'S Cantina Royale"} + // Results Truncated + +You can also pass the following optional parameters to the ``autocomplete()`` +method to customize the query: + +.. 
list-table:: + :header-rows: 1 + + * - Optional Parameter + - Description + - Default Value + + * - ``fuzzy`` + - Enables fuzzy search and fuzzy search options + - ``false`` + + * - ``tokenOrder`` + - Specifies order in which to search for tokens + - ``'any'`` + +To learn more about these parameters, see the :atlas:`Options +` section of the +``autocomplete`` operator reference in the Atlas documentation. + +Search Options +-------------- + +You can use the ``search()`` method to perform many types of Atlas +Search queries. Depending on your desired query, you can pass the +following optional parameters to ``search()``: + +.. list-table:: + :header-rows: 1 + + * - Optional Parameter + - Type + - Description + + * - ``index`` + - ``string`` + - Provides the name of the Atlas Search index to use + + * - ``highlight`` + - ``array`` + - Specifies highlighting options for displaying search terms in their + original context + + * - ``concurrent`` + - ``bool`` + - Parallelizes search query across segments on dedicated search nodes + + * - ``count`` + - ``string`` + - Specifies the count options for retrieving a count of the results + + * - ``searchAfter`` + - ``string`` + - Specifies a reference point for returning documents starting + immediately following that point + + * - ``searchBefore`` + - ``string`` + - Specifies a reference point for returning documents starting + immediately preceding that point + + * - ``scoreDetails`` + - ``bool`` + - Specifies whether to retrieve a detailed breakdown of the score + for results + + * - ``sort`` + - ``array`` + - Specifies the fields on which to sort the results + + * - ``returnStoredSource`` + - ``bool`` + - Specifies whether to perform a full document lookup on the + backend database or return only stored source fields directly + from Atlas Search + + * - ``tracking`` + - ``array`` + - Specifies the tracking option to retrieve analytics information + on the search terms + +To learn more about these parameters, see the 
:atlas:`Fields +` section of the +``$search`` operator reference in the Atlas documentation. diff --git a/source/aggregation/vector-search.txt b/source/aggregation/vector-search.txt new file mode 100644 index 00000000..b9ca56dc --- /dev/null +++ b/source/aggregation/vector-search.txt @@ -0,0 +1,217 @@ +.. _php-vector-search: + +=================== +Atlas Vector Search +=================== + +.. facet:: + :name: genre + :values: reference + +.. meta:: + :keywords: code example, semantic, text, embeddings + +.. contents:: On this page + :local: + :backlinks: none + :depth: 2 + :class: singlecol + +Overview +-------- + +In this guide, you can learn how to perform searches on your documents +by using the Atlas Vector Search feature. The {+library-short+} allows you to +perform Atlas Vector Search queries by using the :ref:`Aggregation Builder API +`. + +.. note:: Deployment Compatibility + + You can use the Atlas Vector Search feature only when + you connect to MongoDB Atlas clusters. This feature is not + available for self-managed deployments. + +To learn more about Atlas Vector Search, see the :atlas:`Atlas Vector +Search Overview `. The +Atlas Vector Search implementation for the {+library-short+} internally +uses the ``$vectorSearch`` aggregation operator to perform queries. To +learn more about this operator, see the :atlas:`$vectorSearch +` reference in the +Atlas documentation. + +.. note:: Atlas Search + + To perform advanced full-text search on your documents, you can use the + Atlas Search API. To learn about this feature, see the + :ref:`php-atlas-search` guide. + +Atlas Vector Search Index +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before you can perform Atlas Vector Search queries, you must create an +Atlas Vector Search index on your collection. To learn more about +creating this index type, see the :ref:`php-atlas-search-index` guide. 
+ +Vector Search Aggregation Stage +------------------------------- + +Import the following classes into your application to perform Atlas +Vector Search queries by using the Aggregation Builder: + +.. literalinclude:: /includes/aggregation/vector-search.php + :language: php + :dedent: + :start-after: start-imports + :end-before: end-imports + +To create a ``$vectorSearch`` stage in your aggregation pipeline, perform the +following actions: + +1. Create an array to store the pipeline stages + +#. Call the ``Stage::vectorSearch()`` method to create the Atlas Vector + Search stage + +#. Within the body of the ``vectorSearch()`` method, specify the + criteria for your vector query + +The following code demonstrates the template for constructing basic Atlas Vector +Search queries: + +.. code-block:: php + + $pipeline = [ + Stage::vectorSearch( + /* Atlas Vector Search query specifications + index: '', + path: '', ...*/ + ), + ]; + +You must pass the following parameters to the ``vectorSearch()`` method: + +.. list-table:: + :header-rows: 1 + + * - Parameter + - Type + - Description + + * - ``index`` + - ``string`` + - Name of the vector search index + + * - ``path`` + - ``array`` or ``string`` + - Field that stores vector embeddings + + * - ``queryVector`` + - ``array`` + - Vector representation of your query + + * - ``limit`` + - ``int`` + - Number of results to return + +Atlas Vector Search Query Examples +---------------------------------- + +In this section, you can learn how to perform Atlas Vector +Search queries by using the Aggregation Builder. The examples in this +section use sample data from the ``sample_mflix.embedded_movies`` +collection. + +.. note:: Query Vector Length + + For demonstrative purposes, the examples in this section use + sample query vectors that contain very few elements, compared to + the query vector you might use in a runnable application. 
To view an + example that contains the full-length query vector, see the + :atlas:`Atlas Vector Search Quick Start ` + and select :guilabel:`PHP` from the :guilabel:`Select your language` dropdown in the upper-right + corner of the page. + +Basic Vector Search Query +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following code performs an Atlas Vector Search query on the +``plot_embedding`` vector field: + +.. io-code-block:: + :copyable: true + + .. input:: /includes/aggregation/vector-search.php + :language: php + :dedent: + :start-after: start-basic-query + :end-before: end-basic-query + + .. output:: + :language: json + :visible: false + + {"title":"Thrill Seekers"} + {"title":"About Time"} + {"title":"Timecop"} + // Results truncated + +Vector Search Score +~~~~~~~~~~~~~~~~~~~ + +The following code performs the same query as in the preceding example, +but outputs only the ``title`` field and ``vectorSearchScore`` meta +field, which describes how well the document matches the query vector: + +.. io-code-block:: + :copyable: true + + .. input:: /includes/aggregation/vector-search.php + :language: php + :dedent: + :start-after: start-score-query + :end-before: end-score-query + + .. output:: + :language: json + :visible: false + + {"title":"Thrill Seekers","score":0.927734375} + {"title":"About Time","score":0.925750732421875} + {"title":"Timecop","score":0.9241180419921875} + // Results truncated + +Vector Search Options +--------------------- + +You can use the ``vectorSearch()`` method to perform many types of Atlas +Vector Search queries. Depending on your desired query, you can pass the +following optional parameters to ``vectorSearch()``: + +.. 
list-table:: + :header-rows: 1 + + * - Optional Parameter + - Type + - Description + - Default Value + + * - ``exact`` + - ``bool`` + - Specifies whether to run an Exact Nearest Neighbor (``true``) or + Approximate Nearest Neighbor (``false``) search + - ``false`` + + * - ``filter`` + - ``QueryInterface`` or ``array`` + - Specifies a pre-filter for documents to search on + - no filtering + + * - ``numCandidates`` + - ``int`` or ``null`` + - Specifies the number of nearest neighbors to use during the + search + - ``null`` + +To learn more about these parameters, see the :atlas:`Fields +` section of the +``$vectorSearch`` operator reference in the Atlas documentation. diff --git a/source/includes/aggregation.php b/source/includes/aggregation/aggregation.php similarity index 100% rename from source/includes/aggregation.php rename to source/includes/aggregation/aggregation.php diff --git a/source/includes/aggregation/atlas-search.php b/source/includes/aggregation/atlas-search.php new file mode 100644 index 00000000..80f165f2 --- /dev/null +++ b/source/includes/aggregation/atlas-search.php @@ -0,0 +1,145 @@ +sample_restaurants->restaurants; + +define('WAIT_TIMEOUT_SEC', 300); + +echo "\nCreating the Atlas Search index.\n"; +$collection->createSearchIndex( + ['mappings' => ['dynamic' => true]], +); + +// Waits for the index to be queryable +wait(function () use ($collection) { + echo '.'; + foreach ($collection->listSearchIndexes() as $index) { + if ($index->name === 'default') { + return $index->queryable; + } + } + + return false; +}); + +echo "\n"; + +// start-compound-search-query +$pipeline = [ + Stage::search( + Search::compound( + must: [ + Search::text( + query: 'kitchen', + path: 'name', + ), + ], + should: [ + Search::text( + query: 'american', + path: 'cuisine', + ), + ], + filter: [ + Search::text( + query: 'Queens', + path: 'borough', + ), + ], + ), + ), + Stage::project( + borough: 1, + cuisine: 1, + name: 1 + ), + Stage::limit(3) +]; + +$cursor = 
$collection->aggregate($pipeline); + +foreach ($cursor as $doc) { + echo json_encode($doc), PHP_EOL; +} +// end-compound-search-query + +echo "\nUpdating the Atlas Search index for autocomplete.\n"; +$collection->updateSearchIndex( + 'default', + ['mappings' => [ + "dynamic" => false, + "fields" => [ + "name" => [ + ["type" => "stringFacet"], + ["type" => "string"], + [ + "foldDiacritics" => false, + "maxGrams" => 7, + "minGrams" => 3, + "tokenization" => "edgeGram", + "type" => "autocomplete" + ], + ] + ] + ]] +); + +// Waits for the index to be updated. +wait(function () use ($collection) { + echo '.'; + foreach ($collection->listSearchIndexes() as $index) { + if ($index->name === 'default') { + return $index->latestDefinition->mappings->fields->name[2]['type'] === 'autocomplete' + && $index->status === 'READY'; + } + } + + return false; +}); + +echo "\n"; + +// start-autocomplete-search-query +$pipeline = [ + Stage::search( + Search::autocomplete( + query: 'Lucy', + path: 'name', + ), + ), + Stage::limit(3), + Stage::project(_id: 0, name: 1), +]; + +$cursor = $collection->aggregate($pipeline); + +foreach ($cursor as $doc) { + echo json_encode($doc), PHP_EOL; +} +// end-autocomplete-search-query + +/** + * This function waits until the callback returns true or the timeout is reached. 
+ */ +function wait(Closure $callback): void +{ + $timeout = hrtime()[0] + WAIT_TIMEOUT_SEC; + while (hrtime()[0] < $timeout) { + if ($callback()) { + return; + } + + sleep(5); + } + + throw new RuntimeException('Time out'); +} diff --git a/source/includes/aggregation/vector-search.php b/source/includes/aggregation/vector-search.php new file mode 100644 index 00000000..c8e0c924 --- /dev/null +++ b/source/includes/aggregation/vector-search.php @@ -0,0 +1,104 @@ +sample_mflix->embedded_movies; + +define('WAIT_TIMEOUT_SEC', 300); + +echo "\nCreating the Atlas Vector Search index.\n"; +$collection->createSearchIndex( + [ + 'fields' => [[ + 'type' => 'vector', + 'path' => 'plot_embedding', + 'numDimensions' => 1536, + 'similarity' => 'dotProduct', + 'quantization' => 'scalar' + ]] + ], + ['name' => 'vector', 'type' => 'vectorSearch'], +); + +// Wait for the index to be queryable. +wait(function () use ($collection) { + echo '.'; + foreach ($collection->listSearchIndexes() as $index) { + if ($index->name === 'vector') { + return $index->queryable; + } + } + + return false; +}); + +echo "\n"; + +// start-basic-query +$pipeline = [ + Stage::vectorSearch( + index: 'vector', + path: 'plot_embedding', + queryVector: [-0.0016261312, -0.028070757, -0.011342932], + numCandidates: 150, + limit: 5, + ), + Stage::project( + _id: 0, + title: 1, + ), +]; + +$cursor = $collection->aggregate($pipeline); + +foreach ($cursor as $doc) { + echo json_encode($doc), PHP_EOL; +} +// end-basic-query + +// start-score-query +$pipeline = [ + Stage::vectorSearch( + index: 'vector', + path: 'plot_embedding', + queryVector: [-0.0016261312, -0.028070757, -0.011342932], + numCandidates: 150, + limit: 5, + ), + Stage::project( + _id: 0, + title: 1, + score: ['$meta' => 'vectorSearchScore'], + ), +]; + +$cursor = $collection->aggregate($pipeline); + +foreach ($cursor as $doc) { + echo json_encode($doc), PHP_EOL; +} +// end-score-query + +/** + * This function waits until the callback returns true or the 
timeout is reached. + */ +function wait(Closure $callback): void +{ + $timeout = hrtime()[0] + WAIT_TIMEOUT_SEC; + while (hrtime()[0] < $timeout) { + if ($callback()) { + return; + } + + sleep(5); + } + + throw new RuntimeException('Time out'); +} diff --git a/source/whats-new.txt b/source/whats-new.txt index 5731697e..62b87004 100644 --- a/source/whats-new.txt +++ b/source/whats-new.txt @@ -49,6 +49,12 @@ improvements, and fixes: replaced by these new methods in a future driver release, so consider changing the usages in your application. +- Adds named arguments for the ``SearchStage``, ``VectorSearchStage``, + and ``SearchMetaStage`` builder classes. This change + allows you to write Atlas Search and Atlas Vector Search queries in a + more type-safe and organized way. To learn more, see the + :ref:`php-atlas-search` and :ref:`php-vector-search` guides. + - Deprecates the ``flags`` option, used for the deprecated MMAPv1 storage engine. This option will be removed in {+library-short+} v2.0.