diff --git a/bin/htaccess.yaml b/bin/htaccess.yaml index a3e1cff76cd..ee337dd880e 100644 --- a/bin/htaccess.yaml +++ b/bin/htaccess.yaml @@ -291,4 +291,139 @@ type: 'redirect' code: 303 outputs: - 'v2.2' +--- +redirect-path: '/administration/ulimit' +url-base: '/reference/ulimit' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/2d' +url-base: '/core/2d' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/2dsphere' +url-base: '/core/2dsphere' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/geohaystack' +url-base: '/core/geohaystack' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/gridfs' +url-base: '/core/gridfs' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/reference/gridfs' +url-base: '/applications/gridfs' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/applications/2d' +url-base: '/applications/geospatial-indexes' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/applications/2dsphere' +url-base: '/applications/geospatial-indexes' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/applications/geohaystack' +url-base: '/applications/geospatial-indexes' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/reference/geospatial-queries' +url-base: '/applications/geospatial-indexes' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/tutorial/calculate-distances-using-spherical-geometry-with-2d-geospatial-indexes' +url-base: '/applications/geospatial-indexes' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/reference/privilege-documents' +url-base: '/security' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/reference/user-privileges' +url-base: '/security' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/tutorial/control-access-to-mongodb-with-kerberos-authentication' +url-base: '/security' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/tutorial/install-mongodb-enterprise' +url-base: '/administration/snmp' +type: 'redirect' +code: 303 +outputs: + - 'v2.2' +--- +redirect-path: '/core/object-id' +url-base: '/reference/object-id' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/gridfs' +url-base: '/reference/gridfs' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' +--- +redirect-path: '/applications/database-references' +url-base: '/reference/database-references' +type: 'redirect' +code: 301 +outputs: + - 'manual' + - 'after-v2.2' ... + diff --git a/bin/makecloth/links.yaml b/bin/makecloth/links.yaml index cef176cf81d..3c8230c3301 100644 --- a/bin/makecloth/links.yaml +++ b/bin/makecloth/links.yaml @@ -24,4 +24,52 @@ type: 'use' link-path: '$(public-branch-output)/reference/methods' referent: 'method' type: 'use' -... 
\ No newline at end of file +--- +link-path: '$(public-branch-output)/core/object-id.txt' +referent: '../reference/object-id.txt' +type: 'redirect' +--- +link-path: '$(public-branch-output)/applications/gridfs.txt' +referent: '../reference/gridfs.txt' +type: 'redirect' +--- +link-path: '$(public-output)/administration/replica-set-architectures' +referent: '../core/replica-set-architectures' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/journaling' +referent: '../core/journaling' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/import-export' +referent: '../core/import-export' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/master-slave' +referent: '../core/master-slave' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/operational-segregation' +referent: '../core/operational-segregation' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/backups' +referent: '../core/backups' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/snmp' +referent: '../tutorial/monitor-with-snmp' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/security' +referent: '../core/security' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/ssl' +referent: '../tutorial/setup-ssl-configuration' +type: 'redirect' +--- +link-path: '$(public-branch-output)/administration/tag-aware-sharding' +referent: '../core/tag-aware-sharding' +type: 'redirect' +... diff --git a/source/administration.txt b/source/administration.txt index ecd3efbea67..bfb403c1949 100644 --- a/source/administration.txt +++ b/source/administration.txt @@ -11,24 +11,40 @@ documentation in other sections including: :doc:`/sharding`, .. toctree:: :maxdepth: 1 - administration/configuration - administration/operational-segregation - administration/journaling - administration/ssl - administration/snmp - administration/monitoring - administration/import-export - administration/backups - administration/ulimit - administration/production-notes + /administration/configuration + /administration/backups + /data-center-awareness + /core/journaling + /tutorial/configure-ssl + /tutorial/monitor-with-snmp + /administration/monitoring + /tutorial/manage-the-database-profiler + /core/import-export + /reference/ulimit + /administration/production-notes + /tutorial .. 
seealso::

   - :doc:`/administration/replica-sets`
-   - :doc:`/administration/replica-set-architectures`
+   - :doc:`/core/replica-set-architectures`
+   - :doc:`/tutorial/configure-replica-set-secondary-sync-target`
+   - :doc:`/tutorial/configure-a-delayed-replica-set-member`
+   - :doc:`/tutorial/configure-a-hidden-replica-set-member`
+   - :doc:`/tutorial/configure-a-non-voting-replica-set-member`
+   - :doc:`/tutorial/configure-secondary-only-replica-set-member`
+   - :doc:`/tutorial/manage-chained-replication`
+   - :doc:`/tutorial/expand-replica-set`
+   - :doc:`/tutorial/remove-replica-set-member`
+   - :doc:`/tutorial/add-replica-set-arbiter`
+   - :doc:`/tutorial/replace-replica-set-member`
+   - :doc:`/tutorial/adjust-replica-set-member-priority`
+   - :doc:`/tutorial/resync-replica-set-member`
+   - :doc:`/tutorial/recover-data-following-unexpected-shutdown`
+   - :doc:`/tutorial/troubleshoot-replica-sets`
   - :doc:`/administration/sharded-clusters`
-   - :doc:`/administration/sharded-cluster-architectures`
-   - :doc:`/administration/tag-aware-sharding`
+   - :doc:`/core/sharded-cluster-architectures`
+   - :doc:`/core/tag-aware-sharding`
   - :doc:`/core/indexes`
   - :doc:`/administration/indexes`
diff --git a/source/administration/backups.txt b/source/administration/backups.txt
index 4c6c67c2e4c..67f55134e6a 100644
--- a/source/administration/backups.txt
+++ b/source/administration/backups.txt
@@ -1,150 +1,39 @@
-=====================================
-Backup Strategies for MongoDB Systems
-=====================================
+==========================================
+Backup and Recovery Operations for MongoDB
+==========================================

-.. default-domain:: mongodb
+.. toctree::
+   :hidden:

-Backups are an important part of any operational disaster recovery
-plan. A good backup plan must be able to capture data in
-a consistent and usable state, and operators must be able to automate
-both the backup and the recovery operations. Also test all components
-of the backup system to ensure that you can recover backed up data as
-needed. If you cannot effectively restore your database from the
-backup, then your backups are useless. This document addresses
-higher level backup strategies, for more information on specific
-backup procedures consider the following documents:
+   /core/backups

-- :doc:`/tutorial/backup-databases-with-filesystem-snapshots`.
-- :doc:`/tutorial/backup-databases-with-binary-database-dumps`.
-- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump`
-- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots`
-- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps`
-- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters`
-- :doc:`/tutorial/restore-single-shard`
-- :doc:`/tutorial/restore-sharded-cluster`
+For an overview of backup strategies and considerations for all
+MongoDB deployments, see :doc:`/core/backups`. For practical
+instructions and example backup procedures, consider the following
+documents:

-.. _backup-considerations:
+Backup and Recovery Procedures
+------------------------------

-Backup Considerations
----------------------
+.. toctree::
+   :maxdepth: 1

-As you develop a backup strategy for your MongoDB deployment consider
-the following factors:
+   /tutorial/backup-databases-with-binary-database-dumps
+   /tutorial/backup-databases-with-filesystem-snapshots
+   /tutorial/copy-databases-between-instances
+   /tutorial/recover-data-following-unexpected-shutdown

-- Geography. 
Ensure that you move some backups away from the your - primary database infrastructure. +.. _backup-and-restore-sharded-clusters: -- System errors. Ensure that your backups can survive situations where - hardware failures or disk errors impact the integrity or - availability of your backups. +Backup and Restore Sharded Clusters +----------------------------------- -- Production constraints. Backup operations themselves sometimes require - substantial system resources. It is important to consider the time of - the backup schedule relative to peak usage and maintenance windows. +.. toctree:: + :maxdepth: 1 -- System capabilities. Some of the block-level snapshot tools require - special support on the operating-system or infrastructure level. - -- Database configuration. :term:`Replication` and :term:`sharding - ` can affect the process and impact of the backup - implementation. See :ref:`sharded-cluster-backups` and - :ref:`replica-set-backups`. - -- Actual requirements. You may be able to save time, effort, and space - by including only crucial data in the most frequent backups and - backing up less crucial data less frequently. - -.. _backup-approaches: - -Approaches to Backing Up MongoDB Systems ----------------------------------------- - -There are two main methodologies for backing up MongoDB -instances. Creating binary "dumps" of the database using -:program:`mongodump` or creating filesystem level snapshots. Both -methodologies have advantages and disadvantages: - -- binary database dumps are comparatively small, because they don't - include index content or pre-allocated free space, and :ref:`record - padding `. However, it's impossible to - capture a copy of a running system that reflects a single moment in - time using a binary dump. - -- filesystem snapshots, sometimes called block level backups, produce - larger backup sizes, but complete quickly and can reflect a single - moment in time on a running system. However, snapshot systems - require filesystem and operating system support and tools. - -The best option depends on the requirements of your deployment and -disaster recovery needs. Typically, filesystem snapshots are because -of their accuracy and simplicity; however, :program:`mongodump` is a -viable option used often to generate backups of MongoDB systems. - -The following topics provide details and procedures on the two approaches: - -- :doc:`/tutorial/backup-databases-with-filesystem-snapshots`. -- :doc:`/tutorial/backup-databases-with-binary-database-dumps`. - -In some cases, taking backups is difficult or impossible because of -large data volumes, distributed architectures, and data transmission -speeds. In these situations, increase the number of members in your -replica set or sets. - -Backup Strategies for MongoDB Deployments ------------------------------------------ - -.. _sharded-cluster-backups: - -Sharded Cluster Backup Considerations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /includes/note-shard-cluster-backup.rst - -:term:`Sharded clusters ` complicate backup operations, -as distributed systems. True point-in-time backups are only possible -when stopping all write activity from the application. To create a -precise moment-in-time snapshot of a cluster, stop all application write -activity to the database, capture a backup, and allow only write -operations to the database after the backup is complete. 
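It is also advisable to pause the cluster balancer for the duration of
the backup window, so that chunk migrations do not run while you
capture data; the replica set considerations below make the same
recommendation. A minimal sketch in the :program:`mongo` shell,
assuming a connection to a :program:`mongos` instance:

.. code-block:: javascript

   // Pause chunk migrations for the duration of the backup window.
   sh.stopBalancer()

   // ... capture the backup of each shard and the config metadata ...

   // Resume normal balancing once the backup completes.
   sh.startBalancer()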
- -However, you can capture a backup of a cluster that **approximates** a -point-in-time backup by capturing a backup from a secondary member of -the replica sets that provide the shards in the cluster at roughly the -same moment. If you decide to use an approximate-point-in-time backup -method, ensure that your application can operate using a copy of the -data that does not reflect a single moment in time. - -The following documents describe sharded cluster related backup -procedures: - -- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump` -- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots` -- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps` -- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters` -- :doc:`/tutorial/restore-single-shard` -- :doc:`/tutorial/restore-sharded-cluster` - -.. _replica-set-backups: - -Replica Set Backup Considerations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In most cases, backing up data stored in a :term:`replica set` is -similar to backing up data stored in a single instance. It is possible -to lock a single :term:`secondary` database and then create a backup -from that instance. When you unlock the database, the secondary will -catch up with the :term:`primary`. You may also choose to deploy a -dedicated :term:`hidden member` for backup purposes. - -If you have a :term:`sharded cluster` where each :term:`shard` is itself -a replica set, you can use this method to create a backup of the entire -cluster without disrupting the operation of the node. In these -situations you should still turn off the balancer when you create -backups. - -For any cluster, using a non-primary node to create backups is -particularly advantageous in that the backup operation does not affect -the performance of the primary. Replication itself provides some measure -of redundancy. Nevertheless, keeping point-in time backups of your -cluster to provide for disaster recovery and as an additional layer of -protection is crucial. + /tutorial/backup-small-sharded-cluster-with-mongodump + /tutorial/backup-sharded-cluster-with-filesystem-snapshots + /tutorial/backup-sharded-cluster-with-database-dumps + /tutorial/restore-single-shard + /tutorial/restore-sharded-cluster + /tutorial/schedule-backup-window-for-sharded-clusters diff --git a/source/administration/configuration.txt b/source/administration/configuration.txt index 0a93339d949..cfcd957d35f 100644 --- a/source/administration/configuration.txt +++ b/source/administration/configuration.txt @@ -146,7 +146,7 @@ Consider the following explanation for these configuration decisions: connecting over the ``localhost`` interface for the first time to create user credentials. -.. seealso:: :doc:`/administration/security` +.. seealso:: :doc:`/core/security` Replication and Sharding Configuration -------------------------------------- diff --git a/source/administration/indexes.txt b/source/administration/indexes.txt index 72c4a598c12..36323d4e0ee 100644 --- a/source/administration/indexes.txt +++ b/source/administration/indexes.txt @@ -4,484 +4,28 @@ Indexing Operations .. default-domain:: mongodb -This document provides operational guidelines and procedures for -indexing data in MongoDB collections. For the fundamentals of MongoDB -indexing, see the :doc:`/core/indexes` document. For strategies and -practical approaches, see the :doc:`/applications/indexes` document. 
- Indexes allow MongoDB to process and fulfill queries quickly by creating small and efficient representations of the documents in a collection. -.. index:: index; create -.. _index-create-index: - -Create an Index ---------------- - -To create an index, use :method:`db.collection.ensureIndex()` or a similar -:api:`method from your driver <>`. For example -the following creates an index on the ``phone-number`` field -of the ``people`` collection: - -.. code-block:: javascript - - db.people.ensureIndex( { "phone-number": 1 } ) - -:method:`ensureIndex() ` only creates an -index if an index of the same specification does not already exist. - -All indexes support and optimize the performance for queries that select -on this field. For queries that cannot use an index, MongoDB must scan -all documents in a collection for documents that match the query. - -.. example:: - - If you create an index on the ``user_id`` field in the ``records``, - this index is, the index will support the following query: - - .. code-block:: javascript - - db.records.find( { user_id: 2 } ) - - However, the following query, on the ``profile_url`` field is not - supported by this index: - - .. code-block:: javascript - - db.records.find( { profile_url: 2 } ) - -If your collection holds a large amount of data, consider building the index in the -background, as described in :ref:`index-creation-background`. To build -indexes on replica sets, see the :ref:`index-build-on-replica-sets` -section for more information. - -.. index:: index; create -.. index:: index; compound -.. _index-create-compound-index: - -Create a Compound Index ------------------------ - -To create a :ref:`compound index ` use an -operation that resembles the following prototype: - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1, b: 1, c: 1 } ) - -For example, the following operation will create an index on the -``item``, ``category``, and ``price`` fields of the ``products`` -collection: - -.. code-block:: javascript - - db.products.ensureIndex( { item: 1, category: 1, price: 1 } ) - -Some drivers may specify indexes, using ``NumberLong(1)`` rather than -``1`` as the specification. This does not have any affect on the -resulting index. - -.. include:: /includes/note-build-indexes-on-replica-sets.rst - -.. index:: index; options -.. _index-special-creation-options: - -If your collection is large, build the index in the background, as -described in :ref:`index-creation-background`. If you build in the -background on a live replica set, see also -:ref:`index-build-on-replica-sets`. - -Special Creation Options ------------------------- - -.. note:: - - TTL collections use a special ``expire`` index option. See - :doc:`/tutorial/expire-data` for more information. - -.. index:: index; hashed -.. _index-hashed-index: - -Hashed Indexes -~~~~~~~~~~~~~~ - -.. versionadded:: 2.4 - -To create a :ref:`hashed index `, specify -``hashed`` as the value of the index key, as in the following: - -.. example:: - - .. code-block:: javascript - - db.collection.ensureIndex( { a: "hashed" } ) - -MongoDB supports ``hashed`` indexes of any single field. The hashing -function collapses sub-documents and computes the hash for the entire -value, but does not support multi-key (i.e. arrays) indexes. - -You may not create compound indexes that have ``hashed`` index fields. - -.. index:: index; sparse -.. 
_index-sparse-index: - -Sparse Indexes -~~~~~~~~~~~~~~ - -To create a :ref:`sparse index ` on a field, use an -operation that resembles the following prototype: - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1 }, { sparse: true } ) - -The following example creates a sparse index on the ``users`` -table that *only* indexes the ``twitter_name`` *if* a document has -this field. This index will not include documents in this collection -without the ``twitter_name`` field. - -.. code-block:: javascript - - db.users.ensureIndex( { twitter_name: 1 }, { sparse: true } ) - -.. note:: - - Sparse indexes can affect the results returned by the query, - particularly with respect to sorts on fields *not* included in the - index. See the :ref:`sparse index ` section for - more information. - -.. index:: index; unique -.. _index-unique-index: - -Unique Indexes -~~~~~~~~~~~~~~ - -To create a :ref:`unique indexes `, consider the -following prototype: - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1 }, { unique: true } ) - -For example, you may want to create a unique index on the ``"tax-id":`` -of the ``accounts`` collection to prevent storing multiple account -records for the same legal entity: - -.. code-block:: javascript - - db.accounts.ensureIndex( { "tax-id": 1 }, { unique: true } ) - -The :ref:`_id index ` is a unique index. In some -situations you may consider using ``_id`` field itself for this kind -of data rather than using a unique index on another field. - -In many situations you will want to combine the ``unique`` constraint -with the ``sparse`` option. When MongoDB indexes a field, if a -document does not have a value for a field, the index entry for that -item will be ``null``. Since unique indexes cannot have duplicate -values for a field, without the ``sparse`` option, MongoDB will reject -the second document and all subsequent documents without the indexed -field. Consider the following prototype. - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1 }, { unique: true, sparse: true } ) - -You can also enforce a unique constraint on :ref:`compound indexes -`, as in the following prototype: - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1, b: 1 }, { unique: true } ) - -These indexes enforce uniqueness for the *combination* of index keys -and *not* for either key individually. - -.. index:: index; create in background -.. _index-create-in-background: - -Create in Background -~~~~~~~~~~~~~~~~~~~~ - -To create an index in the background you can specify :ref:`background -construction `. Consider the following -prototype invocation of :method:`db.collection.ensureIndex()`: - -.. code-block:: javascript - - db.collection.ensureIndex( { a: 1 }, { background: true } ) - -Consider the section on :ref:`background index construction -` for more information about these indexes -and their implications. - -.. index:: index; drop duplicates -.. index:: index; duplicates -.. _index-drop-duplicates: - -Drop Duplicates -~~~~~~~~~~~~~~~ - -To force the creation of a :ref:`unique index ` -index on a collection with duplicate values in the field you are -indexing you can use the ``dropDups`` option. This will force MongoDB -to create a *unique* index by deleting documents with duplicate values -when building the index. Consider the following prototype invocation -of :method:`db.collection.ensureIndex()`: - -.. 
code-block:: javascript - - db.collection.ensureIndex( { a: 1 }, { dropDups: true } ) - -See the full documentation of :ref:`duplicate dropping -` for more information. - -.. warning:: - - Specifying ``{ dropDups: true }`` may delete data from your - database. Use with extreme caution. - -Refer to the :method:`ensureIndex() ` -documentation for additional index creation options. - -Information about Indexes -------------------------- - -.. index:: index; list indexes -.. _index-list-indexes-for-collection: - -List all Indexes on a Collection -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To return a list of all indexes on a collection, use the, use the -:method:`db.collection.getIndexes()` method or a similar -:api:`method for your driver <>`. - -For example, to view all indexes on the ``people`` collection: - -.. code-block:: javascript - - db.people.getIndexes() - -.. index:: index; list indexes -.. _index-list-indexes-for-database: - -List all Indexes for a Database -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To return a list of all indexes on all collections in a database, use -the following operation in the :program:`mongo` shell: - -.. code-block:: javascript - - db.system.indexes.find() - -.. index:: index; measure use -.. _index-measure-index-use: -.. _indexes-measuring-use: - -Measure Index Use -~~~~~~~~~~~~~~~~~ - -Query performance is a good general indicator of index use; -however, for more precise insight into index use, MongoDB provides the -following tools: - -- :method:`explain() ` - - Append the :method:`explain() ` method to any cursor - (e.g. query) to return a document with statistics about the query - process, including the index used, the number of documents scanned, - and the time the query takes to process in milliseconds. - -- :method:`cursor.hint()` - - Append the :method:`hint() ` to any cursor (e.g. - query) with the index as the argument to *force* MongoDB - to use a specific index to fulfill the query. Consider the following - example: - - .. code-block:: javascript - - db.people.find( { name: "John Doe", zipcode: { $gt: 63000 } } } ).hint( { zipcode: 1 } ) - - You can use :method:`hint() ` and :method:`explain() - ` in conjunction with each other to compare the - effectiveness of a specific index. Specify the ``$natural`` operator - to the :method:`hint() ` method to prevent MongoDB from - using *any* index: - - .. code-block:: javascript - - db.people.find( { name: "John Doe", zipcode: { $gt: 63000 } } } ).hint( { $natural: 1 } ) - -- :data:`~serverStatus.indexCounters` - - Use the :data:`~serverStatus.indexCounters` data in the output of - :dbcommand:`serverStatus` for insight into database-wise index - utilization. - -.. index:: index; remove -.. _index-remove-index: - -Remove Indexes --------------- - -To remove an index, use the :method:`db.collection.dropIndex()` method, -as in the following example: - -.. code-block:: javascript - - db.accounts.dropIndex( { "tax-id": 1 } ) - -This will remove the index on the ``"tax-id"`` field in the ``accounts`` -collection. The shell provides the following document after completing -the operation: - -.. code-block:: javascript - - { "nIndexesWas" : 3, "ok" : 1 } - -Where the value of ``nIndexesWas`` reflects the number of indexes -*before* removing this index. You can also use the -:method:`db.collection.dropIndexes()` to remove *all* indexes, except -for the :ref:`_id index ` from a collection. - -These shell helpers provide wrappers around the -:dbcommand:`dropIndexes` :term:`database command`. 
Your :doc:`client -library ` may have a different or additional -interface for these operations. - -.. index:: index; rebuild -.. _index-rebuild-index: - -Rebuild Indexes ---------------- - -If you need to rebuild indexes for a collection you can use the -:method:`db.collection.reIndex()` method. This will drop all indexes, -including the :ref:`_id index `, and then rebuild -all indexes. The operation takes the following form: - -.. code-block:: javascript - - db.accounts.reIndex() - -MongoDB will return the following document when the operation -completes: - -.. code-block:: javascript - - { - "nIndexesWas" : 2, - "msg" : "indexes dropped for collection", - "nIndexes" : 2, - "indexes" : [ - { - "key" : { - "_id" : 1, - "tax-id" : 1 - }, - "ns" : "records.accounts", - "name" : "_id_" - } - ], - "ok" : 1 - } - -This shell helper provides a wrapper around the :dbcommand:`reIndex` -:term:`database command`. Your :doc:`client library ` -may have a different or additional interface for this operation. - -.. include:: /includes/note-build-indexes-on-replica-sets.rst - -.. index:: index; replica set -.. index:: replica set; index -.. _index-build-on-replica-sets: -.. _index-building-replica-sets: - -Build Indexes on Replica Sets ------------------------------ - -:ref:`Background index creation operations -` become *foreground* indexing operations -on :term:`secondary` members of replica sets. The foreground index -building process blocks all replication and read operations on the -secondaries while they build the index. - -Secondaries will begin building indexes *after* the -:term:`primary` finishes building the index. In :term:`sharded clusters -`, the :program:`mongos` will send :method:`ensureIndex() -` to the primary members of the replica -set for each shard, which then replicate to the secondaries after the -primary finishes building the index. - -To minimize the impact of building an index on your replica set, use -the following procedure to build indexes on secondaries: - -.. note:: - - If you need to build an index in a :term:`sharded cluster`, repeat - the following procedure for each replica set that provides each - :term:`shard`. - -#. Stop the :program:`mongod` process on one secondary. Restart the - :program:`mongod` process *without* the :option:`--replSet ` - option and running on a different port. [#different-port]_ This - instance is now in "standalone" mode. - -#. Create the new index or rebuild the index on this :program:`mongod` - instance. - -#. Restart the :program:`mongod` instance with the - :option:`--replSet ` option. Allow replication - to catch up on this member. - -#. Repeat this operation on all of the remaining secondaries. - -#. Run :method:`rs.stepDown()` on the :term:`primary` member of the - set, and then repeat this procedure on the former primary. - -.. warning:: - - Ensure that your :term:`oplog` is large enough to permit the - indexing or re-indexing operation to complete without falling - too far behind to catch up. See the ":ref:`oplog sizing - `" documentation for additional - information. - -.. note:: - - This procedure *does* take one member out of the replica set at a - time. However, this procedure will only affect one member of the - set at a time rather than *all* secondaries at the same time. - -.. [#different-port] By running the :program:`mongod` on a different - port, you ensure that the other members of the replica set and all - clients will not contact the member while you are building the - index. - -.. 
index:: index; monitor index building -.. _index-monitor-index-building: -.. _indexes-admin-stop-in-progress-build: - -Monitor and Control Index Building ----------------------------------- - -.. todo:: insert links to the values in the inprog array following the - completion of DOCS-162 - -To see the status of the indexing processes, you can use the -:method:`db.currentOp()` method in the :program:`mongo` shell. The value -of the ``query`` field and the ``msg`` field will indicate if the -operation is an index build. The ``msg`` field also indicates the -percent of the build that is complete. - -To terminate an ongoing index build, use the -:method:`db.killOp()` method in the :program:`mongo` shell. - -.. versionchanged:: 2.4 - Before MongoDB 2.4, you could *only* terminate *background* index - builds. After 2.4, you can terminate any index build, including - foreground index builds. +The documents in this section outline specific tasks related to +building and maintaining indexes for data in MongoDB collections. For +a conceptual overview of MongoDB indexing, see the +:doc:`/core/indexes` document. For strategies and practical +approaches, see the :doc:`/applications/indexes` document. + +.. toctree:: + :maxdepth: 1 + + /tutorial/create-an-index + /tutorial/create-a-compound-index + /tutorial/create-a-unique-index + /tutorial/create-a-sparse-index + /tutorial/create-a-hashed-index + /tutorial/build-indexes-on-replica-sets + /tutorial/build-indexes-in-the-background + /tutorial/remove-indexes + /tutorial/rebuild-indexes + /tutorial/manage-in-progress-indexing-operations + /tutorial/list-indexes + /tutorial/measure-index-use + /tutorial/roll-back-to-v1.8-index diff --git a/source/administration/production-notes.txt b/source/administration/production-notes.txt index 50a7ad11307..c67201db97e 100644 --- a/source/administration/production-notes.txt +++ b/source/administration/production-notes.txt @@ -14,7 +14,7 @@ Backups ------- To make backups of your MongoDB database, please refer to -:doc:`/administration/backups`. +:doc:`/core/backups`. Networking ---------- @@ -57,7 +57,7 @@ For MongoDB on Linux use the following recommended configurations: files `. - Set the file descriptor limit and the user process limit above 20,000, - according to the suggestions in :doc:`/administration/ulimit`. A low + according to the suggestions in :doc:`/reference/ulimit`. A low ulimit will affect MongoDB when under heavy use and will produce weird errors. @@ -277,12 +277,12 @@ BSON Document Size Limit There is a :limit:`BSON Document Size` -- at the time of this writing 16MB per document. If you have large objects, use :doc:`GridFS -` instead. +` instead. Set Appropriate Write Concern for Write Operations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -See :ref:`write concern ` for more information. +See :doc:`/core/write-concern` for more information. Dynamic Schema ~~~~~~~~~~~~~~ @@ -336,7 +336,7 @@ Consider: - and/or using :doc:`$toLower ` or :doc:`$toUpper ` in the - :doc:`aggregation framework ` + :doc:`aggregation framework ` Type Sensitive Fields ~~~~~~~~~~~~~~~~~~~~~ @@ -382,7 +382,7 @@ get up to an odd number of votes. Don't disable journaling ~~~~~~~~~~~~~~~~~~~~~~~~ -See :doc:`Journaling ` for more information. +See :doc:`Journaling ` for more information. Keep Replica Set Members Up-to-Date ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -394,7 +394,7 @@ here: 1. Monitoring and alerts for any lagging can be done via various means. MMS shows a graph of replica set lag -#. 
Using :ref:`getLastError ` with
+#. Using :doc:`getLastError ` with
    ``w:'majority'``, you will get a timeout or no return if a majority
    of the set is lagging. This is thus another way to guard against lag
    and get some reporting back of its occurrence.
@@ -425,7 +425,7 @@ Additional Deployment Considerations
   sharded collection before a massive bulk import. Usually this isn't
   necessary but on a bulk import of size it is helpful.

-- Use :doc:`security/auth ` mode if you
+- Use :doc:`security/auth ` mode if you
   need it. By default :setting:`auth` is not enabled and
   :program:`mongod` assumes a trusted environment.

@@ -441,7 +441,7 @@ Additional Deployment Considerations
   prefetch settings. Note on linux the parameter is in *sectors*, not
   bytes. 32KBytes (a setting of 64 sectors) is pretty reasonable.

-- Check :doc:`ulimits ` settings.
+- Check :doc:`ulimits ` settings.

 - Use SSD if available and economical. Spinning disks can work well
   but SSDs capacity for random I/O operations work well with the update
diff --git a/source/administration/replica-sets.txt b/source/administration/replica-sets.txt
index 74c0cd26ca7..d0f879a2c0f 100644
--- a/source/administration/replica-sets.txt
+++ b/source/administration/replica-sets.txt
@@ -1,15 +1,44 @@
-====================================
-Replica Set Operation and Management
-====================================
+==========================
+Replica Set Administration
+==========================

 .. default-domain:: mongodb

 :term:`Replica sets ` automate most administrative tasks
-associated with database replication. Nevertheless, several
-operations related to deployment and systems management require
-administrator intervention remain. This document provides an overview
-of those tasks, in addition to a collection of troubleshooting
-suggestions for administers of replica sets.
+associated with database replication. Nevertheless, several operations
+related to deployment and systems management still require
+administrator intervention.
+
+The following tutorials provide task-oriented instructions for
+specific administrative tasks related to replica set operation.
+
+.. Updates to this tutorial list should also be made in
+   source/replication.txt
+
+.. toctree::
+   :maxdepth: 1
+
+   /tutorial/deploy-replica-set
+   /tutorial/convert-standalone-to-replica-set
+   /tutorial/expand-replica-set
+   /tutorial/remove-replica-set-member
+   /tutorial/replace-replica-set-member
+   /tutorial/adjust-replica-set-member-priority
+   /tutorial/resync-replica-set-member
+   /tutorial/deploy-geographically-distributed-replica-set
+   /tutorial/change-oplog-size
+   /tutorial/force-member-to-be-primary
+   /tutorial/manage-chained-replication
+   /tutorial/change-hostnames-in-a-replica-set
+   /tutorial/troubleshoot-replica-sets
+   /tutorial/add-replica-set-arbiter
+   /tutorial/convert-secondary-into-arbiter
+   /tutorial/configure-a-delayed-replica-set-member
+   /tutorial/configure-a-hidden-replica-set-member
+   /tutorial/configure-a-non-voting-replica-set-member
+   /tutorial/configure-secondary-only-replica-set-member
+   /tutorial/configure-replica-set-secondary-sync-target
+   /tutorial/reconfigure-replica-set-with-unavailable-members

 .. seealso::

@@ -17,1231 +46,3 @@ suggestions for administers of replica sets.
    - :ref:`Replica Set Reconfiguration Process `
    - :method:`rs.conf()` and :method:`rs.reconfig()`
    - :doc:`/reference/replica-configuration`
-
-   The following tutorials provide task-oriented instructions for
-   specific administrative tasks related to replica set operation.
-
-   .. 
Updates to this tutorial list should also be made in - source/replication.txt - - - :doc:`/tutorial/deploy-replica-set` - - :doc:`/tutorial/convert-standalone-to-replica-set` - - :doc:`/tutorial/expand-replica-set` - - :doc:`/tutorial/deploy-geographically-distributed-replica-set` - - :doc:`/tutorial/change-oplog-size` - - :doc:`/tutorial/force-member-to-be-primary` - - :doc:`/tutorial/change-hostnames-in-a-replica-set` - - :doc:`/tutorial/convert-secondary-into-arbiter` - - :doc:`/tutorial/reconfigure-replica-set-with-unavailable-members` - - :doc:`/tutorial/recover-data-following-unexpected-shutdown` - -.. _replica-set-node-configurations: -.. _replica-set-member-configurations: - -Member Configurations ---------------------- - -All :term:`replica sets ` have a single :term:`primary` and one or more -:term:`secondaries `. Replica sets allow you to configure -secondary members in a variety of ways. This section describes these -configurations. - -.. note:: - - A replica set can have up to 12 members, but only 7 members can have - votes. For configuration information regarding non-voting members, see - :ref:`replica-set-non-voting-members`. - -.. warning:: - - The :method:`rs.reconfig()` shell method can force the current - primary to step down, which causes an :ref:`election `. - When the primary steps down, the :program:`mongod` closes all client - connections. While this typically takes 10-20 seconds, attempt to - make these changes during scheduled maintenance periods. To - successfully reconfigure a replica set, a majority of the members - must be accessible. - -.. include:: /includes/seealso-elections.rst - -.. index:: replica set members; secondary only -.. _replica-set-secondary-only-members: -.. _replica-set-secondary-only-configuration: - -Secondary-Only Members -~~~~~~~~~~~~~~~~~~~~~~ - -The secondary-only configuration prevents a :term:`secondary` member in a -:term:`replica set` from ever becoming a :term:`primary` in a -:term:`failover`. You can set secondary-only mode for any member of -the set except the current primary. - -For example, you may want to configure all members of a replica sets -located outside of the main data centers as secondary-only to prevent -these members from ever becoming primary. - -To configure a member as secondary-only, set its -:data:`~local.system.replset.members[n].priority` value to ``0``. Any member with a -:data:`~local.system.replset.members[n].priority` equal to ``0`` will never seek -:ref:`election ` and cannot become primary in any -situation. For more information on priority levels, see -:ref:`replica-set-node-priority`. - -.. include:: /includes/note-rs-conf-array-index.rst - -As an example of modifying member priorities, assume a four-member -replica set. Use the following sequence of operations in the -:program:`mongo` shell to modify member priorities: - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[0].priority = 2 - cfg.members[1].priority = 1 - cfg.members[2].priority = 0.5 - cfg.members[3].priority = 0 - rs.reconfig(cfg) - -This reconfigures the set, with the following priority settings: - -- Member ``0`` to a priority of ``2`` so that it becomes primary, under - most circumstances. - -- Member ``1`` to a priority of ``1``, which is the default value. - Member ``1`` becomes primary if no member with a *higher* priority is - eligible. - -- Member ``2`` to a priority of ``0.5``, which makes it less likely to - become primary than other members but doesn't prohibit the - possibility. 
- -- Member ``3`` to a priority of ``0``. - Member ``3`` cannot become the :term:`primary` member under any - circumstances. - -.. note:: - - If your replica set has an even number of members, add an - :ref:`arbiter ` to ensure that - members can quickly obtain a majority of votes in an - election for primary. - -.. note:: - - MongoDB does not permit the current :term:`primary` to have a - :data:`~local.system.replset.members[n].priority` of ``0``. If you - want to prevent the current primary from becoming primary, first - use :method:`rs.stepDown()` to step down the current primary, and - then :ref:`reconfigure the replica set - ` with :method:`rs.conf()` and - :method:`rs.reconfig()`. - -.. seealso:: :data:`~local.system.replset.members[n].priority` and - :ref:`Replica Set Reconfiguration `. - -.. index:: replica set members; hidden -.. _replica-set-hidden-members: -.. _replica-set-hidden-configuration: - -Hidden Members -~~~~~~~~~~~~~~ - -Hidden members are part of a replica set but cannot become -primary and are invisible to client applications. *However,* -hidden members **do** vote in :ref:`elections `. - -Hidden members are ideal for instances that will have significantly -different usage patterns than the other members and require separation -from normal traffic. Typically, hidden members provide reporting, -dedicated backups, and dedicated read-only testing and integration -support. - -Hidden members have :data:`~local.system.replset.members[n].priority` set -``0`` and have :data:`~local.system.replset.members[n].hidden` set to ``true``. - -To configure a :term:`hidden member`, use the following sequence of -operations in the :program:`mongo` shell: - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[0].priority = 0 - cfg.members[0].hidden = true - rs.reconfig(cfg) - -After re-configuring the set, the first member of the set in the -:data:`~local.system.replset.members` array will have a priority of ``0`` -so that it cannot become primary. The other members in the set will -not advertise the hidden member in the :dbcommand:`isMaster` or -:method:`db.isMaster()` output. - -.. note:: - - You must send the :method:`rs.reconfig()` command to a set member - that *can* become :term:`primary`. In the above example, if you - issue the :method:`rs.reconfig()` operation to a member with a - :data:`~local.system.replset.members[n].priority` of ``0`` the operation will - fail. - -.. note:: - - .. versionchanged:: 2.0 - - For :term:`sharded clusters ` running with replica sets before 2.0 if - you reconfigured a member as hidden, you *had* to restart - :program:`mongos` to prevent queries from reaching the hidden - member. - -.. seealso:: :ref:`Replica Set Read Preference ` - and :ref:`Replica Set Reconfiguration `. - -.. index:: replica set members; delayed -.. _replica-set-delayed-members: -.. _replica-set-delayed-configuration: - -Delayed Members -~~~~~~~~~~~~~~~ - -Delayed members copy and apply operations from the primary's :term:`oplog` with -a specified delay. If a member has a delay of one hour, then -the latest entry in this member's oplog will not be more recent than -one hour old, and the state of data for the member will reflect the state of the -set an hour earlier. - -.. example:: If the current time is 09:52 and the secondary is a - delayed by an hour, no operation will be more recent than 08:52. - -Delayed members may help recover from various kinds of human error. Such -errors may include inadvertently deleted databases or botched -application upgrades. 
Consider the following factors when determining -the amount of slave delay to apply: - -- Ensure that the length of the delay is equal to or greater than your - maintenance windows. - -- The size of the oplog is sufficient to capture *more than* the - number of operations that typically occur in that period of - time. For more information on oplog size, see the - :ref:`replica-set-oplog-sizing` topic in the :doc:`/core/replication` document. - -Delayed members must have a :term:`priority` set to ``0`` to prevent -them from becoming primary in their replica sets. Also these members -should be :ref:`hidden ` to prevent your -application from seeing or querying this member. - -To configure a :term:`replica set` member with a one hour delay, use the -following sequence of operations in the :program:`mongo` shell: - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[0].priority = 0 - cfg.members[0].slaveDelay = 3600 - rs.reconfig(cfg) - -After the replica set reconfigures, the first member of the set in the -:data:`~local.system.replset.members` array will have a priority -of ``0`` and cannot become :term:`primary`. The -:data:`slaveDelay ` value -delays both replication and the member's :term:`oplog` by 3600 seconds (1 -hour). Setting :data:`~local.system.replset.members[n].slaveDelay` to a -non-zero value also sets :data:`~local.system.replset.members[n].hidden` to -``true`` for this replica set so that it does not receive application -queries in normal operations. - -.. warning:: - - The length of the secondary - :data:`~local.system.replset.members[n].slaveDelay` must - fit within the window of the oplog. If the oplog is shorter than - the :data:`~local.system.replset.members[n].slaveDelay` - window, the delayed member cannot successfully replicate - operations. - -.. seealso:: :data:`~local.system.replset.members[n].slaveDelay`, :ref:`Replica Set Reconfiguration - `, :ref:`replica-set-oplog-sizing`, - :ref:`replica-set-procedure-change-oplog-size` in this document, - and the :doc:`/tutorial/change-oplog-size` tutorial. - -.. index:: replica set members; arbiters -.. _replica-set-arbiters: -.. _replica-set-arbiter-configuration: - -Arbiters -~~~~~~~~ - -Arbiters are special :program:`mongod` instances that do not hold a -copy of the data and thus cannot become primary. Arbiters exist solely -to participate in :ref:`elections `. - -.. note:: - - Because of their minimal system requirements, you may safely deploy an - arbiter on a system with another workload, such as an application - server or monitoring member. - -.. warning:: - - Do not run arbiter processes on a system that is an active - :term:`primary` or :term:`secondary` of its :term:`replica set`. - -Arbiters never receive the contents of any collection but do have the -following interactions with the rest of the replica set: - -- Credential exchanges that authenticate the arbiter with - the replica set. All MongoDB processes within a replica set use - keyfiles. These exchanges are encrypted. - - MongoDB only transmits the authentication credentials in a - cryptographically secure exchange, and encrypts no other - exchange. - -- Exchanges of replica set configuration data and of votes. These are - not encrypted. - -If your MongoDB deployment uses SSL, then all communications between -arbiters and the other members of the replica set are secure. See the -documentation for :doc:`/administration/ssl` for more information. -As with all MongoDB components, run arbiters on secure networks. 
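A quick way to confirm how each member, including any arbiter, is
currently participating in the set is to inspect member states from
the :program:`mongo` shell; for example, a sketch such as:

.. code-block:: javascript

   // Print each member's name and current state;
   // arbiters report a stateStr of "ARBITER".
   rs.status().members.forEach( function (member) {
      print( member.name + " : " + member.stateStr )
   } )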
- -To add an arbiter, see :ref:`replica-set-procedure-add-arbiter`. - -.. index:: replica set members; non-voting -.. _replica-set-non-voting-members: -.. _replica-set-non-voting-configuration: - -Non-Voting Members -~~~~~~~~~~~~~~~~~~ - -You may choose to change the number of votes that each member has in -:ref:`elections ` for :term:`primary`. In general, all -members should have only 1 vote to prevent intermittent ties, deadlock, -or the wrong members from becoming :term:`primary`. Use :ref:`replica -set priorities ` to control which members -are more likely to become primary. - -To disable a member's ability to vote in elections, use the following -command sequence in the :program:`mongo` shell. - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[3].votes = 0 - cfg.members[4].votes = 0 - cfg.members[5].votes = 0 - rs.reconfig(cfg) - -This sequence gives ``0`` votes to the fourth, fifth, and sixth -members of the set according to the order of the -:data:`~local.system.replset.members` array in the output of -:method:`rs.conf()`. This setting allows the set to elect these -members as :term:`primary` but does not allow them to vote in -elections. If you have three non-voting members, you can add three -additional voting members to your set. Place voting members so that -your designated primary or primaries can reach a majority of votes in -the event of a network partition. - -.. note:: - - In general and when possible, all members should have only 1 vote. This - prevents intermittent ties, deadlocks, or the wrong members from - becoming primary. Use :ref:`Replica Set Priorities - ` to control which members are more - likely to become primary. - -.. seealso:: :data:`~local.system.replset.members[n].votes` and :ref:`Replica Set - Reconfiguration `. - -.. _replica-set-chained-replication: - -Chained Replication -~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 2.0 - -Chained replication occurs when a :term:`secondary` member replicates -from another secondary member instead of from the :term:`primary`. This -might be the case, for example, if a secondary selects its replication -target based on ping time and if the closest member is another secondary. - -Chained replication can reduce load on the primary. But chained -replication can also result in increased replication lag, depending on -the topology of the network. - -Beginning with version 2.2.2, you can use the -:data:`~local.system.replset.settings.chainingAllowed` setting in -:doc:`/reference/replica-configuration` to disable chained replication -for situations where chained replication is causing lag. For details, -see :ref:`replica-set-config-chained-replication`. - -Procedures ----------- - -This section gives overview information on a number of replica set -administration procedures. You can find documentation of additional -procedures in the :ref:`replica set tutorials -` section. - -.. _replica-set-admin-procedure-add-member: - -Adding Members -~~~~~~~~~~~~~~ - -Before adding a new member to an existing :term:`replica set`, do one of -the following to prepare the new member's :term:`data directory `: - -- Make sure the new member's data directory *does not* contain data. The - new member will copy the data from an existing member. - - If the new member is in a :term:`recovering` state, it must exit and - become a :term:`secondary` before MongoDB - can copy all data as part of the replication process. This process - takes time but does not require administrator intervention. 
- -- Manually copy the data directory from an existing member. The new - member becomes a secondary member and will catch up to the current - state of the replica set after a short interval. Copying the data over - manually shortens the amount of time for the new member to become - current. - - Ensure that you can copy the data directory to the new member and - begin replication within the :ref:`window allowed by the oplog `. If the - difference in the amount of time between the most recent operation and - the most recent operation to the database exceeds the length of the - :term:`oplog` on the existing members, then the new instance will have - to perform an initial sync, which completely resynchronizes the data, as described in - :ref:`replica-set-resync-stale-member`. - - Use :method:`db.printReplicationInfo()` to check the current state of - replica set members with regards to the oplog. - -For the procedure to add a member to a replica set, see -:doc:`/tutorial/expand-replica-set`. - -.. _replica-set-admin-procedure-remove-members: - -Removing Members -~~~~~~~~~~~~~~~~ - -You may remove a member of a replica set at any time; *however*, for best -results always *shut down* the :program:`mongod` instance before -removing it from a replica set. - -.. versionchanged:: 2.2 - Before 2.2, you *had* to shut down the :program:`mongod` instance - before removing it. While 2.2 removes this requirement, it remains - good practice. - -To remove a member, use the -:method:`rs.remove()` method in the :program:`mongo` shell while -connected to the current :term:`primary`. Issue the -:method:`db.isMaster()` command when connected to *any* member of the -set to determine the current primary. Use a command in either -of the following forms to remove the member: - -.. code-block:: javascript - - rs.remove("mongo2.example.net:27017") - rs.remove("mongo3.example.net") - -This operation disconnects the shell briefly and forces a -re-connection as the :term:`replica set` renegotiates which member -will be primary. The shell displays an error even if this -command succeeds. - -You can re-add a removed member to a replica set at any time using the -:ref:`procedure for adding replica set members `. -Additionally, consider using the :ref:`replica set reconfiguration procedure -` to change the -:data:`~local.system.replset.members[n].host` value to rename a member in a replica set -directly. - -.. _replica-set-admin-procedure-replace-member: - -Replacing a Member -~~~~~~~~~~~~~~~~~~ - -Use this procedure to replace a member of a replica set when the hostname -has changed. This procedure preserves all existing configuration -for a member, except its hostname/location. - -You may need to replace a replica set member if you want to replace an -existing system and only need to change the hostname rather than -completely replace all configured options related to the previous -member. - -Use :method:`rs.reconfig()` to change the value of the -:data:`~local.system.replset.members[n].host` field to reflect the new hostname or port -number. :method:`rs.reconfig()` will not change the value of -:data:`~local.system.replset.members[n]._id`. - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[0].host = "mongo2.example.net:27019" - rs.reconfig(cfg) - -.. warning:: - - Any replica set configuration change can trigger the current - :term:`primary` to step down, which forces an :ref:`election `. 
This - causes the current shell session, and clients connected to this replica set, - to produce an error even when the operation succeeds. - -.. _replica-set-node-priority-configuration: -.. _replica-set-member-priority-configuration: - -Adjusting Priority -~~~~~~~~~~~~~~~~~~ - -To change the value of the :data:`~local.system.replset.members[n].priority` in the -replica set configuration, use the following sequence of commands in -the :program:`mongo` shell: - -.. code-block:: javascript - - cfg = rs.conf() - cfg.members[0].priority = 0.5 - cfg.members[1].priority = 2 - cfg.members[2].priority = 2 - rs.reconfig(cfg) - -The first operation uses :method:`rs.conf()` to set the local variable -``cfg`` to the contents of the current replica set configuration, which -is a :term:`document`. The next three operations change the -:data:`~local.system.replset.members[n].priority` value in the ``cfg`` document for the -first three members configured in the :data:`members -` array. The final operation -calls :method:`rs.reconfig()` with the argument of ``cfg`` to initialize -the new configuration. - -.. include:: /includes/note-rs-conf-array-index.rst - -If a member has :data:`~local.system.replset.members[n].priority` set to ``0``, it is -ineligible to become :term:`primary` and will not seek -election. :ref:`Hidden members `, -:ref:`delayed members `, and -:ref:`arbiters ` all have :data:`~local.system.replset.members[n].priority` -set to ``0``. - -All members have a :data:`~local.system.replset.members[n].priority` equal to ``1`` by default. - -The value of :data:`~local.system.replset.members[n].priority` can be any floating point -(i.e. decimal) number between ``0`` and ``1000``. Priorities -are only used to determine the preference in election. The priority -value is used only in relation to other members. With the exception of -members with a priority of ``0``, the absolute value of the -:data:`~local.system.replset.members[n].priority` value is irrelevant. - -Replica sets will preferentially elect and maintain the primary status -of the member with the highest :data:`~local.system.replset.members[n].priority` setting. - -.. warning:: - - Replica set reconfiguration can force the current primary to step - down, leading to an election for primary in the replica - set. Elections cause the current primary to close all open - :term:`client` connections. - - Perform routine replica set reconfiguration during scheduled - maintenance windows. - -.. seealso:: The :ref:`Replica Reconfiguration Usage - ` example revolves around - changing the priorities of the :data:`~local.system.replset.members` of a replica set. - -.. _replica-set-procedure-add-arbiter: - -Adding an Arbiter -~~~~~~~~~~~~~~~~~ - -For a description of :term:`arbiters ` and their purpose in -:term:`replica sets `, see :ref:`replica-set-arbiters`. - -To prevent tied :term:`elections `, do not add an arbiter to a -set if the set already has an odd number of voting members. - -Because arbiters do not hold a copies of collection data, they have minimal -resource requirements and do not require dedicated hardware. - -1. Create a data directory for the arbiter. The :program:`mongod` uses - this directory for - configuration information. It *will not* hold database collection data. - The following example creates the ``/data/arb`` data directory: - - .. code-block:: sh - - mkdir /data/arb - -#. Start the arbiter, making sure to specify the replica set name and - the data directory. Consider the following example: - - .. 
code-block:: sh - - mongod --port 30000 --dbpath /data/arb --replSet rs - -#. In a :program:`mongo` shell connected to the :term:`primary`, add the - arbiter to the replica set by issuing the :method:`rs.addArb()` - method, which uses the following syntax: - - .. code-block:: javascript - - rs.addArb("<hostname>:<port>") - - For example, if the arbiter runs on ``m1.example.net:30000``, you - would issue this command: - - .. code-block:: javascript - - rs.addArb("m1.example.net:30000") - -.. _replica-set-configure-sync-target: - -Manually Configure a Secondary's Sync Target -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To override the default sync target selection logic, you may manually -configure a :term:`secondary` member's sync target for pulling -:term:`oplog` entries temporarily. The following operations provide -access to this functionality: - -- :dbcommand:`replSetSyncFrom` command, or - -- :method:`rs.syncFrom()` helper in the :program:`mongo` shell - -Only modify the default sync logic as needed, and always exercise -caution. :method:`rs.syncFrom()` will not affect an in-progress -initial sync operation. To affect the sync target for the initial sync, run -the :method:`rs.syncFrom()` operation *before* the initial sync. - -If you run :method:`rs.syncFrom()` during initial sync, MongoDB -produces no error messages, but the sync target will not change until -after the initial sync operation completes. - -.. note:: - - .. include:: /includes/fact-replica-set-sync-from-is-temporary.rst - -.. _replica-set-config-chained-replication: - -Manage Chained Replication -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 2.4 - -MongoDB enables :ref:`chained replication -` by default. This procedure -describes how to disable it and how to re-enable it. - -To disable chained replication, set the -:data:`~local.system.replset.settings.chainingAllowed` -field in :doc:`/reference/replica-configuration` to ``false``. - -You can use the following sequence of commands to set -:data:`~local.system.replset.settings.chainingAllowed` to -``false``: - -1. Copy the configuration settings into the ``cfg`` object: - - .. code-block:: javascript - - cfg = rs.config() - -#. Take note of whether the current configuration settings contain the - ``settings`` sub-document. If they do, skip this step. - - .. warning:: To avoid data loss, skip this step if the configuration - settings contain the ``settings`` sub-document. - - If the current configuration settings **do not** contain the - ``settings`` sub-document, create the sub-document by issuing the - following command: - - .. code-block:: javascript - - cfg.settings = { } - -#. Issue the following sequence of commands to set - :data:`~local.system.replset.settings.chainingAllowed` to - ``false``: - - .. code-block:: javascript - - cfg.settings.chainingAllowed = false - rs.reconfig(cfg) - -To re-enable chained replication, set -:data:`~local.system.replset.settings.chainingAllowed` to ``true``. -You can use the following sequence of commands: - -.. code-block:: javascript - - cfg = rs.config() - cfg.settings.chainingAllowed = true - rs.reconfig(cfg) - -.. note:: - - If chained replication is disabled, you still can use - :dbcommand:`replSetSyncFrom` to specify that a secondary replicates - from another secondary. But that configuration will last only until the - secondary recalculates which member to sync from. - -.. _replica-set-procedure-change-oplog-size: - -Changing Oplog Size -~~~~~~~~~~~~~~~~~~~ - -The following is an overview of the procedure for changing the size of -the oplog.
For a detailed procedure, see -:doc:`/tutorial/change-oplog-size`. - -.. include:: /includes/procedure-change-oplog-size.rst - -.. _replica-set-resync-stale-member: - -Resyncing a Member of a Replica Set -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When a secondary's replication process falls behind so far that the -:term:`primary` overwrites oplog entries that the secondary has not -yet replicated, that secondary cannot catch up and becomes "stale." -When that occurs, you must completely resynchronize the member by removing its data and -performing an initial sync. - -To do so, use one of the following approaches: - -- Restart the :program:`mongod` with an empty data directory and let - MongoDB's normal initial syncing feature restore the data. This - is the simpler option, but it may take longer to replace the data. - - See :ref:`replica-set-auto-resync-stale-member`. - -- Restart the machine with a copy of a recent data directory from - another member in the :term:`replica set`. This procedure can replace - the data more quickly but requires more manual steps. - - See :ref:`replica-set-resync-by-copying`. - -.. index:: replica set; resync -.. _replica-set-auto-resync-stale-member: - -Automatically Resync a Stale Member -``````````````````````````````````` - -This procedure relies on MongoDB's regular process for initial -sync. This will restore the data on the stale member to reflect the -current state of the set. For an overview of the MongoDB initial sync -process, see the :ref:`replica-set-syncing` section. - -To resync the stale member: - -1. Stop the stale member's :program:`mongod` instance. On Linux - systems you can use :option:`mongod --shutdown`. Set - :option:`--dbpath ` to the member's data - directory, as in the following: - - .. code-block:: sh - - mongod --dbpath /data/db/ --shutdown - -#. Delete all data and sub-directories from the member's data - directory. By removing the data from the :setting:`dbpath` directory, - you force MongoDB to - perform a complete resync. Consider making a backup first. - -#. Restart the :program:`mongod` instance on the member. For example: - - .. code-block:: sh - - mongod --dbpath /data/db/ --replSet rsProduction - - At this point, the :program:`mongod` will perform an initial - sync. The length of the initial sync process depends on the - size of the database and the network connection between members of the - replica set. - - Initial sync operations can impact the other members of the set and - create additional traffic to the primary, and can only occur if - another member of the set is accessible and up to date. - -.. index:: replica set; resync -.. _replica-set-resync-by-copying: - -Resync by Copying All Datafiles from Another Member -``````````````````````````````````````````````````` - -This approach uses a copy of the data files from an existing member of -the replica set, or a backup of the data files, to "seed" the stale member. - -The copy or backup of the data files **must** be sufficiently recent -to allow the new member to catch up with the :term:`oplog`, otherwise -the member would need to perform an initial sync. - -.. note:: - - In most cases you cannot copy data files from a running - :program:`mongod` instance to another, because the data files will - change during the file copy operation. Consider the - :doc:`/administration/backups` documentation for several methods - that you can use to capture a consistent snapshot of a running - :program:`mongod` instance.
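- -The mechanics of the copy itself depend on your environment. The following is a minimal sketch of seeding a stale member from a cleanly shut down secondary; the ``/data/db`` path and the ``mongodb2.example.net`` hostname are hypothetical, so substitute values from your own deployment: - -.. code-block:: sh - - # On the source secondary, stop mongod so the data files cannot - # change during the copy. - mongod --dbpath /data/db/ --shutdown - - # Copy the entire data directory to the stale member's data directory. - rsync -av /data/db/ mongodb2.example.net:/data/db/ - - # Restart the source secondary as a replica set member. - mongod --dbpath /data/db/ --replSet rsProduction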
- -After you have copied the data files from the "seed" source, start the -:program:`mongod` instance and allow it to apply all operations from -the oplog until it reflects the current state of the replica set. - -.. _replica-set-security: - -Security Considerations for Replica Sets ----------------------------------------- - -In most cases, the most effective ways to control access and to secure -the connection between members of a :term:`replica set` depend on -network-level access control. Use your environment's firewall and -network routing to ensure that traffic *only* from clients and other -replica set members can reach your :program:`mongod` instances. If needed, -use virtual private networks (VPNs) to ensure secure connections -over wide area networks (WANs). - -Additionally, MongoDB provides an authentication mechanism for -:program:`mongod` and :program:`mongos` instances connecting to -replica sets. These instances enable authentication and specify a -shared key file that serves as a shared password. - -.. versionadded:: 1.8 - Added support for authentication in replica set deployments. - -.. versionchanged:: 1.9.1 - Added support for authentication in sharded replica set deployments. - - -To enable authentication, add the following option to your configuration file: - -.. code-block:: cfg - - keyFile = /srv/mongodb/keyfile - -.. note:: - - You may choose to set these run-time configuration options using the - :option:`--keyFile ` (or :option:`mongos --keyFile`) - options on the command line. - -Setting :setting:`keyFile` enables authentication and specifies a key -file for the replica set members to use when authenticating to each -other. The content of the key file is arbitrary but must be the same -on all members of the replica set and on all :program:`mongos` -instances that connect to the set. - -The key file must be less than one kilobyte in size and may only contain -characters in the base64 set. The key file must not have group or "world" -permissions on UNIX systems. Use the following command with the -OpenSSL package to generate "random" content for use in a key file: - -.. code-block:: bash - - openssl rand -base64 753 - -.. note:: - - Key file permissions are not checked on Windows systems. - -.. _replica-set-troubleshooting: - -Troubleshooting Replica Sets ----------------------------- - -This section describes common strategies for troubleshooting -:term:`replica sets `. - -.. seealso:: :doc:`/administration/monitoring`. - -.. _replica-set-troubleshooting-check-replication-status: - -Check Replica Set Status -~~~~~~~~~~~~~~~~~~~~~~~~ - -To display the current state of the replica set and current state of -each member, run the :method:`rs.status()` method in a :program:`mongo` -shell connected to the replica set's :term:`primary`. For descriptions -of the information displayed by :method:`rs.status()`, see -:doc:`/reference/replica-status`. - -.. note:: - - The :method:`rs.status()` method is a wrapper that runs the - :dbcommand:`replSetGetStatus` database command. - -.. _replica-set-replication-lag: - -Check the Replication Lag -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Replication lag is a delay between an operation on the :term:`primary` -and the application of that operation from the :term:`oplog` to the -:term:`secondary`. Replication lag can be a significant issue and can -seriously affect MongoDB :term:`replica set` deployments.
Excessive -replication lag makes "lagged" members ineligible to quickly become -primary and increases the possibility that distributed -read operations will be inconsistent. - -To check the current length of replication lag: - -- In a :program:`mongo` shell connected to the primary, call the - :method:`db.printSlaveReplicationInfo()` method. - - The returned document displays the ``syncedTo`` value for each member, - which shows you when each member last read from the oplog, as shown in the following - example: - - .. code-block:: javascript - - source: m1.example.net:30001 - syncedTo: Tue Oct 02 2012 11:33:40 GMT-0400 (EDT) - = 7475 secs ago (2.08hrs) - source: m2.example.net:30002 - syncedTo: Tue Oct 02 2012 11:33:40 GMT-0400 (EDT) - = 7475 secs ago (2.08hrs) - -- Monitor the rate of replication by watching the oplog time in the - "replica" graph in the `MongoDB Monitoring Service`_. For more - information see the `documentation for MMS`_. - -.. _`MongoDB Monitoring Service`: http://mms.10gen.com/ -.. _`documentation for MMS`: http://mms.10gen.com/help/ - -Possible causes of replication lag include: - -- **Network Latency** - - Check the network routes between the members of your set to ensure - that there is no packet loss or network routing issue. - - Use tools including ``ping`` to test latency between set - members and ``traceroute`` to expose the routing of packets - between network endpoints. - -- **Disk Throughput** - - If the file system and disk device on the secondary are - unable to flush data to disk as quickly as the primary, then - the secondary will have difficulty keeping up. Disk-related - issues are especially prevalent on multi-tenant systems, including - virtualized instances, and can be transient if the system accesses - disk devices over an IP network (as is the case with Amazon's - EBS system). - - Use system-level tools to assess disk status, including - ``iostat`` or ``vmstat``. - -- **Concurrency** - - In some cases, long-running operations on the primary can block - replication on secondaries. For best results, configure :ref:`write concern ` - to require confirmation of replication to secondaries, as described in :ref:`replica-set-write-concern`. - This prevents write operations from returning if replication cannot keep up - with the write load. - - Use the :term:`database profiler` to see if there are slow queries - or long-running operations that correspond to the incidences of lag. - -- **Appropriate Write Concern** - - If you are performing a large data ingestion or bulk load operation - that requires a large number of writes to the primary, particularly - with :ref:`unacknowledged write concern `, the - secondaries will not be able to read the oplog fast enough to keep - up with changes. - - To prevent this, require :ref:`write acknowledgment or journaled - write concern ` after every 100, - 1,000, or another interval to provide an opportunity for - secondaries to catch up with the primary. - - For more information see: - - - :ref:`replica-set-write-concern` - - :ref:`replica-set-oplog-sizing` - -.. _replica-set-troubleshooting-check-connection: - -Test Connections Between all Members -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -All members of a :term:`replica set` must be able to connect to every -other member of the set to support replication. Always verify -connections in both "directions."
Networking topologies and firewall -configurations can prevent normal and required connectivity, which -blocks replication. - -Consider the following example of a bidirectional test of networking: - -.. example:: Given a replica set with three members running on three separate - hosts: - - - ``m1.example.net`` - - ``m2.example.net`` - - ``m3.example.net`` - - 1. Test the connection from ``m1.example.net`` to the other hosts - with the following operation set from ``m1.example.net``: - - .. code-block:: sh - - mongo --host m2.example.net --port 27017 - - mongo --host m3.example.net --port 27017 - - #. Test the connection from ``m2.example.net`` to the other two - hosts with the following operation set from ``m2.example.net``, - as in: - - .. code-block:: sh - - mongo --host m1.example.net --port 27017 - - mongo --host m3.example.net --port 27017 - - You have now tested the connection between - ``m2.example.net`` and ``m1.example.net`` in both directions. - - #. Test the connection from ``m3.example.net`` to the other two - hosts with the following operation set from the - ``m3.example.net`` host, as in: - - .. code-block:: sh - - mongo --host m1.example.net --port 27017 - - mongo --host m2.example.net --port 27017 - - If any connection, in any direction, fails, check your networking - and firewall configuration and reconfigure your environment to - allow these connections. - -.. _replica-set-troubleshooting-check-oplog-size: - -Check the Size of the Oplog -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A larger :term:`oplog` can give a replica set a greater tolerance for -lag, and make the set more resilient. - -To check the size of the oplog for a given :term:`replica set` member, -connect to the member in a :program:`mongo` shell and run the -:method:`db.printReplicationInfo()` method. - -The output displays the size of the oplog and the date ranges of the -operations contained in the oplog. In the following example, the oplog -is about 10MB and is able to fit about 26 hours (94400 seconds) of -operations: - -.. code-block:: javascript - - configured oplog size: 10.10546875MB - log length start to end: 94400 (26.22hrs) - oplog first event time: Mon Mar 19 2012 13:50:38 GMT-0400 (EDT) - oplog last event time: Wed Oct 03 2012 14:59:10 GMT-0400 (EDT) - now: Wed Oct 03 2012 15:00:21 GMT-0400 (EDT) - -The oplog should be long enough to hold all operations for the -longest downtime you expect on a secondary. At a minimum, an oplog -should be able to hold a minimum of 24 hours of operations; however, many -users prefer to have 72 hours or even a week's worth of operations. - -For more information on how oplog size affects operations, see: - -- The :ref:`replica-set-oplog-sizing` topic in the :doc:`/core/replication` document. -- The :ref:`replica-set-delayed-members` topic in this document. -- The :ref:`replica-set-replication-lag` topic in this document. - -.. note:: You normally want the oplog to be the same size on all - members. If you resize the oplog, resize it on all members. - -To change oplog size, see :ref:`replica-set-procedure-change-oplog-size` -in this document or see the :doc:`/tutorial/change-oplog-size` tutorial. - - -.. index:: pair: replica set; failover -.. _replica-set-failover-administration: -.. _failover: - -Failover and Recovery -~~~~~~~~~~~~~~~~~~~~~ - -.. TODO Revisit whether this belongs in troubleshooting. Perhaps this - should be an H2 before troubleshooting. - -Replica sets feature automated failover.
If the :term:`primary` -goes offline or becomes unresponsive and a majority of the original -set members can still connect to each other, the set will elect a new -primary. - -While :term:`failover` is automatic, :term:`replica set` -administrators should still understand exactly how this process -works. The sections below describe failover in detail. - -In most cases, failover occurs without administrator intervention -seconds after the :term:`primary` either steps down, becomes inaccessible, -or becomes otherwise ineligible to act as primary. If your MongoDB deployment -does not fail over according to expectations, consider the following -operational errors: - -- No remaining member is able to form a majority. This can happen as a - result of network partitions that render some members - inaccessible. Design your deployment to ensure that a majority of - set members can elect a primary in the same facility as core - application systems. - -- No member is eligible to become primary. Members must have a - :data:`~local.system.replset.members[n].priority` setting greater than ``0``, have a state - that is less than ten seconds behind the last operation to the - :term:`replica set`, and generally be *more* up to date than the - voting members. - -In many senses, :ref:`rollbacks ` represent a -graceful recovery from an impossible failover and recovery situation. - -Rollbacks occur when -a primary accepts writes that other members of -the set do not successfully replicate before the primary steps -down. When the former primary begins replicating again, it performs a -"rollback." Rollbacks remove those operations from the instance that -were never replicated to the set so that the data set is in a -consistent state. The :program:`mongod` program writes rolled back -data to a :term:`BSON` file that you can view using -:program:`bsondump` and apply manually using :program:`mongorestore`. - -You can prevent rollbacks using a :ref:`replica acknowledged -` write concern. These write -operations require not only the :term:`primary`, but also one or more -secondaries, and in some configurations a majority of the set, to confirm the -write operation before returning. - -.. include:: /includes/seealso-elections.rst - -Oplog Entry Timestamp Error -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. todo:: link this topic to assertion 13290 once assertion guide exists. - -Consider the following error in :program:`mongod` output and logs: - -.. code-block:: javascript - - replSet error fatal couldn't query the local local.oplog.rs collection. Terminating mongod after 30 seconds. - [rsStart] bad replSet oplog entry? - -Often, an incorrectly typed value in the ``ts`` field in the last -:term:`oplog` entry causes this error. The correct data type is -Timestamp. - -Check the type of the ``ts`` value using the following two queries -against the oplog collection: - -.. code-block:: javascript - - db = db.getSiblingDB("local") - db.oplog.rs.find().sort({$natural:-1}).limit(1) - db.oplog.rs.find({ts:{$type:17}}).sort({$natural:-1}).limit(1) - -The first query returns the last document in the oplog, while the -second returns the last document in the oplog where the ``ts`` value -is a Timestamp. The :operator:`$type` operator allows you to select -documents by :term:`BSON type `; type ``17`` is the Timestamp data type. - -If the queries don't return the same document, then the last document in -the oplog has the wrong data type in the ``ts`` field. - -.. example:: - - If the first query returns this as the last oplog entry: - - ..
code-block:: javascript - - { "ts" : {t: 1347982456000, i: 1}, - "h" : NumberLong("8191276672478122996"), - "op" : "n", - "ns" : "", - "o" : { "msg" : "Reconfig set", "version" : 4 } } - - And the second query returns this as the last entry where ``ts`` - has the ``Timestamp`` type: - - .. code-block:: javascript - - { "ts" : Timestamp(1347982454000, 1), - "h" : NumberLong("6188469075153256465"), - "op" : "n", - "ns" : "", - "o" : { "msg" : "Reconfig set", "version" : 3 } } - - Then the value for the ``ts`` field in the last oplog entry is of the - wrong data type. - -To set the proper type for this value and resolve this issue, -use an update operation that resembles the following: - -.. code-block:: javascript - - db.oplog.rs.update( { ts: { t:1347982456000, i:1 } }, - { $set: { ts: new Timestamp(1347982456000, 1)}}) - -Modify the timestamp values as needed based on your oplog entry. This -operation may take some time to complete because the update must -scan and pull the entire oplog into memory. - -Duplicate Key Error on ``local.slaves`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The *duplicate key on local.slaves* error occurs when a -:term:`secondary` or :term:`slave` changes its hostname and the -:term:`primary` or :term:`master` tries to update its ``local.slaves`` -collection with the new name. The update fails because it contains the -same ``_id`` value as the document containing the previous hostname. The -error itself will resemble the following: - -.. code-block:: none - - exception 11000 E11000 duplicate key error index: local.slaves.$_id_ dup key: { : ObjectId('') } 0ms - -This is a benign error and does not affect replication operations on -the :term:`secondary` or :term:`slave`. - -To prevent the error from appearing, drop the ``local.slaves`` -collection from the :term:`primary` or :term:`master`, with the -following sequence of operations in the :program:`mongo` shell: - -.. code-block:: javascript - - use local - db.slaves.drop() - -The next time a :term:`secondary` or :term:`slave` polls the -:term:`primary` or :term:`master`, the :term:`primary` or :term:`master` -recreates the ``local.slaves`` collection. - -.. index:: replica set; network partitions -.. index:: replica set; elections -.. _replica-set-elections-and-network-partitions: - -Elections and Network Partitions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Members on either side of a network partition cannot see each other when -determining whether a majority is available to hold an election. - -That means that if a primary steps down and neither side of the -partition has a majority on its own, the set will not elect a new -primary and will become read-only. To avoid this situation, -attempt to place a majority of instances in one data center with a -minority of instances in a secondary facility. - -.. see:: :ref:`replica-set-election-internals`. diff --git a/source/administration/sharded-clusters.txt b/source/administration/sharded-clusters.txt index d6b31f087ae..7c677a92a28 100644 --- a/source/administration/sharded-clusters.txt +++ b/source/administration/sharded-clusters.txt @@ -7,409 +7,45 @@ Sharded Cluster Administration .. default-domain:: mongodb -Sharding occurs within a :term:`sharded cluster`. A sharded cluster -consists of the following components: +The following tutorials provide instructions for administering +:term:`sharded clusters `. For conceptual topics, see +:doc:`/sharding`. -- :ref:`Shards `.
Each shard is a separate - :program:`mongod` instance or :term:`replica set` that holds a portion - of the database collections. +Deploy Sharded Clusters +----------------------- -- :ref:`Config servers `. Each config server is - a :program:`mongod` instance that holds metadata about the cluster. - The metadata maps :term:`chunks ` to shards. +.. toctree:: + :maxdepth: 1 -- :ref:`mongos instances `. The :program:`mongos` - instances route the reads and writes to the shards. + /tutorial/deploy-shard-cluster + /tutorial/select-shard-key + /tutorial/shard-collection-with-a-hashed-shard-key + /tutorial/enable-authentication-in-sharded-cluster + /tutorial/view-sharded-cluster-configuration + /tutorial/add-shards-to-shard-cluster + /tutorial/convert-replica-set-to-replicated-shard-cluster -.. seealso:: +Sharded Cluster Maintenance and Administration +---------------------------------------------- - - For specific configurations, see :ref:`sharding-architecture`. +.. toctree:: + :maxdepth: 1 - - To set up sharded clusters, see :ref:`sharding-procedure-setup`. + /tutorial/administer-shard-tags + /tutorial/manage-sharded-cluster-config-server + /tutorial/manage-chunks-in-sharded-cluster + /tutorial/configure-sharded-cluster-balancer + /tutorial/manage-sharded-cluster-balancer + /tutorial/remove-shards-from-cluster -.. index:: sharding; shards -.. index:: shards -.. _sharding-shards: +.. seealso:: :ref:`backup-and-restore-sharded-clusters`. -Shards ------- +Manage Data in Sharded Clusters +------------------------------- -A shard is a container that holds a subset of a collection’s data. Each -shard is either a single :program:`mongod` instance or a :term:`replica -set`. In production, all shards should be replica sets. +.. toctree:: + :maxdepth: 1 -Applications do not access the shards directly. Instead, the -:ref:`mongos instances ` route reads and writes from -applications to the shards. - -.. index:: sharding; config servers -.. index:: config servers -.. _sharding-config-server: - -Config Servers --------------- - -Config servers maintain the shard metadata in a config database. The -:term:`config database` stores the relationship between :term:`chunks -` and where they reside within a :term:`sharded cluster`. Without -a config database, the :program:`mongos` instances would be unable to -route queries or write operations within the cluster. - -Config servers *do not* run as replica sets. Instead, a :term:`cluster -` operates with a group of *three* config servers that use a -two-phase commit process that ensures immediate consistency and -reliability. - -For testing purposes you may deploy a cluster with a single -config server, but this is not recommended for production. - -.. warning:: - - If your cluster has a single config server, this - :program:`mongod` is a single point of failure. If the instance is - inaccessible the cluster is not accessible. If you cannot recover - the data on a config server, the cluster will be inoperable. - - **Always** use three config servers for production deployments. - -The actual load on configuration servers is small because each -:program:`mongos` instance maintains a cached copy of the configuration -database. MongoDB only writes data to the config server to: - -- create splits in existing chunks, which happens as data in - existing chunks exceeds the maximum chunk size. - -- migrate a chunk between shards. (One way to inspect this chunk - metadata appears in the sketch below.)
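- -You can see the chunk metadata that the config servers maintain by connecting to a :program:`mongos`. The following is a minimal sketch; the ``records.users`` namespace is hypothetical, so substitute any sharded collection from your own deployment: - -.. code-block:: javascript - - use config - // Each document maps one chunk's shard key range (min, max) to the - // shard that currently holds it. - db.chunks.find( { ns: "records.users" } ).sort( { min: 1 } )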
- -Additionally, all config servers must be available on initial setup -of a sharded cluster, because each :program:`mongos` instance must be able -to write to the ``config.version`` collection. - -If one or two configuration instances become unavailable, the -cluster's metadata becomes *read only*. It is still possible to read -and write data from the shards, but no chunk migrations or splits will -occur until all three servers are accessible. At the same time, config -server data is only read in the following situations: - -- A new :program:`mongos` starts for the first time, or an existing - :program:`mongos` restarts. - -- After a chunk migration, the :program:`mongos` instances update - themselves with the new cluster metadata. - -If all three config servers are inaccessible, you can continue to use -the cluster as long as you don't restart the :program:`mongos` -instances until after config servers are accessible again. If you -restart the :program:`mongos` instances and there are no accessible -config servers, the :program:`mongos` would be unable to direct -queries or write operations to the cluster. - -Because the configuration data is small relative to the amount of data -stored in a cluster, the amount of activity is relatively low, and 100% -uptime is not required for a functioning sharded cluster. As a result, -backing up the config servers is not difficult. Backups of config -servers are critical as clusters become totally inoperable when -you lose all configuration instances and data. Precautions to ensure -that the config servers remain available and intact are critical. - -.. note:: - - Configuration servers store metadata for a single sharded cluster. - You must have a separate configuration server or servers for each - cluster you administer. - -.. index:: mongos -.. _sharding-mongos: -.. _sharding-read-operations: - -Sharded Cluster Operations and ``mongos`` Instances ---------------------------------------------------- - -The :program:`mongos` program provides a single unified interface to a sharded -cluster for applications using MongoDB. Except for the selection of a -:term:`shard key`, application developers and administrators need not -consider any of the :ref:`internal details of sharding `. - -:program:`mongos` caches data from the :ref:`config server -`, and uses this to route operations from -applications and clients to the :program:`mongod` instances. -:program:`mongos` instances have no *persistent* state and consume -minimal system resources. - -The most common practice is to run :program:`mongos` instances on the -same systems as your application servers, but you can maintain -:program:`mongos` instances on the shards or on other dedicated -resources. - -.. note:: - - .. versionchanged:: 2.1 - - Some aggregation operations using the :dbcommand:`aggregate` - command (i.e. :method:`db.collection.aggregate()`) will cause - :program:`mongos` instances to require more CPU resources than in - previous versions. This modified performance profile may dictate - alternate architecture decisions if you use the :term:`aggregation - framework` extensively in a sharded environment. - -.. _sharding-query-routing: - -Automatic Operation and Query Routing with ``mongos`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:program:`mongos` uses information from :ref:`config servers -` to route operations to the cluster as -efficiently as possible. In general, operations in a sharded -environment are either: - -1. 
Targeted at a single shard or a limited group of shards based on - the shard key. - -2. Broadcast to all shards in the cluster that hold documents in a - collection. - -When possible you should design your operations to be as targeted as -possible. Operations have the following targeting characteristics: - -- Query operations broadcast to all shards [#namespace-exception]_ - **unless** the :program:`mongos` can determine which shard or shards - store the data. - - For queries that include the shard key, :program:`mongos` can target - the query at a specific shard or set of shards, if the portion - of the shard key included in the query is a *prefix* of the shard - key. For example, if the shard key is: - - .. code-block:: javascript - - { a: 1, b: 1, c: 1 } - - The :program:`mongos` program *can* route queries that include the full - shard key or either of the following shard key prefixes at a - specific shard or set of shards: - - .. code-block:: javascript - - { a: 1 } - { a: 1, b: 1 } - - Depending on the distribution of data in the cluster and the - selectivity of the query, :program:`mongos` may still have to - contact multiple shards [#possible-all]_ to fulfill these queries. - -- All :method:`insert() ` operations target to - one shard. - -- All single :method:`update() ` operations - target to one shard. This includes :term:`upsert` operations. - -- The :program:`mongos` broadcasts multi-update operations to every - shard. - -- The :program:`mongos` broadcasts :method:`remove() - ` operations to every shard unless the - operation specifies the shard key in full. - -While some operations must broadcast to all shards, you can improve -performance by using as many targeted operations as possible by -ensuring that your operations include the shard key. - -.. [#namespace-exception] If a shard does not store chunks from a - given collection, queries for documents in that collection are not - broadcast to that shard. - -.. [#a/c-as-a-case-of-a] In this example, a :program:`mongos` could - route a query that included ``{ a: 1, c: 1 }`` fields at a specific - subset of shards using the ``{ a: 1 }`` prefix. A :program:`mongos` - cannot route any of the following queries to specific shards - in the cluster: - - .. code-block:: javascript - - { b: 1 } - { c: 1 } - { b: 1, c: 1 } - -.. [#possible-all] :program:`mongos` will route some queries, even - some that include the shard key, to all shards, if needed. - -Sharded Query Response Process -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To route a query to a :term:`cluster `, -:program:`mongos` uses the following process: - -#. Determine the list of :term:`shards ` that must receive the query. - - In some cases, when the :term:`shard key` or a prefix of the shard - key is a part of the query, the :program:`mongos` can route the - query to a subset of the shards. Otherwise, the :program:`mongos` - must direct the query to *all* shards that hold documents for that - collection. - - .. example:: - - Given the following shard key: - - .. code-block:: javascript - - { zipcode: 1, u_id: 1, c_date: 1 } - - Depending on the distribution of chunks in the cluster, the - :program:`mongos` may be able to target the query at a subset of - shards, if the query contains the following fields: - - .. code-block:: javascript - - { zipcode: 1 } - { zipcode: 1, u_id: 1 } - { zipcode: 1, u_id: 1, c_date: 1 } - -#. Establish a cursor on all targeted shards. - - When the first batch of results returns from the cursors: - - a. For a query with sorted results (i.e. 
using - :method:`cursor.sort()`) the :program:`mongos` instance performs a merge - sort of all results. - - b. For a query with unsorted results, the :program:`mongos` instance returns - a result cursor that "round robins" results from all cursors on - the shards. - - .. versionchanged:: 2.0.5 - Before 2.0.5, the :program:`mongos` exhausted each cursor, - one by one. - -.. _sharding-security: - -Sharded Cluster Security Considerations ---------------------------------------- - -MongoDB controls access to :term:`sharded clusters ` -with key files that store authentication credentials. The components -of sharded clusters use the secret stored in the key files when -authenticating to each other. Create key files and then point your -:program:`mongos` and :program:`mongod` instances to the files, as -described later in this section. - -Beyond the :setting:`auth` mechanisms described in this section, -always run your sharded clusters in trusted networking environments -that limit access to the cluster with network rules. Your networking -environments should enforce restrictions that ensure only known -traffic reaches your :program:`mongos` and :program:`mongod` -instances. - -This section describes authentication specific to sharded -clusters. For information on authentication across MongoDB, see -:ref:`security-authentication`. - -Access Control Privileges in Sharded Clusters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In sharded clusters, MongoDB provides separate administrative -privileges for the sharded cluster and for each shard. Beyond these -administration privileges, privileges for sharded cluster deployments -are functionally the same as any other MongoDB deployment. See -:ref:`security-authentication` for more information. - -For sharded clusters, MongoDB provides these separate administrative -privileges: - -- Administrative privileges for the sharded cluster. These privileges - provide read-and-write access to the config servers' :term:`admin - database `. These users can run all administrative commands. - Administrative privileges also give the user read-and-write access - to all the cluster's databases. - - The credentials for administrative privileges on the cluster reside on - the config servers. To receive admin access to the cluster, you must - authenticate a session while connected to a :program:`mongos` instance - using the admin database. - -- Administrative privileges for the :program:`mongod` instance, or - :term:`replica set`, that provides each individual shard. Each shard - has its own admin database that stores administrative credentials - and access for that shard only. These credentials are *completely* - distinct from the cluster-wide administrative credentials. - - As with all :program:`mongod` instances, MongoDB provides two types - of administrative privileges for a shard: - - - Normal administrative privileges, which provide read-and-write - access to the admin database and access to all administrative - commands, and which provide read-and-write access to all other - databases on that shard. - - - Read-only administrative privileges, which provide read-only access - to the admin database and to all other databases on that shard. - -Also, as with all :program:`mongod` instances, a MongoDB sharded cluster -provides the following non-administrative user privileges: - -- Normal privileges, which provide read-and-write access to a specific - database. Users with normal privileges can add users to the database.
- -- Read-only privileges, which provide read-only access to a specific - database. - -For more information on privileges, see :ref:`security-authentication`. - -Enable Authentication in a Sharded Cluster -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 2.0 - Support for authentication with sharded clusters. - -To control access to a sharded cluster, create key files and then set -the :setting:`keyFile` option on *all* components of the sharded -cluster, including all :program:`mongos` instances, all config server -:program:`mongod` instances, and all shard :program:`mongod` -instances. The content of the key file is arbitrary but must be the -same on all cluster members. - -To enable authentication, do the following: - -1. Generate a key file to store authentication information, as described - in the :ref:`generate-key-file` section. - -#. On each component in the sharded cluster, enable authentication by - doing one of the following: - - - In the configuration file, set the :setting:`keyFile` option to the - key file's path and then start the component, as in the following - example: - - .. code-block:: cfg - - keyFile = /srv/mongodb/keyfile - - - When starting the component, set the :option:`--keyFile ` option, - which is an option for both :program:`mongos` instances and - :program:`mongod` instances. Set the :option:`--keyFile ` - to the key file's path. - - .. note:: - - The :setting:`keyFile` setting implies :setting:`auth`, which - means in most cases you do not need to set :setting:`auth` - explicitly. - -#. Add the first administrative user and then add subsequent users. See - :ref:`control-access-add-users`. - -Access a Sharded Cluster with Authentication -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To access a sharded cluster as an authenticated user, use the -appropriate authentication options in :program:`mongo`. - -To access a sharded cluster as an authenticated, non-admin user, see -either of the following: - -- :dbcommand:`authenticate` - -- :method:`db.auth()` - -To terminate an authenticated session, see the :dbcommand:`logout` -command. + /core/tag-aware-sharding + /tutorial/enforce-unique-keys-for-sharded-collections + /tutorial/shard-gridfs-data diff --git a/source/administration/tag-aware-sharding.txt b/source/administration/tag-aware-sharding.txt deleted file mode 100644 index d8cbd5744a6..00000000000 --- a/source/administration/tag-aware-sharding.txt +++ /dev/null @@ -1,186 +0,0 @@ -.. _tag-aware-sharding: - -================== -Tag Aware Sharding -================== - -.. default-domain:: mongodb - -For sharded clusters, MongoDB makes it possible to associate specific -ranges of a :term:`shard key` with a specific :term:`shard` or subset -of shards. This association dictates the policy of the cluster -balancer process as it balances the :term:`chunks ` around the -cluster. This capability enables the following deployment patterns: - -- isolating a specific subset of data on a specific set of shards. - -- controlling the balancing policy so that in a geographically - distributed cluster the most relevant portions of the data set - reside on the shards with the greatest proximity to the application - servers. - -This document describes the behavior, operation, and use of tag aware -sharding in MongoDB deployments. - -.. note:: - - Shard key range tags are entirely distinct from :ref:`replica set member - tags `. - -:term:`Hash-based sharding ` does not support -tag-aware sharding.
- -Behavior and Operations ------------------------ - -Tags in a sharded cluster are pieces of metadata that dictate the -policy and behavior of the cluster :term:`balancer`. Using -tags, you may associate individual shards in a cluster with one or -more tags. Then, you can assign this tag string to a range -of :term:`shard key` values for a sharded collection. When migrating a -chunk, the balancer will select a destination shard based on the -configured tag ranges. - -The balancer migrates chunks in tagged ranges to shards with those -tags, if tagged shards are not balanced. [#specific-tagged-migrations]_ - -.. note:: - - Because a single chunk may span different tagged shard key ranges, - the balancer may migrate chunks to tagged shards that contain - values that exceed the upper bound of the selected tag range. - -.. example:: - - Given a sharded collection with two configured tag ranges, such - that: - - - :term:`Shard key` values between ``100`` and ``200`` have tags to - direct corresponding chunks to shards tagged ``NYC``. - - - Shard Key values between ``200`` and ``300`` have tags to direct - corresponding chunks to shards tagged ``SFO``. - - In this cluster, the balancer will migrate a chunk with shard key - values ranging between ``150`` and ``220`` to a shard tagged - ``NYC``, since the chunk's lower bound of ``150`` falls within the - range tagged ``NYC``. - -After configuring tags on shards and ranges of the shard key, the -cluster may take some time to reach the proper distribution of data, -depending on the division of chunks (i.e. splits) and the current -distribution of data in the cluster. Once configured, the balancer -will respect tag ranges during future :ref:`balancing rounds -`. - -.. [#specific-tagged-migrations] To migrate chunks in a tagged - environment, the balancer selects a target shard with a tag range - that has an *upper* bound that is *greater than* the migrating - chunk's *lower* bound. If a shard with a matching tagged range - exists, the balancer will migrate the chunk to that shard. - - - -Administer Shard Tags ---------------------- - -Associate tags with a particular shard using the -:method:`sh.addShardTag()` method when connected to a -:program:`mongos` instance. A single shard may have multiple tags, and -multiple shards may also have the same tag. - -.. example:: - - The following example adds the tag ``NYC`` to two shards, and the tags - ``SFO`` and ``NRT`` to a third shard: - - .. code-block:: javascript - - sh.addShardTag("shard0000", "NYC") - sh.addShardTag("shard0001", "NYC") - sh.addShardTag("shard0002", "SFO") - sh.addShardTag("shard0002", "NRT") - -You may remove tags from a particular shard using the -:method:`sh.removeShardTag()` method when connected to a -:program:`mongos` instance, as in the following example, which removes -the ``NRT`` tag from a shard: - -.. code-block:: javascript - - sh.removeShardTag("shard0002", "NRT") - -Tag a Shard Key Range -~~~~~~~~~~~~~~~~~~~~~ - -To assign a tag to a range of shard keys use the -:method:`sh.addTagRange()` method when connected to a -:program:`mongos` instance. Any given shard key range may only have -*one* assigned tag. You cannot overlap defined ranges, or tag the same -range more than once. - -.. example:: - - Consider a collection named ``users`` in the ``records`` database, - sharded on the ``zipcode`` field. The following operations assign: - - - two ranges of zip codes in Manhattan and Brooklyn the ``NYC`` tag - - - one range of zip codes in San Francisco the ``SFO`` tag - - ..
code-block:: javascript - - sh.addTagRange("records.users", { zipcode: "10001" }, { zipcode: "10281" }, "NYC") - sh.addTagRange("records.users", { zipcode: "11201" }, { zipcode: "11240" }, "NYC") - sh.addTagRange("records.users", { zipcode: "94102" }, { zipcode: "94135" }, "SFO") - -.. note:: - - Shard ranges are always inclusive of the lower value and exclusive - of the upper boundary. - -Remove a Tag From a Shard Key Range -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :program:`mongod` does not provide a helper for removing a tag -range. You may delete a tag assignment from a shard key range by removing -the corresponding document from the :data:`~config.tags` collection of -the ``config`` database. - -Each document in the :data:`~config.tags` collection holds the :term:`namespace` -of the sharded collection and a minimum shard key value. - -.. example:: - - The following example removes the ``NYC`` tag assignment for the - range of zip codes within Manhattan: - - .. code-block:: javascript - - use config - db.tags.remove({ _id: { ns: "records.users", min: { zipcode: "10001" }}, tag: "NYC" }) - -View Existing Shard Tags -~~~~~~~~~~~~~~~~~~~~~~~~ - -The output from :method:`sh.status()` lists tags associated with a -shard, if any, for each shard. A shard's tags exist in the shard's -document in the :data:`~config.shards` collection of the ``config`` -database. To return all shards with a specific tag, use a sequence of -operations that resemble the following, which will return only those -shards tagged with ``NYC``: - -.. code-block:: javascript - - use config - db.shards.find({ tags: "NYC" }) - -You can find tag ranges for all :term:`namespaces ` in the -:data:`~config.tags` collection of the ``config`` database. The output -of :method:`sh.status()` displays all tag ranges. To return all shard -key ranges tagged with ``NYC``, use the following sequence of -operations: - -.. code-block:: javascript - - use config - db.tags.find({ tags: "NYC" }) diff --git a/source/administration/vulnerability-notification.txt b/source/administration/vulnerability-notification.txt deleted file mode 100644 index fdc2bbb1202..00000000000 --- a/source/administration/vulnerability-notification.txt +++ /dev/null @@ -1,92 +0,0 @@ -========================== -Vulnerability Notification -========================== - -.. default-domain:: mongodb - -`10gen `_ values the privacy and security of all -users of MongoDB, and we work very hard to ensure that MongoDB and -related tools minimize risk exposure and increase the security and -integrity of data and environments using MongoDB. - -Notification ------------- - -If you believe you have discovered a vulnerability in MongoDB or a -related product or have experienced a security incident related to -MongoDB, please report these issues so that 10gen can respond -appropriately and work to prevent additional issues in the -future. All vulnerability reports should contain as much information -as possible so that we can move quickly to resolve the issue. -In particular, please include the following: - -- The name of the product. - -- *Common Vulnerability* information, if applicable, including: - - - CVSS (Common Vulnerability Scoring System) Score. - - - CVE (Common Vulnerability and Exposures) Identifier. - -- Contact information, including an email address and/or phone number, - if applicable. - -10gen will respond to all vulnerability notifications within -48 hours. - -Jira -~~~~ - -10gen prefers `jira.mongodb.org `_ for all -communication regarding MongoDB and related products.
- -Submit a ticket in the :issue:`Core Server Security ` -project, at: . The ticket -number will become the reference identifier for the issue for the -lifetime of the issue, and you can use this identifier for tracking -purposes. - -10gen will respond to any vulnerability notification received in a -Jira case posted to the :issue:`SECURITY` project. - -Email -~~~~~ - -While Jira is the preferred communication vector, you may also report -vulnerabilities via email to . - -You may encrypt email using our `public key -`_, to ensure the privacy -of any sensitive information in your vulnerability report. - -10gen will respond to any vulnerability notification received via -email with a response email that contains a reference number for a Jira ticket -posted to the :issue:`SECURITY` project. - -Evaluation -~~~~~~~~~~ - -10gen will validate all submitted vulnerabilities. 10gen will use Jira -to track all communications regarding the vulnerability, which may -include requests for clarification and for additional information. If -needed, 10gen representatives can set up a conference call to exchange -information regarding the vulnerability. - -Disclosure -~~~~~~~~~~ - -10gen requests that you do *not* publicly disclose any information -regarding the vulnerability or exploit the issue until 10gen has had the -opportunity to analyze the vulnerability, respond to the notification, -and notify key users, customers, and partners if needed. - -The amount of time required to validate a reported vulnerability -depends on the complexity and severity of the issue. 10gen takes all -reported vulnerabilities very seriously and will always ensure that -there is a clear and open channel of communication with the reporter -of the vulnerability. - -After validating the issue, 10gen will coordinate public disclosure of -the issue with the reporter in a mutually agreed timeframe and -format. If required or requested, the reporter of a vulnerability will -receive credit in the published security bulletin. diff --git a/source/aggregation.txt b/source/aggregation.txt index addc6770106..096d37b28b2 100644 --- a/source/aggregation.txt +++ b/source/aggregation.txt @@ -7,7 +7,7 @@ Aggregation .. default-domain:: mongodb In version 2.2, MongoDB introduced the :doc:`aggregation framework -` that provides a powerful and flexible set +` that provides a powerful and flexible set of tools to use for many data aggregation tasks. If you're familiar with data aggregation in SQL, consider the :doc:`/reference/sql-aggregation-comparison` document as an introduction to some of the basic concepts in the aggregation framework. Consider the @@ -16,10 +16,10 @@ full documentation of the aggregation framework here: .. toctree:: :maxdepth: 2 - applications/aggregation + core/aggregation tutorial/aggregation-examples reference/aggregation - applications/map-reduce + core/map-reduce In addition to the aggregation framework, MongoDB provides simple :doc:`aggregation methods and commands `, diff --git a/source/applications.txt b/source/applications.txt index 1faea719041..f7dba729367 100644 --- a/source/applications.txt +++ b/source/applications.txt @@ -31,15 +31,20 @@ The following documents outline basic application development topics: applications/drivers applications/optimization - applications/server-side-javascript core/capped-collections + core/server-side-javascript + tutorial/store-javascript-function-on-server ..
seealso:: - - :doc:`/applications/replication` + - :doc:`/core/read-preference` + - :doc:`/core/write-concern` - :doc:`/applications/indexes` - - :doc:`/applications/aggregation` - - :doc:`/applications/map-reduce` + - :doc:`/core/aggregation` + - :doc:`/core/map-reduce` + - :doc:`/tutorial/perform-incremental-map-reduce` + - :doc:`/tutorial/troubleshoot-map-function` + - :doc:`/tutorial/troubleshoot-reduce-function` - :doc:`/reference/connection-string` .. _application-patterns: diff --git a/source/applications/geospatial-indexes.txt b/source/applications/geospatial-indexes.txt index 23e4d782c5c..096856f2c30 100644 --- a/source/applications/geospatial-indexes.txt +++ b/source/applications/geospatial-indexes.txt @@ -6,7 +6,7 @@ Geospatial Indexes and Queries MongoDB offers a number of indexes and query mechanisms to handle geospatial information. This section introduces MongoDB's geospatial -features. +features. Surfaces -------- @@ -22,7 +22,7 @@ MongoDB offers two surface types: To calculate geometry over an Earth-like sphere, store your location data on a spherical surface and use :doc:`2dsphere - ` index. + ` index. Store your location data as GeoJSON objects with this coordinate-axis order: **longitude, latitude**. The coordinate reference system for @@ -31,7 +31,7 @@ MongoDB offers two surface types: - **Flat** To calculate distances on a Euclidean plane, store your location data - as legacy coordinate pairs and use a :doc:`2d ` index. + as legacy coordinate pairs and use a :doc:`2d ` index. .. _geo-overview-location-data: @@ -88,7 +88,7 @@ Geospatial Indexes MongoDB provides the following geospatial index types to support the geospatial queries: -- :doc:`2dsphere `, which supports: +- :doc:`2dsphere `, which supports: - Calculations on a sphere @@ -100,7 +100,7 @@ geospatial queries: .. versionadded:: 2.4 ``2dsphere`` indexes are not available before version 2.4. -- :doc:`2d `, which supports: +- :doc:`2d `, which supports: - Calculations using flat geometry @@ -109,16 +109,19 @@ geospatial queries: - A compound index with only one additional field, as a suffix of the ``2d`` index field + +Additional Resources +-------------------- -See the following pages for complete documentation of geospatial indexes -and queries: +Consider the following pages for complete documentation of geospatial +indexes and queries: .. toctree:: :maxdepth: 1 - /applications/2d - /applications/2dsphere - /applications/geohaystack + /core/2d + /core/2dsphere + /core/geohaystack /reference/geospatial-queries /tutorial/calculate-distances-using-spherical-geometry-with-2d-geospatial-indexes /core/geospatial-indexes diff --git a/source/applications/indexes.txt b/source/applications/indexes.txt index 2a9019408d4..9b1e7c76887 100644 --- a/source/applications/indexes.txt +++ b/source/applications/indexes.txt @@ -4,14 +4,6 @@ Indexing Strategies .. default-domain:: mongodb -This document provides strategies for indexing in MongoDB. For -fundamentals of MongoDB indexing, see :doc:`/core/indexes`. For -operational guidelines and procedures, see -:doc:`/administration/indexes`. - -Strategies ----------- - The best indexes for your application are based on a number of factors, including the kinds of queries you expect, the ratio of reads to writes, and the amount of free memory on your system. @@ -32,420 +24,16 @@ of index configurations with data sets similar to the ones you'll be running in production to see which configurations perform best. MongoDB can only use *one* index to support any given -operation. 
However, each clause of an :operator:`$or` query may use -a different index. - -.. _indexes-create-to-match-queries: - -Create Indexes to Support Your Queries --------------------------------------- - -If you only ever query on a single key in a given collection, then you need -to create just one single-key index for that collection. For example, you -might create an index on ``category`` in the ``product`` collection: - -.. code-block:: javascript - - db.products.ensureIndex( { "category": 1 } ) - -However, if you sometimes query on only one key and at other times -query on that key combined with a second key, then creating a -:ref:`compound index ` is more efficient. MongoDB -will use the compound index for both queries. For example, you might -create an index on both ``category`` and ``item``. - -.. code-block:: javascript - - db.products.ensureIndex( { "category": 1, "item": 1 } ) - -This allows you both options. You can query on just ``category``, and -you also can query on ``category`` combined with ``item``. -(To query on multiple keys and sort the results, see :ref:`index-sort`.) - -With the exception of queries that use the :operator:`$or` operator, a -query does not use multiple indexes. A query uses only one index. - -.. _compound-key-indexes: -.. _indexes-compound-key-indexes: - -Use Compound Indexes to Support Several Different Queries ---------------------------------------------------------- - -A single :ref:`compound index ` on multiple fields -can support all the queries that search a "prefix" subset of those fields. - -.. example:: - - The following index on a collection: - - .. code-block:: javascript - - { x: 1, y: 1, z: 1 } - - Can support queries that the following indexes support: - - .. code-block:: javascript - - { x: 1 } - { x: 1, y: 1 } - - There are some situations where the prefix indexes may offer better - query performance: for example if ``z`` is a large array. - - The ``{ x: 1, y: 1, z: 1 }`` index can also support many of the same - queries as the following index: - - .. code-block:: javascript - - { x: 1, z: 1 } - - Also, ``{ x: 1, z: 1 }`` has an additional use. Given the following - query: - - .. code-block:: javascript - - db.collection.find( { x: 5 } ).sort( { z: 1} ) - - The ``{ x: 1, z: 1 }`` index supports both the query and the sort - operation, while the ``{ x: 1, y: 1, z: 1 }`` index only supports - the query. For more information on sorting, see - :ref:`sorting-with-indexes`. - -.. _covered-queries: -.. _indexes-covered-queries: - -Create Indexes that Support Covered Queries -------------------------------------------- - -A covered query is a query in which: - -- all the fields in the :ref:`query ` - are part of an index, **and** - -- all the fields returned in the results are in the same index. - -Because the index "covers" the query, MongoDB can both match the -:ref:`query conditions ` **and** return -the results using only the index; MongoDB does not need to look at -the documents, only the index, to fulfill the query. - -Querying *only* the index can be much faster than querying documents -outside of the index. Index keys are typically smaller than the -documents they catalog, and indexes are typically available in RAM or -located sequentially on disk. - -MongoDB automatically uses an index that covers a query when possible. -To ensure that an index can *cover* a query, create an index that -includes all the fields listed in the :ref:`query document -` and in the query result. 
You can -specify the fields to return in the query results with a -:ref:`projection ` document. By default, MongoDB includes -the ``_id`` field in the query result. So, if the index does **not** -include the ``_id`` field, then you must exclude the ``_id`` field -(i.e. ``_id: 0``) from the query results. - -.. example:: - - Given collection ``users`` with an index on the fields ``user`` and - ``status``, as created by the following option: - - .. code-block:: javascript - - db.users.ensureIndex( { status: 1, user: 1 } ) - - Then, this index will cover the following query which selects on - the ``status`` field and returns only the ``user`` field: - - .. code-block:: javascript - - db.users.find( { status: "A" }, { user: 1, _id: 0 } ) - - In the operation, the projection document explicitly specifies - ``_id: 0`` to exclude the ``_id`` field from the result since the - index is only on the ``status`` and the ``user`` fields. - - If the projection document does not specify the exclusion of the - ``_id`` field, the query returns the ``_id`` field. The following - query is **not** covered by the index on the ``status`` and the - ``user`` fields because with the projection document ``{ user: 1 - }``, the query returns both the ``user`` field and the ``_id`` field: - - .. code-block:: javascript - - db.users.find( { status: "A" }, { user: 1 } ) - -An index **cannot** cover a query if: - -- any of the indexed fields in any of the documents in the collection - includes an array. If an indexed field is an array, the index becomes - a :ref:`multi-key index ` index and cannot - support a covered query. - -- any of the indexed fields are fields in subdocuments. To index fields - in subdocuments, use :term:`dot notation`. For example, consider - a collection ``users`` with documents of the following form: - - .. code-block:: javascript - - { _id: 1, user: { login: "tester" } } - - The collection has the following indexes: - - .. code-block:: none - - { user: 1 } - - { "user.login": 1 } - - The ``{ user: 1 }`` index covers the following query: - - .. code-block:: none - - db.users.find( { user: { login: "tester" } }, { user: 1, _id: 0 } ) - - However, the ``{ "user.login": 1 }`` index does **not** cover the - following query: - - .. code-block:: none - - db.users.find( { "user.login": "tester" }, { "user.login": 1, _id: 0 } ) - - The query, however, does use the ``{ "user.login": 1 }`` index to - find matching documents. - -To determine whether a query is a covered query, use the -:method:`~cursor.explain()` method. If the :method:`~cursor.explain()` -output displays ``true`` for the :data:`~explain.indexOnly` field, the query is -covered by an index, and MongoDB queries only that index to match the -query **and** return the results. - -For more information see :ref:`indexes-measuring-use`. - -.. _index-sort: -.. _sorting-with-indexes: - -Use Indexes to Sort Query Results ---------------------------------- - -For the fastest performance when sorting query results by a given field, -create a sorted index on that field. - -To sort query results on multiple fields, create a :ref:`compound -index `. MongoDB sorts results based on the field -order in the index. For queries that include a sort that uses a -compound index, ensure that all fields before the first sorted field -are equality matches. - -.. example:: - - If you create the following index: - - .. code-block:: javascript - - { a: 1, b: 1, c: 1, d: 1 } - - The following query and sort operations can use the index: - - .. 
code-block:: javascript - - db.collection.find().sort( { a:1 } ) - db.collection.find().sort( { a:1, b:1 } ) - - db.collection.find( { a:4 } ).sort( { a:1, b:1 } ) - db.collection.find( { b:5 } ).sort( { a:1, b:1 } ) - - db.collection.find( { a:5 } ).sort( { b:1, c:1 } ) - - db.collection.find( { a:5, c:4, b:3 } ).sort( { d:1 } ) - - db.collection.find( { a: { $gt:4 } } ).sort( { a:1, b:1 } ) - db.collection.find( { a: { $gt:5 } } ).sort( { a:1, b:1 } ) - - db.collection.find( { a:5, b:3, d:{ $gt:4 } } ).sort( { c:1 } ) - db.collection.find( { a:5, b:3, c:{ $lt:2 }, d:{ $gt:4 } } ).sort( { c:1 } ) - - However, the following queries cannot sort the results using the - index: - - .. code-block:: javascript - - db.collection.find().sort( { b:1 } ) - db.collection.find( { b:5 } ).sort( { b:1 } ) - -.. note:: - - For in-memory sorts that do not use an index, the :method:`sort() - ` operation is significantly slower. The - :method:`~cursor.sort()` operation will abort when it uses 32 - megabytes of memory. - -.. _indexes-ensure-indexes-fit-ram: - -Ensure Indexes Fit RAM ----------------------- - -For the fastest processing, ensure that your indexes fit entirely in RAM so -that the system can avoid reading the index from disk. - -To check the size of your indexes, use the -:method:`db.collection.totalIndexSize()` helper, which returns data in -bytes: - -.. code-block:: javascript - - > db.collection.totalIndexSize() - 4294976499 - -The above example shows an index size of almost 4.3 gigabytes. To ensure -this index fits in RAM, you must not only have more than that much RAM -available but also must have RAM available for the rest of the -:term:`working set`. Also remember: - -If you have and use multiple collections, you must consider the size -of all indexes on all collections. The indexes and the working set must be able to -fit in memory at the same time. - -There are some limited cases where indexes do not need -to fit in memory. See :ref:`indexing-right-handed`. - -.. seealso:: For additional :doc:`collection statistics - `, use :dbcommand:`collStats` or - :method:`db.collection.stats()`. - -.. _indexing-right-handed: - -Indexes that Hold Only Recent Values in RAM -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Indexes do not have to fit *entirely* into RAM in all cases. If the -value of the indexed field increments with every insert, and most queries -select recently added documents; then MongoDB only needs to keep the -parts of the index that hold the most recent or "right-most" values in -RAM. This allows for efficient index use for read and write -operations and minimize the amount of RAM required to support the -index. - -.. _index-selectivity: - -Create Queries that Ensure Selectivity --------------------------------------- - -Selectivity is the ability of a query to narrow results using the index. -Effective indexes are more selective and allow MongoDB to use the index -for a larger portion of the work associated with fulfilling the query. - -To ensure selectivity, -write queries that limit the number of possible documents with the -indexed field. Write queries that are appropriately selective relative -to your indexed data. - -.. example:: - - Suppose you have a field called ``status`` where the possible values - are ``new`` and ``processed``. If you add an index on ``status`` - you've created a low-selectivity index. The index will - be of little help in locating records. 
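-
-   For instance, a minimal sketch of creating such a low-selectivity
-   index (the ``records`` collection is hypothetical):
-
-   .. code-block:: javascript
-
-      db.records.ensureIndex( { status: 1 } )
-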
-
-   A better strategy, depending on your queries, would be to create a
-   :ref:`compound index <index-type-compound>` that includes the
-   low-selectivity field and another field. For example, you could
-   create a compound index on ``status`` and ``created_at``.
-
-   Another option, again depending on your use case, might be to use
-   separate collections, one for each status.
-
-.. example::
-
-   Consider an index ``{ a : 1 }`` (i.e. an index on the key ``a``
-   sorted in ascending order) on a collection where ``a`` has three
-   values evenly distributed across the collection:
-
-   .. code-block:: javascript
-
-      { _id: ObjectId(), a: 1, b: "ab" }
-      { _id: ObjectId(), a: 1, b: "cd" }
-      { _id: ObjectId(), a: 1, b: "ef" }
-      { _id: ObjectId(), a: 2, b: "jk" }
-      { _id: ObjectId(), a: 2, b: "lm" }
-      { _id: ObjectId(), a: 2, b: "no" }
-      { _id: ObjectId(), a: 3, b: "pq" }
-      { _id: ObjectId(), a: 3, b: "rs" }
-      { _id: ObjectId(), a: 3, b: "tv" }
-
-   If you query for ``{ a: 2, b: "no" }``, MongoDB must scan 3
-   :term:`documents <document>` in the collection to return the one
-   matching result. Similarly, a query for ``{ a: { $gt: 1 }, b: "tv" }``
-   must scan 6 documents, also to return one result.
-
-   Consider the same index on a collection where ``a`` has *nine* values
-   evenly distributed across the collection:
-
-   .. code-block:: javascript
-
-      { _id: ObjectId(), a: 1, b: "ab" }
-      { _id: ObjectId(), a: 2, b: "cd" }
-      { _id: ObjectId(), a: 3, b: "ef" }
-      { _id: ObjectId(), a: 4, b: "jk" }
-      { _id: ObjectId(), a: 5, b: "lm" }
-      { _id: ObjectId(), a: 6, b: "no" }
-      { _id: ObjectId(), a: 7, b: "pq" }
-      { _id: ObjectId(), a: 8, b: "rs" }
-      { _id: ObjectId(), a: 9, b: "tv" }
-
-   If you query for ``{ a: 2, b: "cd" }``, MongoDB must scan only one
-   document to fulfill the query. The index and query are more selective
-   because the values of ``a`` are evenly distributed *and* the query
-   can select a specific document using the index.
-
-   However, although the index on ``a`` is more selective, a query such
-   as ``{ a: { $gt: 5 }, b: "tv" }`` would still need to scan 4
-   documents. The range condition ``{ $gt: 5 }`` matches four values of
-   ``a``, so the index can narrow the scan only to those four documents;
-   MongoDB must then test ``b`` against each of them.
-
-If overall selectivity is low, and if MongoDB must read a number of
-documents to return results, then some queries may perform faster
-without indexes. To determine performance, see
-:ref:`indexes-measuring-use`.
-
-Consider Performance when Creating Indexes for Write-heavy Applications
------------------------------------------------------------------------
-
-If your application is write-heavy, then be careful when creating new
-indexes, since each additional index will impose a
-write-performance penalty. In general, don't be careless about adding
-indexes. Add indexes to complement your queries. Always have
-a good reason for adding a new index, and be sure to benchmark
-alternative strategies.
-
-Consider Insert Throughput
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. todo:: insert link to /source/core/write-operations when that page is complete.
-   Do we want to link to write concern? -bg
-
-MongoDB must update *all* indexes associated with a collection after
-every insert, update, or delete operation. For update operations, if
-the updated document does not move to a new location, then MongoDB only
-modifies the updated fields in the index. Therefore, every index on a
-collection adds some amount of overhead to these write operations. In
-almost every case, the performance gains that indexes realize for read
-operations are worth the insertion penalty; the sketch below shows one
-way to measure that penalty for your own workload. 
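-
-A minimal measurement sketch (the ``testInserts`` collection and the
-10,000-document count are hypothetical; adjust both for your workload).
-Run it once without a secondary index and once after creating one, and
-compare the elapsed times:
-
-.. code-block:: javascript
-
-   var start = new Date().getTime();
-   for ( var i = 0; i < 10000; i++ ) {
-      db.testInserts.insert( { a: i, b: i * 2 } );
-   }
-   print( "elapsed ms: " + ( new Date().getTime() - start ) );
-
-   // repeat after: db.testInserts.ensureIndex( { a: 1, b: 1 } )
-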
However, in some cases: - -- An index to support an infrequent query might incur more - insert-related costs than savings in read-time. - - .. todo:: How do you determine if the above is the case? - Empirically. +operation. However, each clause of an :operator:`$or` query may use a +different index. -- If you have many indexes on a collection with a high insert throughput - and a number of related indexes, you may find better overall - performance with a smaller number of indexes, even if some queries - are less optimally supported by an index. +The following topics describe indexing strategies: - .. todo:: The above is unclear. -bg +.. toctree:: + :maxdepth: 1 -- If your indexes and queries are not sufficiently :ref:`selective - `, the speed improvements for query operations - may not offset the costs of maintaining an index. For more - information see :ref:`index-selectivity`. + /tutorial/create-indexes-to-support-queries + /tutorial/create-sorted-indexes + /tutorial/ensure-indexes-fit-ram + /tutorial/create-queries-that-ensure-selectivity + /tutorial/consider-performance-when-creating-indexes diff --git a/source/applications/map-reduce.txt b/source/applications/map-reduce.txt deleted file mode 100644 index 8ba90bc8159..00000000000 --- a/source/applications/map-reduce.txt +++ /dev/null @@ -1,601 +0,0 @@ -========== -Map-Reduce -========== - -.. default-domain:: mongodb - -Map-reduce operations can handle complex aggregation tasks. To perform -map-reduce operations, MongoDB provides the :dbcommand:`mapReduce` -command and, in the :program:`mongo` shell, the -:method:`db.collection.mapReduce()` wrapper method. - -For many simple aggregation tasks, see the :doc:`aggregation framework -`. - -.. _map-reduce-examples: - -Map-Reduce Examples -------------------- - -This section provides some map-reduce examples in the :program:`mongo` -shell using the :method:`db.collection.mapReduce()` method: - -.. code-block:: javascript - - db.collection.mapReduce( - , - , - { - out: , - query: , - sort: , - limit: , - finalize: , - scope: , - jsMode: , - verbose: - } - ) - -For more information on the parameters, see the -:method:`db.collection.mapReduce()` reference page . - -.. include:: /includes/examples-map-reduce.rst - :start-after: map-reduce-document-prototype-begin - -.. _map-reduce-incremental: - -Incremental Map-Reduce ----------------------- - -If the map-reduce dataset is constantly growing, then rather than -performing the map-reduce operation over the entire dataset each time -you want to run map-reduce, you may want to perform an incremental -map-reduce. - -To perform incremental map-reduce: - -#. Run a map-reduce job over the current collection and output the - result to a separate collection. - -#. When you have more data to process, run subsequent map-reduce job - with: - - - the ``query`` parameter that specifies conditions that match - *only* the new documents. - - - the ``out`` parameter that specifies the ``reduce`` action to - merge the new results into the existing output collection. - -Consider the following example where you schedule a map-reduce -operation on a ``sessions`` collection to run at the end of each day. - -Data Setup -~~~~~~~~~~ - -The ``sessions`` collection contains documents that log users' session -each day, for example: - -.. 
code-block:: javascript
-
-   db.sessions.save( { userid: "a", ts: ISODate('2011-11-03 14:17:00'), length: 95 } );
-   db.sessions.save( { userid: "b", ts: ISODate('2011-11-03 14:23:00'), length: 110 } );
-   db.sessions.save( { userid: "c", ts: ISODate('2011-11-03 15:02:00'), length: 120 } );
-   db.sessions.save( { userid: "d", ts: ISODate('2011-11-03 16:45:00'), length: 45 } );
-
-   db.sessions.save( { userid: "a", ts: ISODate('2011-11-04 11:05:00'), length: 105 } );
-   db.sessions.save( { userid: "b", ts: ISODate('2011-11-04 13:14:00'), length: 120 } );
-   db.sessions.save( { userid: "c", ts: ISODate('2011-11-04 17:00:00'), length: 130 } );
-   db.sessions.save( { userid: "d", ts: ISODate('2011-11-04 15:37:00'), length: 65 } );
-
-Initial Map-Reduce of Current Collection
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Run the first map-reduce operation as follows:
-
-#. Define the ``map`` function that maps the ``userid`` to an
-   object that contains the fields ``userid``, ``total_time``, ``count``,
-   and ``avg_time``:
-
-   .. code-block:: javascript
-
-      var mapFunction = function() {
-         var key = this.userid;
-         var value = {
-                       userid: this.userid,
-                       total_time: this.length,
-                       count: 1,
-                       avg_time: 0
-                     };
-
-         emit( key, value );
-      };
-
-#. Define the corresponding ``reduce`` function with two arguments
-   ``key`` and ``values`` to calculate the total time and the count.
-   The ``key`` corresponds to the ``userid``, and ``values`` is an
-   array whose elements correspond to the individual objects mapped to the
-   ``userid`` in the ``mapFunction``.
-
-   .. code-block:: javascript
-
-      var reduceFunction = function(key, values) {
-
-         var reducedObject = {
-                               userid: key,
-                               total_time: 0,
-                               count: 0,
-                               avg_time: 0
-                             };
-
-         values.forEach( function(value) {
-                            reducedObject.total_time += value.total_time;
-                            reducedObject.count += value.count;
-                         }
-                       );
-         return reducedObject;
-      };
-
-#. Define the ``finalize`` function with two arguments ``key`` and
-   ``reducedValue``. The function modifies the ``reducedValue`` document
-   to add another field ``avg_time`` and returns the modified document.
-
-   .. code-block:: javascript
-
-      var finalizeFunction = function (key, reducedValue) {
-
-         if (reducedValue.count > 0)
-            reducedValue.avg_time = reducedValue.total_time / reducedValue.count;
-
-         return reducedValue;
-      };
-
-#. Perform map-reduce on the ``sessions`` collection using the
-   ``mapFunction``, the ``reduceFunction``, and the
-   ``finalizeFunction`` functions. Output the results to a collection
-   ``session_stat``. If the ``session_stat`` collection already exists,
-   the ``reduce`` output action merges the new results with the
-   existing contents:
-
-   .. code-block:: javascript
-
-      db.sessions.mapReduce( mapFunction,
-                             reduceFunction,
-                             {
-                               out: { reduce: "session_stat" },
-                               finalize: finalizeFunction
-                             }
-                           )
-
-Subsequent Incremental Map-Reduce
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Later, as the ``sessions`` collection grows, you can run additional
-map-reduce operations. For example, add new documents to the
-``sessions`` collection:
-
-.. code-block:: javascript
-
-   db.sessions.save( { userid: "a", ts: ISODate('2011-11-05 14:17:00'), length: 100 } );
-   db.sessions.save( { userid: "b", ts: ISODate('2011-11-05 14:23:00'), length: 115 } );
-   db.sessions.save( { userid: "c", ts: ISODate('2011-11-05 15:02:00'), length: 125 } );
-   db.sessions.save( { userid: "d", ts: ISODate('2011-11-05 16:45:00'), length: 55 } );
-
-At the end of the day, perform incremental map-reduce on the
-``sessions`` collection but use the ``query`` field to select only the
-new documents. 
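-
-For example, a minimal sketch of computing the cutoff for the ``query``
-condition programmatically (the ``cutoff`` variable is hypothetical,
-and the sketch assumes the job runs once per day):
-
-.. code-block:: javascript
-
-   var cutoff = new Date();
-   cutoff.setDate( cutoff.getDate() - 1 );   // start of the window: one day ago
-
-   // then use { ts: { $gt: cutoff } } as the query condition below
-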
Output the results to the collection ``session_stat``, -but ``reduce`` the contents with the results of the incremental -map-reduce: - -.. code-block:: javascript - - db.sessions.mapReduce( mapFunction, - reduceFunction, - { - query: { ts: { $gt: ISODate('2011-11-05 00:00:00') } }, - out: { reduce: "session_stat" }, - finalize: finalizeFunction - } - ); - -.. _map-reduce-temporary-collection: - -Temporary Collection --------------------- - -The map-reduce operation uses a temporary collection during processing. -At completion, the map-reduce operation renames the temporary -collection. As a result, you can perform a map-reduce operation -periodically with the same target collection name without affecting -the intermediate states. Use this mode when -generating statistical output collections on a regular basis. - -.. _map-reduce-concurrency: - -Concurrency ------------ - -The map-reduce operation is composed of many tasks, including: - -- reads from the input collection, - -- executions of the ``map`` function, - -- executions of the ``reduce`` function, - -- writes to the output collection. - -These various tasks take the following locks: - -- The read phase takes a read lock. It yields every 100 documents. - -- The insert into the temporary collection takes a write lock for a - single write. - - If the output collection does not exist, the creation of the output - collection takes a write lock. - - If the output collection exists, then the output actions (i.e. - ``merge``, ``replace``, ``reduce``) take a write lock. - -.. versionchanged:: 2.4 - The V8 JavaScript engine, which became the default in 2.4, allows - multiple JavaScript operations to execute at the same time. Prior to - 2.4, JavaScript code (i.e. ``map``, ``reduce``, ``finalize`` - functions) executed in a single thread. - -.. note:: - - The final write lock during post-processing makes the results appear - atomically. However, output actions ``merge`` and ``reduce`` may - take minutes to process. For the ``merge`` and ``reduce``, the - ``nonAtomic`` flag is available. See the - :method:`db.collection.mapReduce()` reference for more information. - -.. _map-reduce-sharded-cluster: - -Sharded Cluster ---------------- - -Sharded Input -~~~~~~~~~~~~~ - -When using sharded collection as the input for a map-reduce operation, -:program:`mongos` will automatically dispatch the map-reduce job to -each shard in parallel. There is no special option -required. :program:`mongos` will wait for jobs on all shards to -finish. - -Sharded Output -~~~~~~~~~~~~~~ - -By default the output collection is not sharded. The process is: - -- :program:`mongos` dispatches a map-reduce finish job to the shard - that will store the target collection. - -- The target shard pulls results from all other shards, and runs a - final reduce/finalize operation, and write to the output. - -- If using the ``sharded`` option to the ``out`` parameter, MongoDB - shards the output using ``_id`` field as the shard key. - - .. versionchanged:: 2.2 - -- If the output collection does not exist, MongoDB creates and shards - the collection on the ``_id`` field. If the collection is empty, - MongoDB creates :term:`chunks ` using the result of the first - stage of the map-reduce operation. - -- :program:`mongos` dispatches, in parallel, a map-reduce finish job - to every shard that owns a chunk. - -- Each shard will pull the results it owns from all other shards, run a - final reduce/finalize, and write to the output collection. - -.. 
note:: - - - During later map-reduce jobs, MongoDB splits chunks as needed. - - - Balancing of chunks for the output collection is automatically - prevented during post-processing to avoid concurrency issues. - -In MongoDB 2.0: - -- :program:`mongos` retrieves the results from each shard, and - performs merge sort to order the results, and performs a reduce/finalize as - needed. :program:`mongos` then writes the result to the output - collection in sharded mode. - -- This model requires only a small amount of memory, even for large datasets. - -- Shard chunks are not automatically split during insertion. This - requires manual intervention until the chunks are granular and - balanced. - -.. warning:: - - For best results, only use the sharded output options for - :dbcommand:`mapReduce` in version 2.2 or later. - -.. _map-reduce-troubleshooting: - -Troubleshooting Map-Reduce Operations -------------------------------------- - -You can troubleshoot the ``map`` function and the ``reduce`` function -in the :program:`mongo` shell. - -.. _troubleshoot-map-function: - -Troubleshoot the Map Function -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can verify the ``key`` and ``value`` pairs emitted by the ``map`` -function by writing your own ``emit`` function. - -Consider a collection ``orders`` that contains documents of the -following prototype: - -.. code-block:: javascript - - { - _id: ObjectId("50a8240b927d5d8b5891743c"), - cust_id: "abc123", - ord_date: new Date("Oct 04, 2012"), - status: 'A', - price: 250, - items: [ { sku: "mmm", qty: 5, price: 2.5 }, - { sku: "nnn", qty: 5, price: 2.5 } ] - } - -#. Define the ``map`` function that maps the ``price`` to the - ``cust_id`` for each document and emits the ``cust_id`` and ``price`` - pair: - - .. code-block:: javascript - - var map = function() { - emit(this.cust_id, this.price); - }; - -#. Define the ``emit`` function to print the key and value: - - .. code-block:: javascript - - var emit = function(key, value) { - print("emit"); - print("key: " + key + " value: " + tojson(value)); - } - -#. Invoke the ``map`` function with a single document from the ``orders`` - collection: - - .. code-block:: javascript - - var myDoc = db.orders.findOne( { _id: ObjectId("50a8240b927d5d8b5891743c") } ); - map.apply(myDoc); - -#. Verify the key and value pair is as you expected. - - .. code-block:: javascript - - emit - key: abc123 value:250 - -#. Invoke the ``map`` function with multiple documents from the ``orders`` - collection: - - .. code-block:: javascript - - var myCursor = db.orders.find( { cust_id: "abc123" } ); - - while (myCursor.hasNext()) { - var doc = myCursor.next(); - print ("document _id= " + tojson(doc._id)); - map.apply(doc); - print(); - } - -#. Verify the key and value pairs are as you expected. - -.. _troubleshoot-reduce-function: - -Troubleshoot the Reduce Function -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Confirm Output Type -``````````````````` - -You can test that the ``reduce`` function returns a value that is the -same type as the value emitted from the ``map`` function. - -#. Define a ``reduceFunction1`` function that takes the arguments - ``keyCustId`` and ``valuesPrices``. ``valuesPrices`` is an array of - integers: - - .. code-block:: javascript - - var reduceFunction1 = function(keyCustId, valuesPrices) { - return Array.sum(valuesPrices); - }; - -#. Define a sample array of integers: - - .. code-block:: javascript - - var myTestValues = [ 5, 5, 10 ]; - -#. Invoke the ``reduceFunction1`` with ``myTestValues``: - - .. 
code-block:: javascript
-
-      reduceFunction1('myKey', myTestValues);
-
-#. Verify the ``reduceFunction1`` returned an integer:
-
-   .. code-block:: javascript
-
-      20
-
-#. Define a ``reduceFunction2`` function that takes the arguments
-   ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of
-   documents that contain two fields ``count`` and ``qty``:
-
-   .. code-block:: javascript
-
-      var reduceFunction2 = function(keySKU, valuesCountObjects) {
-         var reducedValue = { count: 0, qty: 0 };
-
-         for (var idx = 0; idx < valuesCountObjects.length; idx++) {
-            reducedValue.count += valuesCountObjects[idx].count;
-            reducedValue.qty += valuesCountObjects[idx].qty;
-         }
-
-         return reducedValue;
-      };
-
-#. Define a sample array of documents:
-
-   .. code-block:: javascript
-
-      var myTestObjects = [
-                            { count: 1, qty: 5 },
-                            { count: 2, qty: 10 },
-                            { count: 3, qty: 15 }
-                          ];
-
-#. Invoke the ``reduceFunction2`` with ``myTestObjects``:
-
-   .. code-block:: javascript
-
-      reduceFunction2('myKey', myTestObjects);
-
-#. Verify the ``reduceFunction2`` returned a document with exactly the
-   ``count`` and the ``qty`` fields:
-
-   .. code-block:: javascript
-
-      { "count" : 6, "qty" : 30 }
-
-Ensure Insensitivity to the Order of Mapped Values
-``````````````````````````````````````````````````
-
-The ``reduce`` function takes a ``key`` and a ``values`` array as its
-arguments. You can test that the result of the ``reduce`` function does
-not depend on the order of the elements in the ``values`` array.
-
-#. Define a sample ``values1`` array and a sample ``values2`` array
-   that only differ in the order of the array elements:
-
-   .. code-block:: javascript
-
-      var values1 = [
-                      { count: 1, qty: 5 },
-                      { count: 2, qty: 10 },
-                      { count: 3, qty: 15 }
-                    ];
-
-      var values2 = [
-                      { count: 3, qty: 15 },
-                      { count: 1, qty: 5 },
-                      { count: 2, qty: 10 }
-                    ];
-
-#. Define a ``reduceFunction2`` function that takes the arguments
-   ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of
-   documents that contain two fields ``count`` and ``qty``:
-
-   .. code-block:: javascript
-
-      var reduceFunction2 = function(keySKU, valuesCountObjects) {
-         var reducedValue = { count: 0, qty: 0 };
-
-         for (var idx = 0; idx < valuesCountObjects.length; idx++) {
-            reducedValue.count += valuesCountObjects[idx].count;
-            reducedValue.qty += valuesCountObjects[idx].qty;
-         }
-
-         return reducedValue;
-      };
-
-#. Invoke the ``reduceFunction2`` first with ``values1`` and then with
-   ``values2``:
-
-   .. code-block:: javascript
-
-      reduceFunction2('myKey', values1);
-      reduceFunction2('myKey', values2);
-
-#. Verify the ``reduceFunction2`` returned the same result:
-
-   .. code-block:: javascript
-
-      { "count" : 6, "qty" : 30 }
-
-Ensure Reduce Function Idempotency
-``````````````````````````````````
-
-Because the map-reduce operation may call a ``reduce`` multiple times
-for the same key, the ``reduce`` function must return a value of the
-same type as the value emitted from the ``map`` function. You can test
-that the ``reduce`` function can process "reduced" values without
-affecting the *final* value.
-
-#. Define a ``reduceFunction2`` function that takes the arguments
-   ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of
-   documents that contain two fields ``count`` and ``qty``:
-
-   .. 
code-block:: javascript - - var reduceFunction2 = function(keySKU, valuesCountObjects) { - reducedValue = { count: 0, qty: 0 }; - - for (var idx = 0; idx < valuesCountObjects.length; idx++) { - reducedValue.count += valuesCountObjects[idx].count; - reducedValue.qty += valuesCountObjects[idx].qty; - } - - return reducedValue; - }; - -#. Define a sample key: - - .. code-block:: javascript - - var myKey = 'myKey'; - -#. Define a sample ``valuesIdempotent`` array that contains an element that is a - call to the ``reduceFunction2`` function: - - .. code-block:: javascript - - var valuesIdempotent = [ - { count: 1, qty: 5 }, - { count: 2, qty: 10 }, - reduceFunction2(myKey, [ { count:3, qty: 15 } ] ) - ]; - -#. Define a sample ``values1`` array that combines the values passed to - ``reduceFunction2``: - - .. code-block:: javascript - - var values1 = [ - { count: 1, qty: 5 }, - { count: 2, qty: 10 }, - { count: 3, qty: 15 } - ]; - -#. Invoke the ``reduceFunction2`` first with ``myKey`` and - ``valuesIdempotent`` and then with ``myKey`` and ``values1``: - - .. code-block:: javascript - - reduceFunction2(myKey, valuesIdempotent); - reduceFunction2(myKey, values1); - -#. Verify the ``reduceFunction2`` returned the same result: - - .. code-block:: javascript - - { "count" : 6, "qty" : 30 } diff --git a/source/applications/optimization.txt b/source/applications/optimization.txt index b26b6e2f9a9..d8228571a89 100644 --- a/source/applications/optimization.txt +++ b/source/applications/optimization.txt @@ -1,208 +1,23 @@ -================================================ -Optimization Strategies for MongoDB Applications -================================================ +=================================== +Optimization Strategies for MongoDB +=================================== .. default-domain:: mongodb -Overview --------- +There are many factors that affect database performance and +responsiveness, including index use, query structure, data models, +application design, and architecture, as well as operational factors +such as architecture and system configuration. -There are many factors that can affect performance of operations in -MongoDB, including index use, query structure, data modeling, -application design and architecture, as well as operational factors -including architecture and system configuration. This document -addresses key application optimization strategies, and includes -examples and links to relevant reference material. +This section describes techniques for optimizing application +performance with MongoDB. -.. seealso:: :ref:`aggregation-optimize-performance`, - :doc:`/faq/fundamentals`, and :doc:`/faq/developers`. +.. toctree:: + :maxdepth: 1 -Strategies ----------- + /tutorial/use-indexes-to-optimize-query-performance + /tutorial/use-index-operators-to-optimize-query-performance + /tutorial/evaluate-operational-performance + /tutorial/use-capped-collections-for-fast-writes-and-reads -This section describes techniques for optimizing database performance -with MongoDB with particular attention to query performance and basic -client operations. - -Use Indexes -~~~~~~~~~~~ - -For commonly issued queries, create :doc:`indexes `. If a -query searches multiple fields, create a :ref:`compound index -`. Scanning an index is much faster than scanning a -collection. The indexes structures are smaller than the documents -reference, and store references in order. - -.. 
example:: If you have a ``posts`` collection containing blog posts, - and if you regularly issue a query that sorts on the ``author_name`` - field, then you can optimize the query by creating an index on the - ``author_name`` field: - - .. code-block:: javascript - - db.posts.ensureIndex( { author_name : 1 } ) - -Indexes also improve efficiency on queries that routinely sort on a -given field. - -.. example:: If you regularly issue a query that sorts on the - ``timestamp`` field, then you can optimize the query by creating an - index on the ``timestamp`` field: - - Creating this index: - - .. code-block:: javascript - - db.posts.ensureIndex( { timestamp : 1 } ) - - Optimizes this query: - - .. code-block:: javascript - - db.posts.find().sort( { timestamp : -1 } ) - -Because MongoDB can read indexes in both ascending and descending -order, the direction of a single-key index does not matter. - -Indexes support queries, update operations, and some phases of the -:ref:`aggregation pipeline -`. - -.. include:: /includes/fact-bindata-storage-optimization.rst - -Limit Results -~~~~~~~~~~~~~ - -MongoDB :term:`cursors ` return results in groups of multiple -documents. If you know the number of results you want, you can reduce -the demand on network resources by issuing the :method:`cursor.limit()` -method. - -This is typically used in conjunction with sort operations. For example, -if you need only 10 results from your query to the ``posts`` -collection, you would issue the following command: - -.. code-block:: javascript - - db.posts.find().sort( { timestamp : -1 } ).limit(10) - -For more information on limiting results, see :method:`cursor.limit()` - -Use Projections to Return Only Necessary Data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When you need only a subset of fields from documents, you can achieve better -performance by returning only the fields you need: - -For example, if in your query to the ``posts`` collection, you need only -the ``timestamp``, ``title``, ``author``, and ``abstract`` fields, you -would issue the following command: - -.. code-block:: javascript - - db.posts.find( {}, { timestamp : 1 , title : 1 , author : 1 , abstract : 1} ).sort( { timestamp : -1 } ) - -For more information on using projections, see -:ref:`read-operations-projection`. - -Use the Database Profiler to Evaluate Performance -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. todo Add link below: :doc:`database profiler ` - -MongoDB provides a database profiler that shows performance -characteristics of each operation against the database. Use the profiler -to locate any queries or write operations that are running slow. You can -use this information, for example, to determine what indexes to create. - -.. todo Add below: , see :doc:`/tutorial/manage-the-database-profiler` and ... - -For more information, see :ref:`database-profiling`. - -Use ``db.currentOp()`` to Evaluate Performance -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :method:`db.currentOp()` method reports on current operations -running on a :program:`mongod` instance. For documentation of the -output of :method:`db.currentOp()` see :doc:`/reference/current-op`. - -Use ``$explain`` to Evaluate Performance -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :method:`explain() ` method returns statistics -on a query, and reports the index MongoDB selected to fulfill the -query, as well as information about the internal operation of the -query. - -.. 
example:: To use :method:`explain() ` on a query - for documents matching the expression ``{ a: 1 }``, in the - collection ``records``, use an operation that resembles the - following in the :program:`mongo` shell: - - .. code-block:: javascript - - db.records.find( { a: 1 } ).explain() - -.. todo Link to Kay's new explain doc - -Use ``$hint`` to Select a Particular Index -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In most cases the :ref:`query optimizer -` selects the optimal index for a -specific operation; however, you can force MongoDB to use a specific -index using the :method:`hint() ` method. Use -:method:`hint() ` to support performance testing, or on -some queries where you must select a field or field included in -several indexes. - -Use the Increment Operator to Perform Operations Server-Side -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Use MongoDB's :operator:`$inc` operator to increment or decrement -values in documents. The operator increments the value of the field on -the server side, as an alternative to selecting a document, making -simple modifications in the client and then writing the entire -document to the server. The :operator:`$inc` operator can also help -avoid race conditions, which would result when two application -instances queried for a document, manually incremented a field, and -saved the entire document back at the same time. - -Perform Server-Side Code Execution -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For some kinds of operations, you can perform operations on the -:program:`mongod` server itself rather than writing a client -application to perform a simple task. This can eliminate network -overhead for client operations for some basic administrative -operations. Consider the following example: - -.. example:: For example, if you want to remove a field from all - documents in a collection, performing the operation directly on the - server is more efficient than transmitting the collection to your - client and back again. - -For more information, see the :doc:`/applications/server-side-javascript` -page. - -Use Capped Collections -~~~~~~~~~~~~~~~~~~~~~~ - -:doc:`/core/capped-collections` are circular, fixed-size collections -that keep documents well-ordered, even without the use of an -index. This means that capped collections can receive very high-speed -writes and sequential reads. - -These collections are particularly useful for keeping log files but are -not limited to that purpose. Use capped collections where appropriate. - -Use Natural Order -~~~~~~~~~~~~~~~~~ - -To return documents in the order they exist on disk, return sorted -operations using the :operator:`$natural` operator. :term:`Natural -order ` does not use indexes but can be fast for -operations when you want to select the first or last items on -disk. This is particularly useful for capped collections. - -.. seealso:: :method:`~cursor.sort()` and :method:`~cursor.limit()`. +.. seealso:: :doc:`/core/server-side-javascript`. diff --git a/source/applications/replication.txt b/source/applications/replication.txt index 23e2049db49..7446fb701cd 100644 --- a/source/applications/replication.txt +++ b/source/applications/replication.txt @@ -8,793 +8,13 @@ From the perspective of a client application, whether a MongoDB instance is running as a single server (i.e. "standalone") or a :term:`replica set` is transparent. However, replica sets offer some configuration options for write and read operations. 
[#sharded-clusters]_ -This document describes those options and their implications. + +.. toctree:: + :maxdepth: 1 + + /core/write-concern + /core/read-preference .. [#sharded-clusters] :term:`Sharded clusters ` where the shards are also replica sets provide the same configuration options with regards to write and read operations. - -.. _replica-set-write-concern: - -Write Concern -------------- - -MongoDB's built-in :term:`write concern` confirms the success of write -operations to a :term:`replica set's ` :term:`primary`. -Write concern uses the :dbcommand:`getLastError` command after write -operations to return an object with error information or confirmation -that there are no errors. - -After the :doc:`driver write concern change -` all officially supported -MongoDB drivers enable write concern by default. - -Verify Write Operations -~~~~~~~~~~~~~~~~~~~~~~~ - -The default write concern confirms write operations only on the -primary. You can configure write concern to confirm write operations -to additional replica set members as well by issuing the -:dbcommand:`getLastError` command with the ``w`` option. - -The ``w`` option confirms that write operations have replicated to the -specified number of replica set members, including the primary. You can -either specify a number or specify ``majority``, which ensures the write -propagates to a majority of set members. The following example ensures -the operation has replicated to two members (the primary and one other -member): - -.. code-block:: javascript - - db.runCommand( { getLastError: 1, w: 2 } ) - -The following example ensures the write operation has replicated to a -majority of the configured members of the set. - -.. code-block:: javascript - - db.runCommand( { getLastError: 1, w: "majority" } ) - -If you specify a ``w`` value greater than the number of members that -hold a copy of the data (i.e., greater than the number of -non-:term:`arbiter` members), the operation blocks until those members -become available. This can cause the operation to block forever. To -specify a timeout threshold for the :dbcommand:`getLastError` operation, -use the ``wtimeout`` argument. The following example sets the timeout to -5000 milliseconds: - -.. code-block:: javascript - - db.runCommand( { getLastError: 1, w: 2, wtimeout:5000 } ) - -Modify Default Write Concern -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can configure your own "default" :dbcommand:`getLastError` -behavior for a replica set. Use the -:data:`~local.system.replset.settings.getLastErrorDefaults` setting in -the :doc:`replica set configuration -`. The following sequence of -commands creates a configuration that waits for the write operation to -complete on a majority of the set members before returning: - -.. code-block:: javascript - - cfg = rs.conf() - cfg.settings = {} - cfg.settings.getLastErrorDefaults = {w: "majority"} - rs.reconfig(cfg) - -The :data:`~local.system.replset.settings.getLastErrorDefaults` -setting affects only those :dbcommand:`getLastError` commands that -have *no* other arguments. - -.. note:: - - Use of insufficient write concern can lead to :ref:`rollbacks - ` in the case of :ref:`replica set failover - `. Always ensure that your operations have - specified the required write concern for your application. - -.. 
seealso:: :ref:`write-operations-write-concern` and
-   :ref:`connections-write-concern`
-
-Custom Write Concerns
-~~~~~~~~~~~~~~~~~~~~~
-
-You can use replica set tags to create custom write concerns using the
-:data:`~local.system.replset.settings.getLastErrorDefaults` and
-:data:`~local.system.replset.settings.getLastErrorModes` replica set
-settings.
-
-.. note::
-
-   Custom write concern modes specify the field name and a number of
-   *distinct* values for that field. By contrast, read preferences use
-   the value of fields in the tag document to direct read operations.
-
-   In some cases, you may be able to use the same tags for read
-   preferences and write concerns; however, you may need to create
-   additional tags for write concerns depending on the requirements of
-   your application.
-
-Single Tag Write Concerns
-`````````````````````````
-
-Consider a five-member replica set, where each member has one of the
-following tag sets:
-
-.. code-block:: javascript
-
-   { "use": "reporting" }
-   { "use": "backup" }
-   { "use": "application" }
-   { "use": "application" }
-   { "use": "application" }
-
-You could create a custom write concern mode that will ensure that
-applicable write operations will not return until members with two
-different values of the ``use`` tag have acknowledged the write
-operation. Create the mode with the following sequence of operations
-in the :program:`mongo` shell:
-
-.. code-block:: javascript
-
-   cfg = rs.conf()
-   cfg.settings = { getLastErrorModes: { use2: { "use": 2 } } }
-   rs.reconfig(cfg)
-
-.. these examples need to be better so that they avoid overwriting
-   getLastErrorModes upon repetition (i.e. they don't $push documents
-   to getLastErrorModes.)
-
-To use this mode, pass the string ``use2`` to the ``w`` option of
-:dbcommand:`getLastError` as follows:
-
-.. code-block:: javascript
-
-   db.runCommand( { getLastError: 1, w: "use2" } )
-
-Specific Custom Write Concerns
-``````````````````````````````
-
-If you have a three-member replica set with the following tag sets:
-
-.. code-block:: javascript
-
-   { "disk": "ssd" }
-   { "disk": "san" }
-   { "disk": "spinning" }
-
-You cannot specify a custom
-:data:`~local.system.replset.settings.getLastErrorModes` value to
-ensure that the write propagates to the ``san`` before
-returning. However, you may implement this write concern policy by
-creating the following additional tags, so that the set resembles the
-following:
-
-.. code-block:: javascript
-
-   { "disk": "ssd" }
-   { "disk": "san", "disk.san": "san" }
-   { "disk": "spinning" }
-
-Then, create a custom
-:data:`~local.system.replset.settings.getLastErrorModes` value, as
-follows:
-
-.. code-block:: javascript
-
-   cfg = rs.conf()
-   cfg.settings = { getLastErrorModes: { san: { "disk.san": 1 } } }
-   rs.reconfig(cfg)
-
-.. these examples need to be better so that they avoid overwriting
-   getLastErrorModes upon repetition (i.e. they don't $push documents
-   to getLastErrorModes.)
-
-To use this mode, pass the string ``san`` to the ``w`` option of
-:dbcommand:`getLastError` as follows:
-
-.. code-block:: javascript
-
-   db.runCommand( { getLastError: 1, w: "san" } )
-
-This operation will not return until a replica set member with the tag
-``disk.san`` has acknowledged the write.
-
-You may set a custom write concern mode as the default write concern
-mode by using the
-:data:`~local.system.replset.settings.getLastErrorDefaults` replica set
-setting, as in the following:
-
-.. code-block:: javascript
-
-   cfg = rs.conf()
-   cfg.settings.getLastErrorDefaults = { w: "san" }
-   rs.reconfig(cfg)
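-
-As with numeric write concerns, you can combine a custom mode with a
-``wtimeout`` value so that the operation does not block indefinitely
-(a sketch; the 5000 millisecond timeout is illustrative):
-
-.. code-block:: javascript
-
-   db.runCommand( { getLastError: 1, w: "san", wtimeout: 5000 } )
-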
-.. seealso:: :ref:`replica-set-configuration-tag-sets` for further
-   information about replica set reconfiguration and tag sets.
-
-.. index:: read preference
-.. index:: slaveOk
-
-.. _replica-set-read-preference:
-.. _slaveOk:
-.. _read-preference:
-
-Read Preference
----------------
-
-Read preference describes how MongoDB clients route read operations to
-members of a :term:`replica set`.
-
-.. index:: read preference; background
-.. _replica-set-read-preference-background:
-
-Background
-~~~~~~~~~~
-
-By default, an application directs its read operations to the :term:`primary`
-member in a :term:`replica set`. Reading from the primary guarantees that
-read operations reflect the latest version of a document. However,
-for an application that does not require fully up-to-date data, you
-can improve read throughput, or reduce latency, by distributing some or all
-reads to secondary members of the replica set.
-
-The following are use cases where you might use secondary reads:
-
-- Running systems operations that do not affect the front-end
-  application, such as backups and reports.
-
-- Providing low-latency queries for geographically distributed
-  deployments. If one secondary is closer to an application server
-  than the primary, you may see better
-  performance for that application if you use secondary reads.
-
-- Providing graceful degradation in :ref:`failover
-  <replica-set-failover>` situations where a set has *no* primary for 10
-  seconds or more. In this use case, you should give the application the
-  :readmode:`primaryPreferred` read preference, which allows the
-  application to continue reading from secondary members when the set
-  has no primary.
-
-MongoDB :term:`drivers <driver>` allow client applications to configure
-a :term:`read preference` on a per-connection, per-collection, or
-per-operation basis. For more information about secondary read
-operations in the :program:`mongo` shell, see the :method:`readPref()
-<cursor.readPref>` method. For more information about a driver's read
-preference configuration, see the appropriate :ref:`driver` API
-documentation.
-
-.. note::
-
-   Read preferences affect how an application selects which member
-   to use for read operations. As a result, read
-   preferences determine whether the application receives stale or
-   current data from MongoDB. Use appropriate :term:`write concern`
-   policies to ensure proper data
-   replication and consistency.
-
-   If read operations account for a large percentage of your
-   application's traffic, distributing reads to secondary members can
-   improve read throughput. However, in most cases :doc:`sharding
-   </sharding>` provides better support for larger scale
-   operations, as clusters can distribute read and write
-   operations across a group of machines.
-
-.. index:: read preference; semantics
-.. _replica-set-read-preference-semantics:
-.. index:: read preference; modes
-.. _replica-set-read-preference-modes:
-
-Read Preference Modes
-~~~~~~~~~~~~~~~~~~~~~
-
-.. versionadded:: 2.2
-
-MongoDB :doc:`drivers </applications/drivers>` support five
-read preference modes:
-
-- :readmode:`primary`
-- :readmode:`primaryPreferred`
-- :readmode:`secondary`
-- :readmode:`secondaryPreferred`
-- :readmode:`nearest`
-
-You can specify a read preference mode on connection objects, database
-objects, collection objects, or per operation. The syntax for
-specifying the read preference mode is :api:`specific to the driver and
-to the idioms of the host language <>`.
-
-Read preference modes are also available to clients connecting to a
-:term:`sharded cluster` through a :program:`mongos`. 
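-
-For example, a minimal sketch of setting a mode per operation from the
-:program:`mongo` shell (the collection and query are hypothetical;
-drivers expose analogous options):
-
-.. code-block:: javascript
-
-   db.products.find( { category: "electronics" } ).readPref( "secondaryPreferred" )
-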
The -:program:`mongos` instance obeys specified read preferences when -connecting to the :term:`replica set` that provides each :term:`shard` -in the cluster. - -In the :program:`mongo` shell, the -:method:`readPref() ` cursor method -provides access to read preferences. - -.. warning:: - - All read preference modes except :readmode:`primary` may return - stale data as :term:`secondaries ` replicate operations - from the primary with some delay. - - Ensure that your application can tolerate stale data if you choose - to use a non-:readmode:`primary` mode. - -For more information, see :ref:`read preference background -` and :ref:`read preference -behavior `. See also the -:api:`documentation for your driver <>`. - -.. readmode:: primary - - All read operations use only the current replica set :term:`primary`. - This is the default. If the primary is unavailable, - read operations produce an error or throw an exception. - - The :readmode:`primary` read preference mode is not compatible with - read preference modes that use :ref:`tag sets - `. If you specify a tag set - with :readmode:`primary`, the driver will produce an error. - -.. readmode:: primaryPreferred - - In most situations, operations read from the :term:`primary` member - of the set. However, if the primary is unavailable, as is the case - during :term:`failover` situations, operations read from secondary - members. - - When the read preference includes a :ref:`tag set - `, the client reads first from - the primary, if available, and then from :term:`secondaries - ` that match the specified tags. If no secondaries have - matching tags, the read operation produces an error. - - Since the application may receive data from a secondary, read - operations using the :readmode:`primaryPreferred` mode may return - stale data in some situations. - - .. warning:: - - .. versionchanged:: 2.2 - :program:`mongos` added full support for read preferences. - - When connecting to a :program:`mongos` instance older than 2.2, - using a client that supports read preference modes, - :readmode:`primaryPreferred` will send queries to secondaries. - -.. readmode:: secondary - - Operations read *only* from the :term:`secondary` members of the set. - If no secondaries are available, then this read operation produces an - error or exception. - - Most sets have at least one secondary, but there are situations - where there may be no available secondary. For example, a set - with a primary, a secondary, and an :term:`arbiter` may not have - any secondaries if a member is in recovering state or unavailable. - - When the read preference includes a :ref:`tag set - `, the client attempts to find - secondary members that match the specified tag set and directs reads - to a random secondary from among the :ref:`nearest group - `. If no secondaries - have matching tags, the read operation produces an error. [#capacity-planning]_ - - Read operations using the :readmode:`secondary` mode may return stale data. - -.. readmode:: secondaryPreferred - - In most situations, operations read from :term:`secondary` members, - but in situations where the set consists of a single - :term:`primary` (and no other members,) the read operation will use - the set's primary. - - When the read preference includes a :ref:`tag set - `, the client attempts to find - a secondary member that matches the specified tag set and directs - reads to a random secondary from among the :ref:`nearest group - `. If no secondaries - have matching tags, the read operation produces an error. 
-
-   Read operations using the :readmode:`secondaryPreferred` mode may return stale data.
-
-.. readmode:: nearest
-
-   The driver reads from the *nearest* member of the :term:`set
-   <replica set>` according to the :ref:`member selection
-   <replica-set-read-preference-behavior-member-selection>` process. Reads in
-   the :readmode:`nearest` mode do not consider the member's
-   *type*. Reads in :readmode:`nearest` mode may read from both
-   primaries and secondaries.
-
-   Set this mode to minimize the effect of network latency
-   on read operations without preference for current or stale data.
-
-   If you specify a :ref:`tag set <replica-set-read-preference-tag-sets>`,
-   the client attempts to find a replica set member that matches the
-   specified tag set and directs reads to an arbitrary member from
-   among the :ref:`nearest group <replica-set-read-preference-behavior-nearest>`.
-
-   Read operations using the :readmode:`nearest` mode may return stale data.
-
-   .. note::
-
-      All operations read from a member of the nearest group of the
-      replica set that matches the specified read preference mode. The
-      :readmode:`nearest` mode prefers low latency reads over a
-      member's :term:`primary` or :term:`secondary` status.
-
-      For :readmode:`nearest`, the client assembles a list of
-      acceptable hosts based on tag set and then narrows that list to
-      the host with the shortest ping time and all other members of
-      the set that are within the "local threshold," or acceptable
-      latency. See :ref:`replica-set-read-preference-behavior-nearest`
-      for more information.
-
-   .. For I/O-bound users who want to distribute reads across all
-      members evenly regardless of ping time, set
-      secondaryAcceptableLatencyMS very high.
-
-.. The :method:`readPreference()` reference
-   above will error until DOCS-364 is complete.
-
-.. [#capacity-planning] If your set has more than one secondary, and
-   you use the :readmode:`secondary` read preference mode, consider
-   the following effect. If you have a :ref:`three-member replica set
-   <replica-set-three-members>` with a primary and two secondaries,
-   and if one secondary becomes unavailable, all :readmode:`secondary`
-   queries must target the remaining secondary. This will double the
-   load on this secondary. Plan and provide capacity to support this
-   as needed.
-
-.. index:: tag sets
-.. index:: read preference; tag sets
-.. _replica-set-read-preference-tag-sets:
-
-Tag Sets
-~~~~~~~~
-
-Tag sets allow you to specify custom :ref:`read preferences
-<replica-set-read-preference>`
-and :ref:`write concerns <replica-set-write-concern>`
-so that your application can target
-operations to specific members, based on custom parameters.
-
-.. note::
-
-   Consider the following properties of read preferences:
-
-   - Custom read preferences and write concerns evaluate tag sets in
-     different ways.
-
-   - Read preferences consider the value of a tag when selecting a
-     member to read from.
-
-   - Write concerns ignore the value of a tag when selecting a
-     member *except* to consider whether or not the value is unique.
-
-A tag set for a read operation may resemble the following document:
-
-.. code-block:: javascript
-
-   { "disk": "ssd", "use": "reporting" }
-
-To fulfill the request, a member would need to have both of these tags.
-Therefore, the following tag sets would satisfy this
-requirement:
-
-.. code-block:: javascript
-
-   { "disk": "ssd", "use": "reporting" }
-   { "disk": "ssd", "use": "reporting", "rack": 1 }
-   { "disk": "ssd", "use": "reporting", "rack": 4 }
-   { "disk": "ssd", "use": "reporting", "mem": "64" }
-
-However, the following tag sets would *not* be able to fulfill this query:
-
-.. 
code-block:: javascript - - { "disk": "ssd" } - { "use": "reporting" } - { "disk": "ssd", "use": "production" } - { "disk": "ssd", "use": "production", "rack": 3 } - { "disk": "spinning", "use": "reporting", "mem": "32" } - -Therefore, tag sets make it possible to ensure that read operations -target specific members in a particular data center or -:program:`mongod` instances designated for a particular class of -operations, such as reporting or analytics. -For information on configuring tag sets, see -:ref:`replica-set-configuration-tag-sets` in the -:doc:`/reference/replica-configuration` document. -You can specify tag sets with the following read preference modes: - -- :readmode:`primaryPreferred` -- :readmode:`secondary` -- :readmode:`secondaryPreferred` -- :readmode:`nearest` - -You cannot specify tag sets with the :readmode:`primary` read preference mode. - -Tags are not compatible with :readmode:`primary` and only apply when -:ref:`selecting ` -a :term:`secondary` member of a set for a read operation. However, the -:readmode:`nearest` read mode, when combined with a tag set will -select the nearest member that matches the specified tag set, which -may be a primary or secondary. - -All interfaces use the same :ref:`member selection logic -` to choose the -member to which to direct read operations, basing the choice on read -preference mode and tag sets. - -For more information on how read preference :ref:`modes -` interact with tag sets, see the -documentation for each read preference mode. - -.. index:: read preference; behavior -.. _replica-set-read-preference-behavior: - -Behavior -~~~~~~~~ - -.. versionchanged:: 2.2 - -.. _replica-set-read-preference-behavior-retry: - -Auto-Retry -`````````` - -Connection between MongoDB drivers and :program:`mongod` instances in -a :term:`replica set` must balance two concerns: - -#. The client should attempt to prefer current results, and any - connection should read from the same member of the replica set as - much as possible. - -#. The client should minimize the amount of time that the database is - inaccessible as the result of a connection issue, networking - problem, or :term:`failover` in a replica set. - -As a result, MongoDB drivers and :program:`mongos`: - -- Reuse a connection to specific :program:`mongod` for as long as - possible after establishing a connection to that instance. This - connection is *pinned* to this :program:`mongod`. - -- Attempt to reconnect to a new member, obeying existing :ref:`read - preference modes `, if the connection - to :program:`mongod` is lost. - - Reconnections are transparent to the application itself. If - the connection permits reads from :term:`secondary` members, after - reconnecting, the application can receive two sequential reads - returning from different secondaries. Depending on the state of the - individual secondary member's replication, the documents can reflect - the state of your database at different moments. - -- Return an error *only* after attempting to connect to three members - of the set that match the :ref:`read preference mode ` - and :ref:`tag set `. - If there are fewer than three members of the set, the - client will error after connecting to all existing members of the - set. - - After this error, the driver selects a new member using the - specified read preference mode. In the absence of a specified read - preference, the driver uses :readmode:`primary`. 
- -- After detecting a failover situation, [#fn-failover]_ the driver - attempts to refresh the state of the replica set as quickly as - possible. - -.. [#fn-failover] When a :term:`failover` occurs, all members of the set - close all client connections that produce a socket error in the - driver. This behavior prevents or minimizes :term:`rollback`. - -.. _replica-set-read-preference-behavior-requests: - -Request Association -``````````````````` - -Reads from :term:`secondary` may reflect the state of the data set at -different points in time because :term:`secondary` members of a -:term:`replica set` may lag behind the current state of the primary by -different amounts. To prevent subsequent reads from jumping around in -time, the driver can associate application threads to a specific member -of the set after the first read. The thread will continue to read from -the same member until: - -- The application performs a read with a different read preference. - -- The thread terminates. - -- The client receives a socket exception, as is - the case when there's a network error or when - the :program:`mongod` closes connections during a :term:`failover`. - This triggers a :ref:`retry - `, which may be - transparent to the application. - -If an application thread issues a query with the -:readmode:`primaryPreferred` mode while the primary is inaccessible, -the thread will carry the association with that secondary for the -lifetime of the thread. The thread will associate with the primary, if -available, only after issuing a query with a different read -preference, even if a primary becomes available. By extension, if a -thread issues a read with the :readmode:`secondaryPreferred` when all -secondaries are down, it will carry an association with the -primary. This application thread will continue to read from the -primary even if a secondary becomes available later in the thread's -lifetime. - -.. index:: read preference; ping time -.. index:: read preference; nearest -.. index:: read preference; member selection -.. _replica-set-read-preference-behavior-ping-time: -.. _replica-set-read-preference-behavior-nearest: -.. _replica-set-read-preference-behavior-member-selection: - -Member Selection -```````````````` - -Clients, by way of their drivers, and :program:`mongos` instances for -sharded clusters periodically update their view of the replica set's state: -which members are up or down, which member is primary, and the latency to each -:program:`mongod` instance. - -For any operation that targets a member *other* than the -:term:`primary`, the driver: - -#. Assembles a list of suitable members, taking into account member type - (i.e. secondary, primary, or all members.) - -#. Excludes members not matching the tag sets, if specified. - -#. Determines which suitable member is the closest to the - client in absolute terms. - -#. Builds a list of members that are within a defined ping distance (in - milliseconds) of the "absolute nearest" member. [#acceptable-secondary-latency]_ - -#. Selects a member from these hosts at random. The member receives the read operation. - -Once the application selects a member of the set to use for read -operations, the driver continues to use this connection for read -preference until the application specifies a new read preference or -something interrupts the connection. See :ref:`replica-set-read-preference-behavior-requests` -for more information. - -.. [#acceptable-secondary-latency] Applications can configure the - threshold used in this stage. 
The default "acceptable latency" is - 15 milliseconds, which you can override in the drivers with their own - ``secondaryAcceptableLatencyMS`` option. - For :program:`mongos` you can use the :option:`--localThreshold ` or - :setting:`localThreshold` runtime options to set this value. - -.. index:: read preference; sharding -.. index:: read preference; mongos -.. _replica-set-read-preference-behavior-sharding: -.. _replica-set-read-preference-behavior-mongos: - -Sharding and ``mongos`` -``````````````````````` - -.. versionchanged:: 2.2 - Before version 2.2, :program:`mongos` did not support the - :ref:`read preference mode semantics `. - -In most :term:`sharded clusters `, a :term:`replica set` -provides each shard where read preferences are also applicable. Read -operations in a sharded cluster, with regard to read preference, are -identical to unsharded replica sets. - -Unlike simple replica sets, in sharded clusters, all interactions with -the shards pass from the clients to the :program:`mongos` instances -that are actually connected to the set members. :program:`mongos` is -responsible for the application of the read preferences, which is -transparent to applications. - -There are no configuration changes required for full support of read -preference modes in sharded environments, as long as the -:program:`mongos` is at least version 2.2. All :program:`mongos` -maintain their own connection pool to the replica set members. As a -result: - -- A request without a specified preference has - :readmode:`primary`, the default, unless, the :program:`mongos` - reuses an existing connection that has a different mode set. - - Always explicitly set your read preference mode to prevent - confusion. - -- All :readmode:`nearest` and latency calculations reflect the - connection between the :program:`mongos` and the :program:`mongod` - instances, not the client and the :program:`mongod` instances. - - This produces the desired result, because all results must pass - through the :program:`mongos` before returning to the client. - -Database Commands -````````````````` - -Because some :term:`database commands ` read and -return data from the database, all of the official drivers support -full :ref:`read preference mode semantics ` -for the following commands: - -- :dbcommand:`group` -- :dbcommand:`mapReduce` [#inline-map-reduce]_ -- :dbcommand:`aggregate` -- :dbcommand:`collStats` -- :dbcommand:`dbStats` -- :dbcommand:`count` -- :dbcommand:`distinct` -- :dbcommand:`geoNear` -- :dbcommand:`geoSearch` -- :dbcommand:`geoWalk` - -.. [#inline-map-reduce] Only "inline" :dbcommand:`mapReduce` - operations that do not write data support read preference, - otherwise these operations must run on the :term:`primary` - members. - -:program:`mongos` currently does not route commands using read -preferences; clients send all commands to shards' primaries. See -:issue:`SERVER-7423`. - -Uses for non-Primary Read Preferences -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You must exercise care when specifying read preferences: modes other -than :readmode:`primary` can *and will* return stale data. These -secondary queries will not -include the most recent write operations to the replica set's -:term:`primary`. Nevertheless, there are several common use cases for -using non-:readmode:`primary` read preference modes: - -- Reporting and analytics workloads. - - Having these queries target a :term:`secondary` helps distribute - load and prevent these operations from affecting the main workload - of the primary. 
- - Also consider using :readmode:`secondary` in conjunction with a - direct connection to a :ref:`hidden member ` of the set. - -- Providing local reads for geographically distributed applications. - - If you have application servers in multiple data centers, you may - consider having a :ref:`geographically distributed replica set - ` and using a non primary - read preference or the :readmode:`nearest` to avoid network - latency. - -- Maintaining availability during a failover. - - Use :readmode:`primaryPreferred` if you want your application to do - consistent reads from the primary under normal circumstances, but to - allow stale reads from secondaries in an emergency. This provides a - "read-only mode" for your application during a failover. - -.. warning:: - - In some situations using :readmode:`secondaryPreferred` to - distribute read load to replica sets may carry significant - operational risk: if all secondaries are unavailable and your set - has enough :term:`arbiters ` to prevent the primary from - stepping down, then the primary will receive all traffic from - clients. - - For this reason, use :readmode:`secondary` to distribute read load - to replica sets, not :readmode:`secondaryPreferred`. - -Using read modes other than :readmode:`primary` and -:readmode:`primaryPreferred` to provide extra capacity is not in and -of itself justification for non-:readmode:`primary` in many -cases. Furthermore, :doc:`sharding ` increases read and -write capacity by distributing read and write operations across a -group of machines. diff --git a/source/applications/server-side-javascript.txt b/source/applications/server-side-javascript.txt deleted file mode 100644 index 867388bcc78..00000000000 --- a/source/applications/server-side-javascript.txt +++ /dev/null @@ -1,164 +0,0 @@ -====================== -Server-side JavaScript -====================== - -.. default-domain:: mongodb - -MongoDB supports server-side execution of JavaScript code within the -database process. - -.. note:: - - The JavaScript code execution takes a JavaScript lock: each - :program:`mongod` can only execute a single JavaScript operation at - a time. - -.. include:: /includes/fact-disable-javascript-with-noscript.rst - -.. _server-side-map-reduce: - -Map-Reduce ----------- - -MongoDB performs the execution of JavaScript functions for -:doc:`/applications/map-reduce` operations on the server. Within these -JavaScript functions, you must not access the database for any reason, -including to perform reads. - -See the :method:`db.collection.mapReduce()` and the -:doc:`/applications/map-reduce` documentation for more information, -including examples of map-reduce. See :ref:`map-reduce concurrency -` section for concurrency information for -map-reduce. - -.. _server-side-eval: - -``eval`` Command ----------------- - -The :dbcommand:`eval` command, and the corresponding :program:`mongo` -shell method :method:`db.eval()`, evaluates JavaScript functions on the -database server. This command may be useful if you need to touch a lot -of data lightly since the network transfer of the data could become a -bottleneck if performing these operations on the client-side. - -.. warning:: - - By default, :dbcommand:`eval` command requires a write lock. As - such :dbcommand:`eval` will block all other read and write - operations while it runs. 
Because only a single JavaScript process - can run at a time, *do not* run :dbcommand:`mapReduce`, - :dbcommand:`group`, queries with the :operator:`$where` or any - other operation that requires JavaScript execution within - :dbcommand:`eval` operations. - -See :dbcommand:`eval` command and :method:`db.eval()` documentation for -more information, including examples. - -.. _running-js-scripts-in-mongo-on-mongod-host: - -Running ``.js`` files via a ``mongo`` shell Instance on the Server ------------------------------------------------------------------- - -Running a JavaScript (``.js``) file using a :program:`mongo` shell -instance on the server is a good technique for performing batch -administrative work. When you run :program:`mongo` shell on the -server, connecting via the localhost interface, the connection is fast -with low latency. Additionally, this technique has the advantage over -the :dbcommand:`eval` command since the command :dbcommand:`eval` -blocks all other operations. - -.. _server-side-where: - -``$where`` Operator -------------------- - -To perform :doc:`/core/read-operations`, in addition to the standard -operators (e.g. :operator:`$gt`, :operator:`$lt`), with the -:operator:`$where` operator, you can also express the query condition -either as a string or a full JavaScript function that specifies a -SQL-like ``WHERE`` clause. However, use the standard operators whenever -possible since :operator:`$where` operations have significantly slower -performance. - -.. warning:: - - Do not write to the database within the :operator:`$where` - JavaScript function. - -See :operator:`$where` documentation for more information, including -examples. - -.. _storing-server-side-javascript: - -Storing Functions Server-side ------------------------------ - -.. note:: - - We do **not** recommend using server-side stored functions if - possible. - -There is a special system collection named ``system.js`` that can store -JavaScript functions for reuse. - -To store a function, you can use the :method:`db.collection.save()`, as -in the following example: - -.. code-block:: javascript - - db.system.js.save( - { - _id : "myAddFunction" , - value : function (x, y){ return x + y; } - } - ); - -- The ``_id`` field holds the name of the function and is unique per - database. - -- The ``value`` field holds the function definition - -Once you save a function in the ``system.js`` collection, you can use -the function from any JavaScript context (e.g. :ref:`eval -`, :ref:`$where `, -:ref:`map-reduce `). - -Consider the following example from the :program:`mongo` shell that -first saves a function named ``echoFunction`` to the ``system.js`` -collection and calls the function using :ref:`db.eval() -`: - -.. code-block:: javascript - - db.system.js.save( - { _id: "echoFunction", - value : function(x) { return x; } - } - ) - - db.eval( "echoFunction( 'test' )" ) - -See ``_ for a full example. - -.. versionadded:: 2.1 - In the :program:`mongo` shell, you can use - :method:`db.loadServerScripts()` to load all the scripts saved in - the ``system.js`` collection for the current db. Once loaded, you - can invoke the functions directly in the shell, as in the following - example: - -.. code-block:: javascript - - db.loadServerScripts(); - - echoFunction(3); - - myAddFunction(3, 5); - -Concurrency ------------ - -Refer to the individual method or operator documentation for any -concurrency information. See also the :ref:`concurrency table -`. 
diff --git a/source/contents.txt b/source/contents.txt
index d305e2b5b05..b4bfc637a17 100644
--- a/source/contents.txt
+++ b/source/contents.txt
@@ -10,14 +10,13 @@ MongoDB Manual Contents
    security
    crud
    aggregation
-   applications/text-search
    indexes
    replication
    sharding
    applications
    mongo
    use-cases
-   tutorial
    faq
    reference
+   release-notes
    about
diff --git a/source/applications/2d.txt b/source/core/2d.txt
similarity index 98%
rename from source/applications/2d.txt
rename to source/core/2d.txt
index 3a18e3bc660..470a306777a 100644
--- a/source/applications/2d.txt
+++ b/source/core/2d.txt
@@ -16,7 +16,7 @@ Use a ``2d`` index if:
 
 Do not use a ``2d`` index if your location data includes GeoJSON
 objects. To index on both legacy coordinate pairs *and* GeoJSON
 objects, use a
-:doc:`2dsphere index `.
+:doc:`2dsphere index `.
 
 The ``2d`` index supports calculations on a flat, Euclidean plane. The
 ``2d`` index also supports *distance-only* calculations on a sphere, but
@@ -193,7 +193,7 @@ Points within a Circle Defined on a Sphere
 
 MongoDB supports rudimentary spherical queries on flat ``2d`` indexes for
 legacy reasons. In general, spherical calculations should use a ``2dsphere``
-index, as described in :doc:`/applications/2dsphere`.
+index, as described in :doc:`/core/2dsphere`.
 
 To query for legacy coordinate pairs in a "spherical cap" on a sphere, use
 :operator:`$geoWithin` with the :operator:`$centerSphere` operator.
diff --git a/source/applications/2dsphere.txt b/source/core/2dsphere.txt
similarity index 100%
rename from source/applications/2dsphere.txt
rename to source/core/2dsphere.txt
diff --git a/source/applications/aggregation.txt b/source/core/aggregation.txt
similarity index 100%
rename from source/applications/aggregation.txt
rename to source/core/aggregation.txt
diff --git a/source/core/backups.txt b/source/core/backups.txt
new file mode 100644
index 00000000000..4c6c67c2e4c
--- /dev/null
+++ b/source/core/backups.txt
@@ -0,0 +1,150 @@
+=====================================
+Backup Strategies for MongoDB Systems
+=====================================
+
+.. default-domain:: mongodb
+
+Backups are an important part of any operational disaster recovery
+plan. A good backup plan must be able to capture data in
+a consistent and usable state, and operators must be able to automate
+both the backup and the recovery operations. Also test all components
+of the backup system to ensure that you can recover backed up data as
+needed. If you cannot effectively restore your database from the
+backup, then your backups are useless. This document addresses
+higher-level backup strategies; for more information on specific
+backup procedures, consider the following documents:
+
+- :doc:`/tutorial/backup-databases-with-filesystem-snapshots`.
+- :doc:`/tutorial/backup-databases-with-binary-database-dumps`.
+- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump`
+- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots`
+- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps`
+- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters`
+- :doc:`/tutorial/restore-single-shard`
+- :doc:`/tutorial/restore-sharded-cluster`
+
+.. _backup-considerations:
+
+Backup Considerations
+---------------------
+
+As you develop a backup strategy for your MongoDB deployment, consider
+the following factors:
+
+- Geography. Ensure that you move some backups away from your
+  primary database infrastructure.
+
+- System errors. Ensure that your backups can survive situations where
+  hardware failures or disk errors impact the integrity or
+  availability of your backups.
+
+- Production constraints. Backup operations themselves sometimes require
+  substantial system resources. It is important to consider the time of
+  the backup schedule relative to peak usage and maintenance windows.
+
+- System capabilities. Some of the block-level snapshot tools require
+  special support on the operating-system or infrastructure level.
+
+- Database configuration. :term:`Replication` and :term:`sharding
+  ` can affect the process and impact of the backup
+  implementation. See :ref:`sharded-cluster-backups` and
+  :ref:`replica-set-backups`.
+
+- Actual requirements. You may be able to save time, effort, and space
+  by including only crucial data in the most frequent backups and
+  backing up less crucial data less frequently.
+
+.. _backup-approaches:
+
+Approaches to Backing Up MongoDB Systems
+----------------------------------------
+
+There are two main methodologies for backing up MongoDB instances:
+creating binary "dumps" of the database using :program:`mongodump`, or
+creating filesystem-level snapshots. Both methodologies have
+advantages and disadvantages:
+
+- binary database dumps are comparatively small, because they don't
+  include index content, pre-allocated free space, or :ref:`record
+  padding `. However, it's impossible to
+  capture a copy of a running system that reflects a single moment in
+  time using a binary dump.
+
+- filesystem snapshots, sometimes called block-level backups, produce
+  larger backup sizes, but complete quickly and can reflect a single
+  moment in time on a running system. However, snapshot systems
+  require filesystem and operating system support and tools.
+
+The best option depends on the requirements of your deployment and
+disaster recovery needs. Typically, filesystem snapshots are preferred
+because of their accuracy and simplicity; however, :program:`mongodump`
+is a viable option often used to generate backups of MongoDB systems.
+
+The following topics provide details and procedures on the two approaches:
+
+- :doc:`/tutorial/backup-databases-with-filesystem-snapshots`.
+- :doc:`/tutorial/backup-databases-with-binary-database-dumps`.
+
+In some cases, taking backups is difficult or impossible because of
+large data volumes, distributed architectures, and data transmission
+speeds. In these situations, increase the number of members in your
+replica set or sets.
+
+Backup Strategies for MongoDB Deployments
+-----------------------------------------
+
+.. _sharded-cluster-backups:
+
+Sharded Cluster Backup Considerations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. include:: /includes/note-shard-cluster-backup.rst
+
+As distributed systems, :term:`sharded clusters ` complicate
+backup operations. True point-in-time backups are only possible
+when stopping all write activity from the application. To create a
+precise moment-in-time snapshot of a cluster, stop all application write
+activity to the database, capture the backup, and resume write
+operations only after the backup is complete.
+
+However, you can capture a backup of a cluster that **approximates** a
+point-in-time backup by capturing a backup from a secondary member of
+the replica sets that provide the shards in the cluster at roughly the
+same moment. If you decide to use an approximate-point-in-time backup
+method, ensure that your application can operate using a copy of the
+data that does not reflect a single moment in time.
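+
+As a minimal sketch of one building block of such a procedure, you can
+use the :program:`mongo` shell to flush pending writes and block a
+member while you capture a filesystem snapshot of its data files.
+``db.fsyncLock()`` and ``db.fsyncUnlock()`` are standard shell helpers;
+which member you lock is up to your backup procedure:
+
+.. code-block:: javascript
+
+   // flush dirty pages to disk and block writes on this member
+   db.fsyncLock()
+
+   // ... capture the filesystem snapshot of the dbpath here ...
+
+   // release the lock so the member resumes accepting writes
+   db.fsyncUnlock()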
+
+The following documents describe backup procedures for sharded
+clusters:
+
+- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump`
+- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots`
+- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps`
+- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters`
+- :doc:`/tutorial/restore-single-shard`
+- :doc:`/tutorial/restore-sharded-cluster`
+
+.. _replica-set-backups:
+
+Replica Set Backup Considerations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In most cases, backing up data stored in a :term:`replica set` is
+similar to backing up data stored in a single instance. It is possible
+to lock a single :term:`secondary` database and then create a backup
+from that instance. When you unlock the database, the secondary will
+catch up with the :term:`primary`. You may also choose to deploy a
+dedicated :term:`hidden member` for backup purposes.
+
+If you have a :term:`sharded cluster` where each :term:`shard` is itself
+a replica set, you can use this method to create a backup of the entire
+cluster without disrupting the operation of the cluster. In these
+situations, you should still turn off the balancer when you create
+backups.
+
+For any cluster, using a non-primary node to create backups is
+particularly advantageous in that the backup operation does not affect
+the performance of the primary. Replication itself provides some measure
+of redundancy. Nevertheless, keeping point-in-time backups of your
+cluster for disaster recovery and as an additional layer of protection
+is crucial.
diff --git a/source/applications/create.txt b/source/core/create.txt
similarity index 98%
rename from source/applications/create.txt
rename to source/core/create.txt
index 4f6c52e136d..1eed7657010 100644
--- a/source/applications/create.txt
+++ b/source/core/create.txt
@@ -106,7 +106,7 @@ create this collection:
    } )
 
-You can confirm the insert by :doc:`querying `
+You can confirm the insert by :doc:`querying `
 the ``bios`` collection:
 
 .. code-block:: javascript
@@ -365,7 +365,7 @@ the ``_id`` field:
 The :method:`~db.collection.update()` operation in MongoDB accepts an
 "``upsert``" flag that modifies the behavior of
 :method:`~db.collection.update()` from :doc:`updating existing documents
-`, to inserting data.
+`, to inserting data.
 
 These :method:`~db.collection.update()` operations with the upsert
 flag eliminate the need to perform an additional operation to check
 for existence of a record before performing either an update or an
 insert operation. These update operations have the use ````
 argument to determine the write operation:
 
 - If the query matches an existing document(s), the operation is an
-  :doc:`update `.
+  :doc:`update `.
 
 - If the query matches no document in the collection, the operation is
-  an :doc:`insert `.
+  an :doc:`insert `.
 
 An upsert operation has the following syntax:
diff --git a/source/core/data-modeling.txt b/source/core/data-modeling.txt
index b0ac40c9a25..0fe3fd12a2b 100644
--- a/source/core/data-modeling.txt
+++ b/source/core/data-modeling.txt
@@ -40,7 +40,7 @@ number of multi-factored decisions when modeling data, including:
 
 These decisions reflect the degree to which the data model should store
 related pieces of data in a single document.
Fully normalized data models describe relationships using :doc:`references - ` between documents, while + ` between documents, while de-normalized models may store redundant information across related models. @@ -99,7 +99,7 @@ documents grow after creation. Document growth can impact write performance and lead to data fragmentation. Furthermore, documents in MongoDB must be smaller than the :limit:`maximum BSON document size `. For larger documents, consider using -:doc:`GridFS `. +:doc:`GridFS `. For examples in accessing embedded documents, see :ref:`read-operations-subdocuments`. @@ -120,7 +120,7 @@ Referencing ~~~~~~~~~~~ To normalize data, store :doc:`references -` between two documents to indicate +` between two documents to indicate a relationship between the data represented in each document. In general, use normalized data models: diff --git a/source/applications/delete.txt b/source/core/delete.txt similarity index 100% rename from source/applications/delete.txt rename to source/core/delete.txt diff --git a/source/core/document.txt b/source/core/document.txt index 5bde2f7da35..9f3be3c4b50 100644 --- a/source/core/document.txt +++ b/source/core/document.txt @@ -220,7 +220,7 @@ MongoDB documents: Consider the following options for the value of an ``_id`` field: -- Use an ``ObjectId``. See the :doc:`ObjectId ` +- Use an ``ObjectId``. See the :doc:`ObjectId ` documentation. Although it is common to assign ``ObjectId`` values to ``_id`` @@ -275,13 +275,13 @@ for MongoDB to return, remove, or update, as in the following: .. seealso:: - :ref:`read-operations-query-argument` and - :doc:`/applications/read` for more examples on selecting documents + :doc:`/core/read` for more examples on selecting documents for reads. - - :doc:`/applications/update` for more examples on + - :doc:`/core/update` for more examples on selecting documents for updates. - - :doc:`/applications/delete` for more examples on selecting + - :doc:`/core/delete` for more examples on selecting documents for deletes. .. _documents-update-actions: @@ -339,7 +339,7 @@ When passed as an argument to the :method:`update() - :ref:`update operators ` page for the available update operators and syntax. - - :doc:`update ` for more examples on + - :doc:`update ` for more examples on update documents. For additional examples of updates that involve array elements, @@ -442,7 +442,7 @@ ObjectId ObjectIds are: small, likely unique, fast to generate, and ordered. These values consists of 12-bytes, where the first 4-bytes is a timestamp that reflects the ObjectId's creation. Refer to the -:doc:`ObjectId ` documentation for more information. +:doc:`ObjectId ` documentation for more information. .. _document-bson-type-string: diff --git a/source/applications/geohaystack.txt b/source/core/geohaystack.txt similarity index 100% rename from source/applications/geohaystack.txt rename to source/core/geohaystack.txt diff --git a/source/applications/gridfs.txt b/source/core/gridfs.txt similarity index 58% rename from source/applications/gridfs.txt rename to source/core/gridfs.txt index 9a277ced768..01dfe13f5c4 100644 --- a/source/applications/gridfs.txt +++ b/source/core/gridfs.txt @@ -45,9 +45,6 @@ To store and retrieve files using :term:`GridFS`, use either of the following: - The :program:`mongofiles` command-line tool in the :program:`mongo` shell. See :doc:`/reference/mongofiles`. -.. index:: GridFS; collections -.. 
_gridfs-collections: - GridFS Collections ------------------ @@ -69,116 +66,12 @@ names prefixed by ``fs`` bucket: You can choose a different bucket name than ``fs``, and create multiple buckets in a single database. -.. index:: GridFS; chunks collection -.. _gridfs-chunks-collection: - -The ``chunks`` Collection -~~~~~~~~~~~~~~~~~~~~~~~~~ - Each document in the ``chunks`` collection represents a distinct chunk -of a file as represented in the :term:`GridFS` store. The following is a -prototype document from the ``chunks`` collection.: - -.. code-block:: javascript - - { - "_id" : , - "files_id" : , - "n" : , - "data" : - } - -A document from the ``chunks`` collection contains the following fields: - -.. data:: chunks._id - - The unique :term:`ObjectID` of the chunk. - -.. data:: chunks.files_id - - The ``_id`` of the "parent" document, as specified in the ``files`` - collection. - -.. data:: chunks.n - - The sequence number of the chunk. GridFS numbers all chunks, - starting with 0. - -.. data:: chunks.data - - The chunk's payload as a :term:`BSON` binary type. - -The ``chunks`` collection uses a :term:`compound index` on -``files_id`` and ``n``, as described in :ref:`gridfs-index`. - -.. index:: GridFS; files collection -.. _gridfs-files-collection: - -The ``files`` Collection -~~~~~~~~~~~~~~~~~~~~~~~~ - -Each document in the ``files`` collection represents a file in the -:term:`GridFS` store. Consider the following prototype of a document in -the ``files`` collection: - -.. code-block:: javascript - - { - "_id" : , - "length" : , - "chunkSize" : - "uploadDate" : - "md5" : - - "filename" : , - "contentType" : , - "aliases" : , - "metadata" : , - } - -Documents in the ``files`` collection contain some or all of the -following fields. Applications may create additional arbitrary fields: - -.. data:: files._id - - The unique ID for this document. The ``_id`` is of the data type you - chose for the original document. The default type for MongoDB - documents is :term:`BSON` :term:`ObjectID`. - -.. data:: files.length - - The size of the document in bytes. - -.. data:: files.chunkSize - - The size of each chunk. GridFS divides the document into chunks of - the size specified here. The default size is 256 kilobytes. - -.. data:: files.uploadDate - - The date the document was first stored by GridFS. This value has the - ``Date`` type. - -.. data:: files.md5 - - An MD5 hash returned from the filemd5 API. This value has the ``String`` - type. - -.. data:: files.filename - - Optional. A human-readable name for the document. - -.. data:: files.contentType - - Optional. A valid MIME type for the document. - -.. data:: files.aliases - - Optional. An array of alias strings. - -.. data:: files.metadata +of a file as represented in the GridFS store. Each chunk is identified +by its unique :term:`ObjectID` stored in its ``_id`` field. - Optional. Any additional information you want to store. +For descriptions of all fields in the ``chunks`` and ``files`` +collections, see :doc:`/reference/gridfs`. .. index:: GridFS; index .. _gridfs-index: @@ -187,8 +80,14 @@ GridFS Index ------------ :term:`GridFS` uses a :term:`unique `, :term:`compound -` index on the ``chunks`` collection for ``files_id`` -and ``n``. The index allows efficient retrieval of chunks using the +` index on the ``chunks`` collection for the +``files_id`` and ``n`` fields. The ``files_id`` field contains the +``_id`` of the chunk's "parent" document. The ``n`` field contains the +sequence number of the chunk. 
GridFS numbers all chunks, starting with
+0. For descriptions of the documents and fields in the ``chunks``
+collection, see :doc:`/reference/gridfs`.
+
+The GridFS index allows efficient retrieval of chunks using the
 ``files_id`` and ``n`` values, as shown in the following example:
 
 .. code-block:: javascript
diff --git a/source/administration/import-export.txt b/source/core/import-export.txt
similarity index 97%
rename from source/administration/import-export.txt
rename to source/core/import-export.txt
index 4fa9c95c44e..e29daf46179 100644
--- a/source/administration/import-export.txt
+++ b/source/core/import-export.txt
@@ -4,7 +4,7 @@ Importing and Exporting MongoDB Data
 
 .. default-domain:: mongodb
 
-Full :doc:`database instance backups ` are
+Full :doc:`database instance backups ` are
 useful for disaster recovery protection and routine database backup
 operation; however, some cases require additional import and export
 functionality.
@@ -38,7 +38,7 @@ with the database itself.
    :doc:`/reference/mongodb-extended-json` for more information about
    MongoDB Extended JSON.
 
-.. seealso:: See the ":doc:`/administration/backups`" document for
+.. seealso:: See the ":doc:`/core/backups`" document for
    more information on backing up MongoDB instances. Additionally,
    consider the following references for commands addressed in this
    document:
@@ -50,8 +50,8 @@ with the database itself.
    If you want to transform and process data once you've imported it in
    MongoDB consider the topics in :doc:`/aggregation`, including:
 
-   - :doc:`/applications/map-reduce` and
-   - :doc:`/applications/aggregation`.
+   - :doc:`/core/map-reduce` and
+   - :doc:`/core/aggregation`.
 
 .. _bson-json-type-conversion-fidelity:
@@ -137,7 +137,7 @@ Data Import and Export and Backups Operations
 
 For resilient and non-disruptive backups, use a file system or
 block-level disk snapshot function, such as the
-methods described in the ":doc:`/administration/backups`" document. The
+methods described in the ":doc:`/core/backups`" document. The
 tools and operations discussed provide functionality that's useful in
 the context of providing some kinds of backups.
diff --git a/source/core/indexes.txt b/source/core/indexes.txt
index efea4ba9d31..af1548b919b 100644
--- a/source/core/indexes.txt
+++ b/source/core/indexes.txt
@@ -575,7 +575,7 @@ You specify index creation options in the second argument in
 :method:`ensureIndex() `.
 
 The options :ref:`sparse `, :ref:`unique
-`, and :ref:`TTL ` affect the
+`, and :ref:`TTL ` affect the
 kind of index that MongoDB creates. This section addresses,
 :ref:`background construction ` and
 :ref:`duplicate dropping `, which
@@ -838,45 +838,17 @@ to bucket size.
 
 .. versionadded:: 2.4
 
-MongoDB provides ``text`` indexes to support :doc:`text search
-` on a collection. You can only access the
-``text`` index with the :dbcommand:`text` command.
+MongoDB provides ``text`` indexes to support the search of string
+content in documents of a collection. ``text`` indexes are
+case-insensitive and can include any field that contains string data.
+``text`` indexes drop language-specific stop words (e.g. in English,
+"the," "an," "a," "and," etc.) and use simple language-specific suffix
+stemming. See :ref:`text-search-languages` for the supported languages.
-``text`` indexes are case-insensitive and can include any field that
-contains string data. ``text`` indexes drop language-specific stop
-words (e.g. in English, "the," "an," "a," "and," etc.) and uses simple
-language-specific suffix stemming.
See :ref:`text-search-languages` for -the supported languages. +You can only access the ``text`` index with the :dbcommand:`text` +command. -``text`` indexes have the following storage requirements and -performance costs: - -- ``text`` indexes change the space allocation method for all future - record allocations in a collection to :collflag:`usePowerOf2Sizes`. - -- ``text`` indexes can be large. They contain one index entry for each - unique post-stemmed word in each indexed field for each document - inserted. - -- Building a ``text`` index is very similar to building a large - multi-key index and will take longer than building a simple ordered - (scalar) index on the same data. - -- When building a large ``text`` index on an existing collection, - ensure that you have a sufficiently high limit on open file - descriptors. See the :ref:`recommended settings `. - -- ``text`` indexes will impact insertion throughput because MongoDB - must add an index entry for each unique post-stemmed word in each - indexed field of each new source document. - -- Additionally, ``text`` indexes do not store phrases or information - about the proximity of words in the documents. As a result, phrase - queries will run much more effectively when the entire collection - fits in RAM. - -See :doc:`/applications/text-search` for more information on the text -search feature. +See :doc:`/core/text-search` for more information. .. index:: index; limitations .. _index-limitations: diff --git a/source/administration/journaling.txt b/source/core/journaling.txt similarity index 99% rename from source/administration/journaling.txt rename to source/core/journaling.txt index bf87d783208..dee82536812 100644 --- a/source/administration/journaling.txt +++ b/source/core/journaling.txt @@ -15,7 +15,7 @@ operation and maintain a consistent state. *Without* a journal, if :program:`mongod` exits unexpectedly, you must assume your data is in an inconsistent state, and you must run either :doc:`repair ` -or, preferably, :ref:`resync ` from a +or, preferably, :doc:`resync ` from a clean member of the replica set. With journaling enabled, if :program:`mongod` stops unexpectedly, diff --git a/source/core/map-reduce.txt b/source/core/map-reduce.txt new file mode 100644 index 00000000000..adf8ca25541 --- /dev/null +++ b/source/core/map-reduce.txt @@ -0,0 +1,159 @@ +========== +Map-Reduce +========== + +.. default-domain:: mongodb + +Map-reduce operations can handle complex aggregation tasks. To perform +map-reduce operations, MongoDB provides the :dbcommand:`mapReduce` +command and, in the :program:`mongo` shell, the +:method:`db.collection.mapReduce()` wrapper method. + +Examples +-------- + +For examples of map-reduce, see + +.. toctree:: + :maxdepth: 1 + + /tutorial/map-reduce-examples + /tutorial/perform-incremental-map-reduce + +For many simple aggregation tasks, see the :doc:`aggregation framework +`. + +.. _map-reduce-temporary-collection: + +Temporary Collection +-------------------- + +The map-reduce operation uses a temporary collection during processing. +At completion, the map-reduce operation renames the temporary +collection. As a result, you can perform a map-reduce operation +periodically with the same target collection name without affecting +the intermediate states. Use this mode when +generating statistical output collections on a regular basis. + +.. 
_map-reduce-concurrency:
+
+Concurrency
+-----------
+
+The map-reduce operation is composed of many tasks, including:
+
+- reads from the input collection,
+
+- executions of the ``map`` function,
+
+- executions of the ``reduce`` function,
+
+- writes to the output collection.
+
+These various tasks take the following locks:
+
+- The read phase takes a read lock. It yields every 100 documents.
+
+- The insert into the temporary collection takes a write lock for a
+  single write.
+
+  If the output collection does not exist, the creation of the output
+  collection takes a write lock.
+
+  If the output collection exists, then the output actions (i.e.
+  ``merge``, ``replace``, ``reduce``) take a write lock.
+
+.. versionchanged:: 2.4
+   The V8 JavaScript engine, which became the default in 2.4, allows
+   multiple JavaScript operations to execute at the same time. Prior to
+   2.4, JavaScript code (i.e. ``map``, ``reduce``, ``finalize``
+   functions) executed in a single thread.
+
+.. note::
+
+   The final write lock during post-processing ensures that the results
+   appear atomically. However, output actions ``merge`` and ``reduce``
+   may take minutes to process. For the ``merge`` and ``reduce``, the
+   ``nonAtomic`` flag is available. See the
+   :method:`db.collection.mapReduce()` reference for more information.
+
+.. _map-reduce-sharded-cluster:
+
+Sharded Cluster
+---------------
+
+Sharded Input
+~~~~~~~~~~~~~
+
+When using a sharded collection as the input for a map-reduce
+operation, :program:`mongos` will automatically dispatch the
+map-reduce job to each shard in parallel. There is no special option
+required. :program:`mongos` will wait for jobs on all shards to
+finish.
+
+Sharded Output
+~~~~~~~~~~~~~~
+
+By default, the output collection is not sharded. The process is:
+
+- :program:`mongos` dispatches a map-reduce finish job to the shard
+  that will store the target collection.
+
+- The target shard pulls results from all other shards, runs a final
+  reduce/finalize operation, and writes the result to the output
+  collection.
+
+- If using the ``sharded`` option to the ``out`` parameter, MongoDB
+  shards the output using the ``_id`` field as the shard key.
+
+  .. versionchanged:: 2.2
+
+- If the output collection does not exist, MongoDB creates and shards
+  the collection on the ``_id`` field. If the collection is empty,
+  MongoDB creates :term:`chunks ` using the result of the first
+  stage of the map-reduce operation.
+
+- :program:`mongos` dispatches, in parallel, a map-reduce finish job
+  to every shard that owns a chunk.
+
+- Each shard will pull the results it owns from all other shards, run a
+  final reduce/finalize, and write to the output collection.
+
+.. note::
+
+   - During later map-reduce jobs, MongoDB splits chunks as needed.
+
+   - Balancing of chunks for the output collection is automatically
+     prevented during post-processing to avoid concurrency issues.
+
+In MongoDB 2.0:
+
+- :program:`mongos` retrieves the results from each shard, performs a
+  merge sort to order the results, and performs a reduce/finalize as
+  needed. :program:`mongos` then writes the result to the output
+  collection in sharded mode.
+
+- This model requires only a small amount of memory, even for large datasets.
+
+- Shard chunks are not automatically split during insertion. This
+  requires manual intervention until the chunks are granular and
+  balanced.
+
+.. warning::
+
+   For best results, only use the sharded output options for
+   :dbcommand:`mapReduce` in version 2.2 or later.
+
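+For illustration, the following :program:`mongo` shell operation
+reduces map-reduce results into a sharded output collection. This is a
+minimal sketch: the ``orders`` collection, its fields, and the
+``order_totals`` output collection are hypothetical.
+
+.. code-block:: javascript
+
+   // hypothetical map and reduce functions that total order
+   // amounts by customer
+   var mapFn = function () { emit( this.cust_id, this.amount ); };
+   var reduceFn = function ( key, values ) { return Array.sum( values ); };
+
+   db.orders.mapReduce(
+      mapFn,
+      reduceFn,
+      { out: { reduce: "order_totals", sharded: true } }
+   )
+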
+.. _map-reduce-troubleshooting:
+
+Troubleshooting Map-Reduce Operations
+-------------------------------------
+
+You can troubleshoot the ``map`` function and the ``reduce`` function
+in the :program:`mongo` shell. See the following tutorials for more
+information:
+
+.. toctree::
+   :maxdepth: 1
+
+   /tutorial/troubleshoot-map-function
+   /tutorial/troubleshoot-reduce-function
diff --git a/source/administration/master-slave.txt b/source/core/master-slave.txt
similarity index 100%
rename from source/administration/master-slave.txt
rename to source/core/master-slave.txt
diff --git a/source/administration/operational-segregation.txt b/source/core/operational-segregation.txt
similarity index 96%
rename from source/administration/operational-segregation.txt
rename to source/core/operational-segregation.txt
index a086cfd586b..65a94437837 100644
--- a/source/administration/operational-segregation.txt
+++ b/source/core/operational-segregation.txt
@@ -50,10 +50,10 @@ Specifically, with MongoDB, you can:
 For full documentation of these features, see the following
 documentation in the MongoDB Manual:
 
-- :ref:`Read Preferences `, which controls how drivers
+- :doc:`Read Preferences `, which controls how drivers
   help applications target read operations to members of a replica set.
 
-- :ref:`Write Concerns `, which controls
+- :doc:`Write Concerns `, which controls
   how MongoDB ensures that write operations propagate to members of a
   replica set.
diff --git a/source/core/read-operations.txt b/source/core/read-operations.txt
index aefc8ca9c08..6d7ca5c0d6f 100644
--- a/source/core/read-operations.txt
+++ b/source/core/read-operations.txt
@@ -90,7 +90,7 @@ syntax:
 
    db.collection.findOne( , )
 
 For additional documentation and examples of the main MongoDB read
-operators, refer to the :doc:`/applications/read` page of the
+operators, refer to the :doc:`/core/read` page of the
 :doc:`/crud` section.
 
 .. [#formal-query-structure] :method:`db.collection.find()` is a
@@ -974,7 +974,7 @@ operations for more basic data aggregation operations:
 
 - :dbcommand:`mapReduce`. (Also consider
   :method:`~db.collection.mapReduce()` and
-  :doc:`/applications/map-reduce`.)
+  :doc:`/core/map-reduce`.)
 
 .. index:: read operation; architecture
 .. _read-operations-architecture:
@@ -1055,6 +1055,6 @@ applications don't rely on this kind of strict consistency, but
 application developers should always consider the needs of their
 application before setting read preference.
 
-For more information on :ref:`read preferences ` or
-on the read preference modes, see :ref:`read-preference` and
+For more information on read preference or on the read preference
+modes, see :doc:`/core/read-preference` and
 :ref:`replica-set-read-preference-modes`.
diff --git a/source/core/read-preference.txt b/source/core/read-preference.txt
new file mode 100644
index 00000000000..44883affd83
--- /dev/null
+++ b/source/core/read-preference.txt
@@ -0,0 +1,581 @@
+===============
+Read Preference
+===============
+
+.. default-domain:: mongodb
+
+Read preference describes how MongoDB clients route read operations to
+members of a :term:`replica set`.
+
+.. index:: read preference
+.. index:: slaveOk
+.. index:: read preference; background
+.. _replica-set-read-preference:
+.. _replica-set-read-preference-background:
+
+Background
+----------
+
+By default, an application directs its read operations to the :term:`primary`
+member in a :term:`replica set`. Reading from the primary guarantees that
+read operations reflect the latest version of a document.
+However, for an application that does not require fully up-to-date
+data, you can improve read throughput or reduce latency by distributing
+some or all reads to secondary members of the replica set.
+
+The following are use cases where you might use secondary reads:
+
+- Running systems operations that do not affect the front-end
+  application, such as backups and reports.
+
+- Providing low-latency queries for geographically distributed
+  deployments. If one secondary is closer to an application server
+  than the primary, you may see better
+  performance for that application if you use secondary reads.
+
+- Providing graceful degradation in :ref:`failover
+  ` situations where a set has *no* primary for 10
+  seconds or more. In this use case, you should give the application the
+  :readmode:`primaryPreferred` read preference, which allows the
+  application to continue reading from secondaries when the set has no
+  primary.
+
+MongoDB :term:`drivers ` allow client applications to configure
+a :term:`read preference` on a per-connection, per-collection, or
+per-operation basis. For more information about secondary read
+operations in the :program:`mongo` shell, see the :method:`readPref() `
+method. For more information about a driver's read preference
+configuration, see the appropriate :ref:`driver` API documentation.
+
+.. note::
+
+   Read preferences affect how an application selects which member
+   to use for read operations. As a result, read
+   preferences dictate whether the application receives stale or
+   current data from MongoDB. Use appropriate :term:`write concern`
+   policies to ensure proper data
+   replication and consistency.
+
+   If read operations account for a large percentage of your
+   application's traffic, distributing reads to secondary members can
+   improve read throughput. However, in most cases :doc:`sharding
+   ` provides better support for larger scale
+   operations, as clusters can distribute read and write
+   operations across a group of machines.
+
+.. index:: read preference; semantics
+.. _replica-set-read-preference-semantics:
+.. index:: read preference; modes
+.. _replica-set-read-preference-modes:
+
+Read Preference Modes
+---------------------
+
+.. versionadded:: 2.2
+
+MongoDB :doc:`drivers ` support five
+read preference modes:
+
+- :readmode:`primary`
+- :readmode:`primaryPreferred`
+- :readmode:`secondary`
+- :readmode:`secondaryPreferred`
+- :readmode:`nearest`
+
+You can specify a read preference mode on connection objects, database
+objects, collection objects, or per operation. The syntax for
+specifying the read preference mode is :api:`specific to the driver and
+to the idioms of the host language <>`.
+
+Read preference modes are also available to clients connecting to a
+:term:`sharded cluster` through a :program:`mongos`. The
+:program:`mongos` instance obeys specified read preferences when
+connecting to the :term:`replica set` that provides each :term:`shard`
+in the cluster.
+
+In the :program:`mongo` shell, the
+:method:`readPref() ` cursor method
+provides access to read preferences.
+
+.. warning::
+
+   All read preference modes except :readmode:`primary` may return
+   stale data as :term:`secondaries ` replicate operations
+   from the primary with some delay.
+
+   Ensure that your application can tolerate stale data if you choose
+   to use a non-:readmode:`primary` mode.
+
+For more information, see :ref:`read preference background
+` and :ref:`read preference
+behavior `. See also the
+:api:`documentation for your driver <>`.
+
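+For example, the :program:`mongo` shell lets you set a read preference
+mode for the whole connection or for a single operation. This is a
+minimal sketch; the ``records`` collection is hypothetical:
+
+.. code-block:: javascript
+
+   // set a default read preference mode for this connection
+   db.getMongo().setReadPref( "secondaryPreferred" )
+
+   // set a read preference mode for a single cursor
+   db.records.find().readPref( "nearest" )
+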
+.. readmode:: primary
+
+   All read operations use only the current replica set :term:`primary`.
+   This is the default. If the primary is unavailable,
+   read operations produce an error or throw an exception.
+
+   The :readmode:`primary` read preference mode is not compatible with
+   read preference modes that use :ref:`tag sets
+   `. If you specify a tag set
+   with :readmode:`primary`, the driver will produce an error.
+
+.. readmode:: primaryPreferred
+
+   In most situations, operations read from the :term:`primary` member
+   of the set. However, if the primary is unavailable, as is the case
+   during :term:`failover` situations, operations read from secondary
+   members.
+
+   When the read preference includes a :ref:`tag set
+   `, the client reads first from
+   the primary, if available, and then from :term:`secondaries
+   ` that match the specified tags. If no secondaries have
+   matching tags, the read operation produces an error.
+
+   Since the application may receive data from a secondary, read
+   operations using the :readmode:`primaryPreferred` mode may return
+   stale data in some situations.
+
+   .. warning::
+
+      .. versionchanged:: 2.2
+         :program:`mongos` added full support for read preferences.
+
+      When connecting to a :program:`mongos` instance older than 2.2,
+      using a client that supports read preference modes,
+      :readmode:`primaryPreferred` will send queries to secondaries.
+
+.. readmode:: secondary
+
+   Operations read *only* from the :term:`secondary` members of the set.
+   If no secondaries are available, then read operations produce an
+   error or exception.
+
+   Most sets have at least one secondary, but there are situations
+   where there may be no available secondary. For example, a set
+   with a primary, a secondary, and an :term:`arbiter` may not have
+   any secondaries if a member is in a recovering state or unavailable.
+
+   When the read preference includes a :ref:`tag set
+   `, the client attempts to find
+   secondary members that match the specified tag set and directs reads
+   to a random secondary from among the :ref:`nearest group
+   `. If no secondaries
+   have matching tags, the read operation produces an error. [#capacity-planning]_
+
+   Read operations using the :readmode:`secondary` mode may return stale data.
+
+.. readmode:: secondaryPreferred
+
+   In most situations, operations read from :term:`secondary` members,
+   but in situations where the set consists of a single
+   :term:`primary` (and no other members), the read operation will use
+   the set's primary.
+
+   When the read preference includes a :ref:`tag set
+   `, the client attempts to find
+   a secondary member that matches the specified tag set and directs
+   reads to a random secondary from among the :ref:`nearest group
+   `. If no secondaries
+   have matching tags, the read operation produces an error.
+
+   Read operations using the :readmode:`secondaryPreferred` mode may return stale data.
+
+.. readmode:: nearest
+
+   The driver reads from the *nearest* member of the :term:`set
+   ` according to the :ref:`member selection
+   ` process. Reads in
+   the :readmode:`nearest` mode do not consider the member's
+   *type*. Reads in :readmode:`nearest` mode may read from both
+   primaries and secondaries.
+
+   Set this mode to minimize the effect of network latency
+   on read operations without preference for current or stale data.
+
+   If you specify a :ref:`tag set `,
+   the client attempts to find a replica set member that matches the
+   specified tag set and directs reads to an arbitrary member from
+   among the :ref:`nearest group `.
+
+   Read operations using the :readmode:`nearest` mode may return stale data.
+
+   .. note::
+
+      All operations read from a member of the nearest group of the
+      replica set that matches the specified read preference mode. The
+      :readmode:`nearest` mode prefers low latency reads over a
+      member's :term:`primary` or :term:`secondary` status.
+
+      For :readmode:`nearest`, the client assembles a list of
+      acceptable hosts based on the tag set and then narrows that list
+      to the host with the shortest ping time and all other members of
+      the set that are within the "local threshold," or acceptable
+      latency. See :ref:`replica-set-read-preference-behavior-nearest`
+      for more information.
+
+   .. For I/O-bound users who want to distribute reads across all
+      members evenly regardless of ping time, set
+      secondaryAcceptableLatencyMS very high.
+
+.. The :method:`readPreference() ` reference
+   above will error until DOCS-364 is complete.
+
+.. [#capacity-planning] If your set has more than one secondary, and
+   you use the :readmode:`secondary` read preference mode, consider
+   the following effect. If you have a :ref:`three member replica set
+   ` with a primary and two secondaries,
+   and if one secondary becomes unavailable, all :readmode:`secondary`
+   queries must target the remaining secondary. This will double the
+   load on this secondary. Plan and provide capacity to support this
+   as needed.
+
+.. index:: tag sets
+.. index:: read preference; tag sets
+.. _replica-set-read-preference-tag-sets:
+
+Tag Sets
+--------
+
+Tag sets allow you to specify custom :doc:`read preferences
+` and :ref:`write concerns
+` so that your application can target
+operations to specific members, based on custom parameters.
+
+.. note::
+
+   Consider the following properties of read preferences:
+
+   - Custom read preferences and write concerns evaluate tag sets in
+     different ways.
+
+   - Read preferences consider the value of a tag when selecting a
+     member to read from.
+
+   - Write concerns ignore the value of a tag when selecting a
+     member *except* to consider whether or not the value is unique.
+
+A tag set for a read operation may resemble the following document:
+
+.. code-block:: javascript
+
+   { "disk": "ssd", "use": "reporting" }
+
+To fulfill the request, a member would need to have both of these tags.
+Therefore, the following tag sets would satisfy this
+requirement:
+
+.. code-block:: javascript
+
+   { "disk": "ssd", "use": "reporting" }
+   { "disk": "ssd", "use": "reporting", "rack": 1 }
+   { "disk": "ssd", "use": "reporting", "rack": 4 }
+   { "disk": "ssd", "use": "reporting", "mem": "64" }
+
+However, the following tag sets would *not* be able to fulfill this query:
+
+.. code-block:: javascript
+
+   { "disk": "ssd" }
+   { "use": "reporting" }
+   { "disk": "ssd", "use": "production" }
+   { "disk": "ssd", "use": "production", "rack": 3 }
+   { "disk": "spinning", "use": "reporting", "mem": "32" }
+
+Therefore, tag sets make it possible to ensure that read operations
+target specific members in a particular data center or
+:program:`mongod` instances designated for a particular class of
+operations, such as reporting or analytics.
+For information on configuring tag sets, see
+:ref:`replica-set-configuration-tag-sets` in the
+:doc:`/reference/replica-configuration` document.
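+
+For example, you can combine a mode with a tag set in the
+:program:`mongo` shell using the :method:`readPref() `
+cursor method. This is a minimal sketch; the ``records`` collection is
+hypothetical:
+
+.. code-block:: javascript
+
+   // read from a secondary whose tags include disk:ssd and use:reporting
+   db.records.find().readPref(
+      "secondary",
+      [ { "disk": "ssd", "use": "reporting" } ]
+   )
+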
+You can specify tag sets with the following read preference modes:
+
+- :readmode:`primaryPreferred`
+- :readmode:`secondary`
+- :readmode:`secondaryPreferred`
+- :readmode:`nearest`
+
+You cannot specify tag sets with the :readmode:`primary` read preference mode.
+
+Tags are not compatible with :readmode:`primary` and only apply when
+:ref:`selecting `
+a :term:`secondary` member of a set for a read operation. However, the
+:readmode:`nearest` read mode, when combined with a tag set, will
+select the nearest member that matches the specified tag set, which
+may be a primary or secondary.
+
+All interfaces use the same :ref:`member selection logic
+` to choose the
+member to which to direct read operations, basing the choice on read
+preference mode and tag sets.
+
+For more information on how read preference :ref:`modes
+` interact with tag sets, see the
+documentation for each read preference mode.
+
+.. index:: read preference; behavior
+.. _replica-set-read-preference-behavior:
+
+Behavior
+--------
+
+.. versionchanged:: 2.2
+
+.. _replica-set-read-preference-behavior-retry:
+
+Auto-Retry
+~~~~~~~~~~
+
+Connections between MongoDB drivers and :program:`mongod` instances in
+a :term:`replica set` must balance two concerns:
+
+#. The client should attempt to prefer current results, and any
+   connection should read from the same member of the replica set as
+   much as possible.
+
+#. The client should minimize the amount of time that the database is
+   inaccessible as the result of a connection issue, networking
+   problem, or :term:`failover` in a replica set.
+
+As a result, MongoDB drivers and :program:`mongos`:
+
+- Reuse a connection to a specific :program:`mongod` for as long as
+  possible after establishing a connection to that instance. This
+  connection is *pinned* to this :program:`mongod`.
+
+- Attempt to reconnect to a new member, obeying existing :ref:`read
+  preference modes `, if the connection
+  to :program:`mongod` is lost.
+
+  Reconnections are transparent to the application itself. If
+  the connection permits reads from :term:`secondary` members, after
+  reconnecting, the application can receive two sequential reads
+  returning from different secondaries. Depending on the state of the
+  individual secondary member's replication, the documents can reflect
+  the state of your database at different moments.
+
+- Return an error *only* after attempting to connect to three members
+  of the set that match the :ref:`read preference mode `
+  and :ref:`tag set `.
+  If there are fewer than three members of the set, the
+  client will error after connecting to all existing members of the
+  set.
+
+  After this error, the driver selects a new member using the
+  specified read preference mode. In the absence of a specified read
+  preference, the driver uses :readmode:`primary`.
+
+- After detecting a failover situation, [#fn-failover]_ the driver
+  attempts to refresh the state of the replica set as quickly as
+  possible.
+
+.. [#fn-failover] When a :term:`failover` occurs, all members of the set
+   close all client connections that produce a socket error in the
+   driver. This behavior prevents or minimizes :term:`rollback`.
+
+.. _replica-set-read-preference-behavior-requests:
+
+Request Association
+~~~~~~~~~~~~~~~~~~~
+
+Reads from :term:`secondary` members may reflect the state of the data
+set at different points in time because :term:`secondary` members of a
+:term:`replica set` may lag behind the current state of the primary by
+different amounts.
To prevent subsequent reads from jumping around in +time, the driver can associate application threads to a specific member +of the set after the first read. The thread will continue to read from +the same member until: + +- The application performs a read with a different read preference. + +- The thread terminates. + +- The client receives a socket exception, as is + the case when there's a network error or when + the :program:`mongod` closes connections during a :term:`failover`. + This triggers a :ref:`retry + `, which may be + transparent to the application. + +If an application thread issues a query with the +:readmode:`primaryPreferred` mode while the primary is inaccessible, +the thread will carry the association with that secondary for the +lifetime of the thread. The thread will associate with the primary, if +available, only after issuing a query with a different read +preference, even if a primary becomes available. By extension, if a +thread issues a read with the :readmode:`secondaryPreferred` when all +secondaries are down, it will carry an association with the +primary. This application thread will continue to read from the +primary even if a secondary becomes available later in the thread's +lifetime. + +.. index:: read preference; ping time +.. index:: read preference; nearest +.. index:: read preference; member selection +.. _replica-set-read-preference-behavior-ping-time: +.. _replica-set-read-preference-behavior-nearest: +.. _replica-set-read-preference-behavior-member-selection: + +Member Selection +~~~~~~~~~~~~~~~~ + +Clients, by way of their drivers, and :program:`mongos` instances for +sharded clusters periodically update their view of the replica set's state: +which members are up or down, which member is primary, and the latency to each +:program:`mongod` instance. + +For any operation that targets a member *other* than the +:term:`primary`, the driver: + +#. Assembles a list of suitable members, taking into account member type + (i.e. secondary, primary, or all members.) + +#. Excludes members not matching the tag sets, if specified. + +#. Determines which suitable member is the closest to the + client in absolute terms. + +#. Builds a list of members that are within a defined ping distance (in + milliseconds) of the "absolute nearest" member. [#acceptable-secondary-latency]_ + +#. Selects a member from these hosts at random. The member receives the read operation. + +Once the application selects a member of the set to use for read +operations, the driver continues to use this connection for read +preference until the application specifies a new read preference or +something interrupts the connection. See :ref:`replica-set-read-preference-behavior-requests` +for more information. + +.. [#acceptable-secondary-latency] Applications can configure the + threshold used in this stage. The default "acceptable latency" is + 15 milliseconds, which you can override in the drivers with their own + ``secondaryAcceptableLatencyMS`` option. + For :program:`mongos` you can use the :option:`--localThreshold ` or + :setting:`localThreshold` runtime options to set this value. + +.. index:: read preference; sharding +.. index:: read preference; mongos +.. _replica-set-read-preference-behavior-sharding: +.. _replica-set-read-preference-behavior-mongos: + +Sharding and ``mongos`` +~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionchanged:: 2.2 + Before version 2.2, :program:`mongos` did not support the + :ref:`read preference mode semantics `. 
+
+In most :term:`sharded clusters `, a :term:`replica set`
+provides each shard, so read preferences are also applicable. With
+regard to read preference, read operations in a sharded cluster are
+identical to read operations against an unsharded replica set.
+
+Unlike simple replica sets, in sharded clusters, all interactions with
+the shards pass from the clients to the :program:`mongos` instances
+that are actually connected to the set members. :program:`mongos` is
+responsible for the application of the read preferences, which is
+transparent to applications.
+
+There are no configuration changes required for full support of read
+preference modes in sharded environments, as long as the
+:program:`mongos` is at least version 2.2. All :program:`mongos`
+instances maintain their own connection pool to the replica set
+members. As a result:
+
+- A request without a specified preference uses :readmode:`primary`,
+  the default, unless the :program:`mongos` reuses an existing
+  connection that has a different mode set.
+
+  Always explicitly set your read preference mode to prevent
+  confusion.
+
+- All :readmode:`nearest` and latency calculations reflect the
+  connection between the :program:`mongos` and the :program:`mongod`
+  instances, not the client and the :program:`mongod` instances.
+
+  This produces the desired result, because all results must pass
+  through the :program:`mongos` before returning to the client.
+
+Database Commands
+~~~~~~~~~~~~~~~~~
+
+Because some :term:`database commands ` read and
+return data from the database, all of the official drivers support
+full :ref:`read preference mode semantics `
+for the following commands:
+
+- :dbcommand:`group`
+- :dbcommand:`mapReduce` [#inline-map-reduce]_
+- :dbcommand:`aggregate`
+- :dbcommand:`collStats`
+- :dbcommand:`dbStats`
+- :dbcommand:`count`
+- :dbcommand:`distinct`
+- :dbcommand:`geoNear`
+- :dbcommand:`geoSearch`
+- :dbcommand:`geoWalk`
+
+.. [#inline-map-reduce] Only "inline" :dbcommand:`mapReduce`
+   operations that do not write data support read preference;
+   otherwise these operations must run on the :term:`primary`
+   member.
+
+:program:`mongos` currently does not route commands using read
+preferences; clients send all commands to shards' primaries. See
+:issue:`SERVER-7423`.
+
+Uses for non-Primary Read Preferences
+-------------------------------------
+
+You must exercise care when specifying read preferences: modes other
+than :readmode:`primary` can *and will* return stale data. These
+secondary queries will not include the most recent write operations to
+the replica set's :term:`primary`. Nevertheless, there are several
+common use cases for using non-:readmode:`primary` read preference
+modes:
+
+- Reporting and analytics workloads.
+
+  Having these queries target a :term:`secondary` helps distribute
+  load and prevents these operations from affecting the main workload
+  of the primary.
+
+  Also consider using :readmode:`secondary` in conjunction with a
+  direct connection to a :ref:`hidden member ` of the set.
+
+- Providing local reads for geographically distributed applications.
+
+  If you have application servers in multiple data centers, you may
+  consider having a :ref:`geographically distributed replica set
+  ` and using a non-primary
+  read preference mode or :readmode:`nearest` to avoid network
+  latency.
+
+- Maintaining availability during a failover.
+
+  Use :readmode:`primaryPreferred` if you want your application to do
+  consistent reads from the primary under normal circumstances, but to
+  allow stale reads from secondaries in an emergency. This provides a
+  "read-only mode" for your application during a failover.
+
+.. warning::
+
+   In some situations using :readmode:`secondaryPreferred` to
+   distribute read load to replica sets may carry significant
+   operational risk: if all secondaries are unavailable and your set
+   has enough :term:`arbiters ` to prevent the primary from
+   stepping down, then the primary will receive all traffic from
+   clients.
+
+   For this reason, use :readmode:`secondary` to distribute read load
+   to replica sets, not :readmode:`secondaryPreferred`.
+
+Providing extra read capacity is not, in and of itself, sufficient
+justification for using read modes other than :readmode:`primary` and
+:readmode:`primaryPreferred` in many cases. Furthermore,
+:doc:`sharding ` increases read and write capacity by
+distributing read and write operations across a group of machines.
diff --git a/source/applications/read.txt b/source/core/read.txt
similarity index 100%
rename from source/applications/read.txt
rename to source/core/read.txt
diff --git a/source/administration/replica-set-architectures.txt b/source/core/replica-set-architectures.txt
similarity index 98%
rename from source/administration/replica-set-architectures.txt
rename to source/core/replica-set-architectures.txt
index fea5fede290..3b77892e700 100644
--- a/source/administration/replica-set-architectures.txt
+++ b/source/core/replica-set-architectures.txt
@@ -174,7 +174,7 @@ backup directly from the secondary member. In these cases, use the
 :option:`--oplog ` option to ensure a consistent point-in-time dump of
 the database state.
 
-.. seealso:: :doc:`/administration/backups`.
+.. seealso:: :doc:`/core/backups`.
 
 .. _replica-set-delayed-replication:
 
@@ -255,5 +255,4 @@ environments, if you have just two members, deploy an arbiter. Also,
 for *any replica set with an even number of members*, deploy an
 arbiter.
 
-To deploy an arbiter, see the :ref:`replica-set-arbiters` topic in the
-:doc:`/administration/replica-sets` document.
+To deploy an arbiter, see :doc:`/tutorial/add-replica-set-arbiter`.
diff --git a/source/core/replication-internals.txt b/source/core/replication-internals.txt
index 52d8bacfddb..4537c43d276 100644
--- a/source/core/replication-internals.txt
+++ b/source/core/replication-internals.txt
@@ -59,7 +59,7 @@ Read Preference Internals
 MongoDB uses :term:`single-master replication` to ensure that the
 database remains consistent. However, clients may modify the
-:ref:`read preferences ` on a
+:doc:`read preferences ` on a
 per-connection basis in order to distribute read operations to the
 :term:`secondary` members of a :term:`replica set`. Read-heavy deployments may achieve
 greater query throughput by distributing reads to secondary members. But
@@ -106,11 +106,10 @@ configurations that affect membership behavior:
 In almost every case, replica sets simplify the process of
 administering database replication. However, replica sets still have a
 unique set of administrative requirements and concerns. Choosing the
-right :doc:`system architecture `
+right :doc:`system architecture `
 for your data set is crucial.
 
-.. seealso:: The :ref:`replica-set-member-configurations` topic in the
-   :doc:`/administration/replica-sets` document.
+.. seealso:: :ref:`replica-set-member-configuration`
 
.. index:: replica set; security
@@ -210,7 +209,8 @@ aware of the following conditions and possible situations:
   the set. The absolute value of priorities does not have any impact on
   the outcome of replica set elections, with the exception of the value
   ``0``, which indicates the member cannot become primary and cannot seek election.
-   For details, see :ref:`replica-set-node-priority-configuration`.
+   For details, see
+   :doc:`/tutorial/configure-a-non-voting-replica-set-member`.
 
 - A replica set member cannot become primary *unless* it has the
   highest "optime" of any visible member in the set.
 
@@ -222,7 +222,7 @@ aware of the following conditions and possible situations:
 .. seealso::
 
    :ref:`Non-voting members in a replica set
   `,
-   :ref:`replica-set-node-priority-configuration`, and
+   :doc:`/tutorial/configure-a-non-voting-replica-set-member`, and
   :data:`replica configuration `.
 
 .. [#imply-secondary-only] Remember that :ref:`hidden
diff --git a/source/core/replication.txt b/source/core/replication.txt
index d9868d1af96..6d21cf33261 100644
--- a/source/core/replication.txt
+++ b/source/core/replication.txt
@@ -44,51 +44,179 @@ votes. For information regarding non-voting members, see
 need to use master/slave replication.
 
 .. index:: configuration; replica set members
+.. _replica-set-member-configuration:
 
-Member Configuration Properties
--------------------------------
+Member Configurations
+---------------------
 
 You can configure replica set members in a variety of ways, as listed
 here. In most cases, members of a replica set have the default
 properties.
 
-- **Secondary-Only**: These members have data but cannot become
-  primary under any circumstance. See
-  :ref:`replica-set-secondary-only-members`.
+.. index:: replica set members; secondary only
+.. _replica-set-secondary-only-configuration:
+.. _replica-set-secondary-only-members:
 
-- **Hidden**: These members are invisible to client
-  applications. See :ref:`replica-set-hidden-members`.
+Secondary-Only Members
+~~~~~~~~~~~~~~~~~~~~~~
+
+These members have data but cannot become primary under any
+circumstance. To configure a member to be secondary-only, see
+:doc:`/tutorial/configure-secondary-only-replica-set-member`.
+
+.. index:: replica set members; hidden
+.. _replica-set-hidden-configuration:
+.. _replica-set-hidden-members:
+
+Hidden Members
+~~~~~~~~~~~~~~
+
+These members cannot become primary and are invisible to client
+applications. *However,* hidden members **do** vote in :ref:`elections
+`.
+
+Hidden members are ideal for instances that will have significantly
+different usage patterns than the other members and require separation
+from normal traffic. Typically, hidden members provide reporting,
+dedicated backups, and dedicated read-only testing and integration
+support.
+
+To configure a member to be a hidden member, see
+:doc:`/tutorial/configure-a-hidden-replica-set-member`.
+
+.. index:: replica set members; delayed
+.. _replica-set-delayed-configuration:
+.. _replica-set-delayed-members:
+
+Delayed Members
+~~~~~~~~~~~~~~~
+
+Delayed members copy and apply operations from the primary's :term:`oplog` with
+a specified delay. If a member has a delay of one hour, then
+the latest entry in this member's oplog will not be more recent than
+one hour old, and the state of data for the member will reflect the state of the
+set an hour earlier.
+
+.. example:: If the current time is 09:52 and the secondary is
+   delayed by an hour, no operation will be more recent than 08:52.
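+A minimal configuration sketch from the :program:`mongo` shell,
+assuming the delayed member sits at index ``1`` of a hypothetical
+three-member set:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[1].priority = 0      // a delayed member must not become primary
+   cfg.members[1].hidden = true     // keep application reads away from it
+   cfg.members[1].slaveDelay = 3600 // lag one hour behind the primary, in seconds
+   rs.reconfig(cfg)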
+ +Delayed members may help recover from various kinds of human error. Such +errors may include inadvertently deleted databases or botched +application upgrades. Consider the following factors when determining +the amount of slave delay to apply: -- **Delayed**: These members apply operations from the - primary's :term:`oplog` after a specified delay. You can think of a - delayed member as a form of "rolling backup." - See :ref:`replica-set-delayed-members`. +- Ensure that the length of the delay is equal to or greater than your + maintenance windows. -- **Arbiters**: These members have no data and exist solely to - participate in :ref:`elections `. See - :ref:`replica-set-arbiters`. +- The size of the oplog is sufficient to capture *more than* the + number of operations that typically occur in that period of + time. For more information on oplog size, see the + :ref:`replica-set-oplog-sizing` topic in the :doc:`/core/replication` document. -- **Non-Voting**: These members do not vote in elections. Non-voting - members are only used for larger sets with more than 12 members. See - :ref:`replica-set-non-voting-members`. +Delayed members must have a :term:`priority` set to ``0`` to prevent +them from becoming primary in their replica sets. Also these members +should be :ref:`hidden ` to prevent your +application from seeing or querying this member. -For more information about each member configuration, see the -:ref:`replica-set-member-configurations` section in the -:doc:`/administration/replica-sets` document. +To configure a member to be a delayed member, see +:doc:`/tutorial/configure-a-delayed-replica-set-member`. +.. index:: replica set members; arbiters +.. _replica-set-arbiter-configuration: +.. _replica-set-arbiters: + +Arbiters +~~~~~~~~ + +These members have no data and exist solely to participate in +:ref:`elections `. Arbiters have the following +interactions with the rest of the replica set: + +- Credential exchanges that authenticate the arbiter with the replica + set. All MongoDB processes within a replica set use keyfiles. These + exchanges are encrypted. + + MongoDB only transmits the authentication credentials in a + cryptographically secure exchange, and encrypts no other exchange. + +- Exchanges of replica set configuration data and of votes. These are + not encrypted. + + If your MongoDB deployment uses SSL, then all communications between + arbiters and the other members of the replica set are secure. See the + documentation :doc:`/tutorial/configure-ssl` for more + information. As with all MongoDB components, run arbiters on secure + networks. + +To add an arbiter to the replica set, see +:doc:`/tutorial/add-replica-set-arbiter`. + +.. index:: replica set members; non-voting +.. _replica-set-non-voting-configuration: +.. _replica-set-non-voting-members: + +Non-Voting Members +~~~~~~~~~~~~~~~~~~ + +These members do not vote in elections. Non-voting members are only +used for larger sets with more than 12 members. To configure a member +as non-voting, see +:doc:`/tutorial/configure-a-non-voting-replica-set-member`. + +.. index:: pair: replica set; failover +.. _replica-set-failover-administration: .. _replica-set-failover: +.. _failover: -Failover --------- +Failover and Recovery +--------------------- Replica sets feature automated failover. If the :term:`primary` goes offline or becomes unresponsive and a majority of the original set members can still connect to each other, the set will elect a new primary. 
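+As an illustration only, one way to watch an election resolve is to
+poll member states from the :program:`mongo` shell; this sketch is not
+part of the failover machinery itself:
+
+.. code-block:: javascript
+
+   // "stateStr" reports PRIMARY, SECONDARY, ARBITER, and so on, so
+   // running this before and after a failover shows which member won
+   // the election.
+   rs.status().members.forEach( function( m ) {
+      print( m.name + " : " + m.stateStr )
+   } )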
-For a detailed explanation of failover, see the
-:ref:`replica-set-failover-administration` section in the
-:doc:`/administration/replica-sets` document.
+While :term:`failover` is automatic, :term:`replica set`
+administrators should still understand exactly how this process
+works. The sections below describe failover in detail.
+
+In most cases, failover occurs without administrator intervention
+seconds after the :term:`primary` either steps down, becomes inaccessible,
+or becomes otherwise ineligible to act as primary. If your MongoDB deployment
+does not fail over according to expectations, consider the following
+operational errors:
+
+- No remaining member is able to form a majority. This can happen as a
+  result of network partitions that render some members
+  inaccessible. Design your deployment to ensure that a majority of
+  set members can elect a primary in the same facility as core
+  application systems.
+
+- No member is eligible to become primary. Members must have a
+  :data:`~local.system.replset.members[n].priority` setting greater than ``0``, have a state
+  that is less than ten seconds behind the last operation to the
+  :term:`replica set`, and generally be *more* up to date than the
+  voting members.
+
+In many senses, :ref:`rollbacks ` represent a
+graceful recovery from an impossible failover and recovery situation.
+
+Rollbacks occur when a primary accepts writes that other members of the
+set do not successfully replicate before the primary steps down. When
+the former primary begins replicating again, it performs a "rollback."
+Rollbacks remove those operations from the instance that were never
+replicated to the set so that the data set is in a consistent state.
+The :program:`mongod` program writes rolled back data to a :term:`BSON`
+file that you can view using :program:`bsondump` and apply manually
+using :program:`mongorestore`.
+
+You can prevent rollbacks using a :ref:`replica acknowledged
+` write concern. These write
+operations require not only the :term:`primary` but also other members
+of the set, sometimes even a majority, to acknowledge the write
+operation before returning.
+
+.. include:: /includes/seealso-elections.rst
 
 .. index:: replica set; elections
 .. index:: failover; elections
@@ -117,19 +245,28 @@ remain a secondary.
 
 .. note::
 
-   When the current :term:`primary` steps down and triggers an
-   election, the :program:`mongod` instances will close all client
-   connections. This ensures that the clients maintain an accurate
-   view of the :term:`replica set` and helps prevent :term:`rollbacks
-   `.
+   - When the current :term:`primary` steps down and triggers an
+     election, the :program:`mongod` instances will close all client
+     connections. This ensures that the clients maintain an accurate
+     view of the :term:`replica set` and helps prevent :term:`rollbacks
+     `.
 
-For more information on elections and failover, see:
+   - Members on either side of a network partition cannot see each
+     other when determining whether a majority is available to hold an
+     election.
 
-- The :ref:`replica-set-failover-administration` section in the
-  :doc:`/administration/replica-sets` document.
+     That means that if a primary steps down and neither side of the
+     partition has a majority on its own, the set will not elect a new
+     primary and the set will become read only. To avoid this
+     situation, attempt to place a majority of instances in one data
+     center with a minority of instances in a secondary facility.
+
+For more information on elections and failover, see the
+:ref:`replica-set-failover-administration` section in the
+:doc:`/tutorial/troubleshoot-replica-sets` document.
 
-- The :ref:`replica-set-election-internals` section in the
-  :doc:`/core/replication-internals` document
+.. index:: replica set; network partitions
+.. index:: replica set; elections
 
 .. index:: replica set; priority
 .. _replica-set-node-priority:
 
@@ -152,8 +289,7 @@ have a single vote in elections.
   members.
 
 For more information on member priorities, see the
-:ref:`replica-set-node-priority-configuration` section in the
-:doc:`/administration/replica-sets` document.
+:doc:`/tutorial/adjust-replica-set-member-priority` document.
 
 .. index:: pair: replica set; consistency
 .. _replica-set-consistency:
 
@@ -287,7 +423,8 @@ working with replica sets:
   :term:`Read preference` and :term:`write concern` have particular
   :ref:`consistency ` implications.
 
-For a more detailed discussion of application concerns, see :doc:`/applications/replication`.
+For a more detailed discussion of application concerns, see
+:doc:`/applications/replication`.
 
 Administration and Operations
 -----------------------------
 
@@ -298,9 +435,18 @@ administrators of :term:`replica set` deployments. For more
 information on replica set administration, operations, and
 architecture, see:
 
-- :doc:`/administration/replica-sets`
-
-- :doc:`/administration/replica-set-architectures`
+- :doc:`/tutorial/deploy-replica-set`
+- :doc:`/tutorial/expand-replica-set`
+- :doc:`/tutorial/remove-replica-set-member`
+- :doc:`/tutorial/replace-replica-set-member`
+- :doc:`/tutorial/adjust-replica-set-member-priority`
+- :doc:`/tutorial/resync-replica-set-member`
+- :doc:`/tutorial/configure-replica-set-secondary-sync-target`
+- :doc:`/tutorial/configure-a-delayed-replica-set-member`
+- :doc:`/tutorial/configure-a-hidden-replica-set-member`
+- :doc:`/tutorial/configure-a-non-voting-replica-set-member`
+- :doc:`/tutorial/configure-secondary-only-replica-set-member`
+- :doc:`/core/replica-set-architectures`
 
 .. index:: replica set; oplog
 .. _replica-set-oplog-sizing:
 
@@ -440,6 +586,8 @@ the existing members.
 
 .. index:: pair: replica set; security
 
+.. _replica-set-security:
+
 Security
 ~~~~~~~~
 
@@ -455,8 +603,55 @@ that:
   infrastructure, ensure that you configure a :setting:`keyFile` on
   all members to permit authentication.
 
-For more information, see the :ref:`replica-set-security` section in the
-:doc:`/administration/replica-sets` document.
+For most instances, the most effective ways to control access and to
+secure the connection between members of a :term:`replica set` depend
+on network-level access control. Use your environment's firewall and
+network routing to ensure that traffic *only* from clients and other
+replica set members can reach your :program:`mongod` instances. If
+needed, use virtual private networks (VPNs) to ensure secure
+connections over wide area networks (WANs).
+
+Additionally, MongoDB provides an authentication mechanism for
+:program:`mongod` and :program:`mongos` instances connecting to
+replica sets. These instances enable authentication and specify a
+shared key file that serves as a shared password.
+
+.. versionadded:: 1.8
+   Added support for authentication in replica set deployments.
+
+.. versionchanged:: 1.9.1
+   Added support for authentication in sharded replica set deployments.
+
+To enable authentication, add the following option to your configuration file:
+
+.. code-block:: cfg
+
+   keyFile = /srv/mongodb/keyfile
+.. note::
+
+   You may choose to set these run-time configuration options using the
+   :option:`--keyFile ` (or :option:`mongos --keyFile`)
+   options on the command line.
+
+Setting :setting:`keyFile` enables authentication and specifies a key
+file for the replica set members to use when authenticating to each
+other. The content of the key file is arbitrary but must be the same
+on all members of the replica set and on all :program:`mongos`
+instances that connect to the set.
+
+The key file must be less than one kilobyte in size and may only contain
+characters in the base64 set. The key file must not have group or "world"
+permissions on UNIX systems. Use the OpenSSL package to generate
+"random" content for use in a key file, as in the following command:
+
+.. code-block:: bash
+
+   openssl rand -base64 753
+
+.. note::
+
+   Key file permissions are not checked on Windows systems.
 
 .. _replica-set-deployment-overview:
 .. _replica-set-architecture:
 
@@ -507,4 +702,4 @@ your replica set:
   successfully.
 
 For more information regarding replica set configuration and
-deployments see :doc:`/administration/replica-set-architectures`.
+deployments see :doc:`/core/replica-set-architectures`.
diff --git a/source/administration/security.txt b/source/core/security.txt
similarity index 99%
rename from source/administration/security.txt
rename to source/core/security.txt
index 634f8eb81dd..163c8a88b85 100644
--- a/source/administration/security.txt
+++ b/source/core/security.txt
@@ -77,7 +77,7 @@ Vulnerability Notification
 seriously. If you discover a vulnerability in MongoDB or another
 10gen product, or would like to know more about our vulnerability
 reporting and response process, see the
-:doc:`/administration/vulnerability-notification` document.
+:doc:`/tutorial/create-a-vulnerability-report` document.
 
 Networking Risk Exposure
 ------------------------
diff --git a/source/core/server-side-javascript.txt b/source/core/server-side-javascript.txt
new file mode 100644
index 00000000000..8ac9d14cd4e
--- /dev/null
+++ b/source/core/server-side-javascript.txt
@@ -0,0 +1,55 @@
+======================
+Server-side JavaScript
+======================
+
+.. default-domain:: mongodb
+
+.. versionchanged:: 2.4
+   The V8 JavaScript engine, which became the default in 2.4, allows
+   multiple JavaScript operations to execute at the same time. Prior to
+   2.4, MongoDB operations that required the JavaScript interpreter had
+   to acquire a lock, and a single :program:`mongod` could only run a
+   single JavaScript operation at a time.
+
+.. _server-side-javascript:
+
+Overview
+--------
+
+MongoDB supports server-side execution of JavaScript code within the
+database process:
+
+- :dbcommand:`mapReduce` and the corresponding :program:`mongo` shell
+  method :method:`db.collection.mapReduce()`. See
+  :doc:`/core/map-reduce` for more information.
+
+- :dbcommand:`eval` command, and the corresponding :program:`mongo`
+  shell method :method:`db.eval()`
+
+- :operator:`$where` operator
+
+- :ref:`running-js-scripts-in-mongo-on-mongod-host`
+
+.. seealso::
+
+   :doc:`/tutorial/store-javascript-function-on-server`
+
+.. include:: /includes/fact-disable-javascript-with-noscript.rst
+
+.. _running-js-scripts-in-mongo-on-mongod-host:
+
+Running ``.js`` files via a ``mongo`` shell Instance on the Server
+------------------------------------------------------------------
+
+You can run a JavaScript (``.js``) file using a :program:`mongo` shell
+instance on the server. This is a good technique for performing batch
+administrative work. When you run :program:`mongo` shell on the server,
+connecting via the localhost interface, the connection is fast with low
+latency.
+
+Concurrency
+-----------
+
+Refer to the individual method or operator documentation for any
+concurrency information. See also the :ref:`concurrency table
+`.
diff --git a/source/administration/sharded-cluster-architectures.txt b/source/core/sharded-cluster-architectures.txt
similarity index 66%
rename from source/administration/sharded-cluster-architectures.txt
rename to source/core/sharded-cluster-architectures.txt
index 7994cd95c8b..ba285de8f43 100644
--- a/source/administration/sharded-cluster-architectures.txt
+++ b/source/core/sharded-cluster-architectures.txt
@@ -8,8 +8,91 @@ Sharded Cluster Architectures
 
 .. default-domain:: mongodb
 
-This document describes the organization and design of :term:`sharded
-cluster` deployments.
+This document describes the requirements, organization, and design of
+:term:`sharded cluster` deployments.
+
+.. _sharding-requirements-infrastructure:
+
+Infrastructure Requirements for Sharded Clusters
+------------------------------------------------
+
+A :term:`sharded cluster` has the following components:
+
+- Three :term:`config servers `.
+
+  These special :program:`mongod` instances store the metadata for the
+  cluster. The :program:`mongos` instances cache this data and use it
+  to determine which :term:`shard` is responsible for which
+  :term:`chunk`.
+
+  For development and testing purposes you may deploy a cluster with a single
+  configuration server process, but always use exactly three config
+  servers for redundancy and safety in production.
+
+- Two or more shards. Each shard consists of one or more :program:`mongod`
+  instances that store the data for the shard.
+
+  These "normal" :program:`mongod` instances hold all of the
+  actual data for the cluster.
+
+  Typically each shard is a :term:`replica set`. Each replica set
+  consists of multiple :program:`mongod` instances. The members of the
+  replica set provide redundancy and high availability for the data in
+  each shard.
+
+  .. warning::
+
+     MongoDB enables data :term:`partitioning `, or
+     sharding, on a *per collection* basis. You *must* access all data
+     in a sharded cluster via the :program:`mongos` instances as
+     described below. If you connect directly to a :program:`mongod` in
+     a sharded cluster you will see its fraction of the cluster's data.
+     The data on any given shard may be somewhat random: MongoDB
+     provides no guarantee that any two contiguous chunks will reside
+     on a single shard.
+
+- One or more :program:`mongos` instances.
+
+  These instances direct queries from the application layer to the
+  shards that hold the data. The :program:`mongos` instances have no
+  persistent state or data files and only cache metadata in RAM from
+  the config servers.
+
+  .. note::
+
+     In most situations :program:`mongos` instances use minimal
+     resources, and you can run them on your application servers
+     without impacting application performance. However, if you use
+     the :term:`aggregation framework` some processing may occur on
+     the :program:`mongos` instances, causing that :program:`mongos`
+     to require more system resources.
+
+.. _sharding-requirements-data:
+
+Data Quantity Requirements for Sharded Clusters
+-----------------------------------------------
+
+Your cluster must manage a significant quantity of data for sharding
+to have an effect on your collection. The default :term:`chunk` size
+is 64 megabytes, and the :ref:`balancer
+` will not begin moving data until the imbalance
+of chunks in the cluster exceeds the :ref:`migration threshold
+`.
+
+Practically, this means that unless your cluster has many hundreds of
+megabytes of data, chunks will remain on a single shard.
+
+While there are some exceptional situations where you may need to
+shard a small collection of data, most of the time sharding a small
+collection is not worth the additional complexity and overhead unless
+you need additional concurrency or capacity for some reason. If you
+have a small data set, usually a properly configured
+single MongoDB instance or replica set will be more than sufficient
+for your persistence layer needs.
+
+:term:`Chunk ` size is :option:`user configurable `.
+However, the default value of 64 megabytes is ideal
+for most deployments. See the :ref:`sharding-chunk-size` section in the
+:doc:`/core/sharded-cluster-internals` document for more information.
 
 .. index:: sharding; localhost
 .. _sharding-localhost:
 
@@ -177,5 +260,3 @@ potential failure scenarios and available resolutions:
   while it still has current oplog entries, it can catch up to the latest
   state of the set using the normal :term:`replication process
   `, otherwise it must perform an :term:`initial sync`.
-
-
diff --git a/source/core/sharded-cluster-internals.txt b/source/core/sharded-cluster-internals.txt
index cb16788ac0b..5f51ee63b81 100644
--- a/source/core/sharded-cluster-internals.txt
+++ b/source/core/sharded-cluster-internals.txt
@@ -1,9 +1,9 @@
 .. index:: internals; sharding
 .. _sharding-internals:
 
-=======================================
-Sharded Cluster Internals and Behaviors
-=======================================
+=========================
+Sharded Cluster Internals
+=========================
 
 .. default-domain:: mongodb
 
@@ -574,12 +574,23 @@ a document with a ``msg`` field that holds the string
 If the application is instead connected to a :program:`mongod`, the
 returned document does not include the ``isdbgrid`` string.
 
+Sharded Cluster Metadata
+------------------------
+
+Sharded cluster metadata is contained in the
+:doc:`/reference/config-database` and comprises information about the
+:term:`sharded cluster's ` partitioned data sets. The
+config database stores the relationship between :term:`chunks `
+and where they reside within a :term:`sharded cluster`. Without a
+config database, the :program:`mongos` instances would be unable to
+route queries or write operations within the cluster.
+
 .. index:: config database
 .. index:: database, config
 .. _sharding-internals-config-database:
 
 Config Database
----------------
+~~~~~~~~~~~~~~~
 
 The ``config`` database contains information about your sharding
 configuration and stores the information in a set of collections
@@ -613,48 +624,78 @@ collections:
 See :doc:`/reference/config-database` for full documentation of these
 collections and their role in sharded clusters.
 
-Sharding GridFS Stores
-----------------------
+.. index:: sharding; config servers
+.. index:: config servers
+.. _sharding-config-server:
+.. _sharded-cluster-config-server:
+
+Config Servers
+~~~~~~~~~~~~~~
+
+Config servers are special :program:`mongod` instances that maintain the
+sharded cluster metadata in the config database. A sharded cluster
+operates with a group of *three* config servers that use a two-phase
+commit process that ensures immediate consistency and reliability.
+Config servers *do not* run as replica sets. -When sharding a :term:`GridFS` store, consider the following: +For testing purposes you may deploy a cluster with a single +config server, but this is not recommended for production. -- Most deployments will not need to shard the ``files`` - collection. The ``files`` collection is typically small, and only - contains metadata. None of the required keys for GridFS lend - themselves to an even distribution in a sharded situation. If you - *must* shard the ``files`` collection, use the ``_id`` field - possibly in combination with an application field +All config servers must be available on initial setup +of a sharded cluster. Each :program:`mongos` instance must be able +to write to the ``config.version`` collection. - Leaving ``files`` unsharded means that all the file metadata - documents live on one shard. For production GridFS stores you *must* - store the ``files`` collection on a replica set. +.. warning:: + + If your cluster has a single config server, this + :program:`mongod` is a single point of failure. If the instance is + inaccessible the cluster is not accessible. If you cannot recover + the data on a config server, the cluster will be inoperable. + + **Always** use three config servers for production deployments. -- To shard the ``chunks`` collection by ``{ files_id : 1 , n : 1 }``, - issue commands similar to the following: +Read and Write Operations on Config Servers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - .. code-block:: javascript +The load on configuration servers is small because each +:program:`mongos` instance maintains a cached copy of the configuration +database. MongoDB only writes data to the config server to: - db.fs.chunks.ensureIndex( { files_id : 1 , n : 1 } ) +- create splits in existing chunks, which happens as data in + existing chunks exceeds the maximum chunk size. - db.runCommand( { shardCollection : "test.fs.chunks" , key : { files_id : 1 , n : 1 } } ) +- migrate a chunk between shards. - You may also want shard using just the ``file_id`` field, as in the - following operation: +If one or two configuration instances become unavailable, the +cluster's metadata becomes *read only*. It is still possible to read +and write data from the shards, but no chunk migrations or splits will +occur until all three servers are accessible. At the same time, config +server data is only read in the following situations: - .. code-block:: javascript +- A new :program:`mongos` starts for the first time, or an existing + :program:`mongos` restarts. - db.runCommand( { shardCollection : "test.fs.chunks" , key : { files_id : 1 } } ) +- After a chunk migration, the :program:`mongos` instances update + themselves with the new cluster metadata. - .. note:: +If all three config servers are inaccessible, you can continue to use +the cluster as long as you don't restart the :program:`mongos` +instances until after config servers are accessible again. If you +restart the :program:`mongos` instances and there are no accessible +config servers, the :program:`mongos` would be unable to direct +queries or write operations to the cluster. - .. versionchanged:: 2.2 +Because the configuration data is small relative to the amount of data +stored in a cluster, the amount of activity is relatively low, and 100% +up time is not required for a functioning sharded cluster. As a result, +backing up the config servers is not difficult. 
Backups of config
+servers are critical as clusters become totally inoperable when
+you lose all configuration instances and data. Precautions to ensure
+that the config servers remain available and intact are critical.
+
+.. note::
 
-      Before 2.2, you had to create an additional index on ``files_id``
-      to shard using *only* this field.
+   Configuration servers store metadata for a single sharded cluster.
+   You must have a separate configuration server or servers for each
+   cluster you administer.
 
-   The default ``files_id`` value is an :term:`ObjectId`, as a result
-   the values of ``files_id`` are always ascending, and applications
-   will insert all new GridFS data to a single chunk and shard. If
-   your write load is too high for a single server to handle, consider
-   a different shard key or use a different value for different value
-   for ``_id`` in the ``files`` collection.
diff --git a/source/core/sharded-cluster-query-routing.txt b/source/core/sharded-cluster-query-routing.txt
new file mode 100644
index 00000000000..b073e15eab8
--- /dev/null
+++ b/source/core/sharded-cluster-query-routing.txt
@@ -0,0 +1,171 @@
+.. index:: mongos
+.. _sharded-cluster-query-routing:
+.. _sharding-read-operations:
+
+=================================
+Query Routing in Sharded Clusters
+=================================
+
+.. default-domain:: mongodb
+
+MongoDB provides the :program:`mongos` program to handle query routing
+in a sharded cluster.
+
+.. _sharding-mongos:
+.. _sharding-query-routing:
+
+``mongos`` Operational Overview
+-------------------------------
+
+The :program:`mongos` program provides a single unified interface to a
+sharded cluster. An application does not access shards (i.e. the
+:program:`mongod` instances) directly but instead accesses the
+:program:`mongos`. The :program:`mongos` routes queries and returns
+results.
+
+The :program:`mongos` keeps track of what data is on which shard by
+caching the metadata from the :ref:`config servers
+`. The :program:`mongos` uses the metadata to
+route operations from applications and clients to the :program:`mongod`
+instances. A :program:`mongos` has no *persistent* state and consumes
+minimal system resources.
+
+The most common practice is to run :program:`mongos` instances on the
+same systems as your application servers, but you can maintain
+:program:`mongos` instances on the shards or on other dedicated
+resources.
+
+.. note::
+
+   .. versionchanged:: 2.1
+
+   Some aggregation operations using the :dbcommand:`aggregate`
+   command (i.e. :method:`db.collection.aggregate()`) will cause
+   :program:`mongos` instances to require more CPU resources than in
+   previous versions. This modified performance profile may dictate
+   alternate architecture decisions if you use the :term:`aggregation
+   framework` extensively in a sharded environment.
+
+Broadcast Operations vs Targeted Operations
+-------------------------------------------
+
+In general, operations in a sharded environment are either:
+
+- Broadcast to all shards in the cluster that hold documents in a
+  collection
+
+- Targeted at a single shard or a limited group of shards, based on
+  the shard key
+
+For best performance, use targeted operations whenever possible. While
+some operations must broadcast to all shards, you can ensure MongoDB
+uses targeted operations whenever possible by always including the shard
+key.
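+As an illustrative sketch, assume a hypothetical ``people``
+collection sharded on ``{ zipcode: 1 }``:
+
+.. code-block:: javascript
+
+   // Targeted: the query includes the shard key, so mongos can route
+   // it only to the shard(s) whose chunks cover "10001".
+   db.people.find( { zipcode: "10001" } )
+
+   // Broadcast: no shard key in the query, so mongos must contact
+   // every shard that holds chunks for this collection.
+   db.people.find( { last_name: "Smith" } )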
+
+Broadcast Operations
+~~~~~~~~~~~~~~~~~~~~
+
+A query operation is broadcast to all shards [#namespace-exception]_
+**unless** the :program:`mongos` can determine which shard or shards
+store this data.
+
+Multi-update operations are always broadcast operations.
+
+The :method:`remove() ` operation is always a
+broadcast operation, unless the operation specifies the shard key in
+full.
+
+.. [#namespace-exception] If a shard does not store chunks from a
+   given collection, queries for documents in that collection are not
+   broadcast to that shard.
+
+Targeted Operations
+~~~~~~~~~~~~~~~~~~~
+
+All :method:`insert() ` operations target to one
+shard.
+
+All single :method:`update() ` operations target
+to one shard. This includes :term:`upsert` operations.
+
+For queries that include the shard key, :program:`mongos` can target the
+query at a specific shard or set of shards. This is the case only if the
+portion of the shard key included in the query is a *prefix* of the
+shard key. For example, if the shard key is:
+
+.. code-block:: javascript
+
+   { a: 1, b: 1, c: 1 }
+
+The :program:`mongos` program *can* route queries that include the full
+shard key or either of the following shard key prefixes to a
+specific shard or set of shards:
+
+.. code-block:: javascript
+
+   { a: 1 }
+   { a: 1, b: 1 }
+
+Depending on the distribution of data in the cluster and the
+selectivity of the query, :program:`mongos` may still have to
+contact multiple shards [#possible-all]_ to fulfill these queries.
+
+.. [#possible-all] :program:`mongos` will route some queries, even
+   some that include the shard key, to all shards, if needed.
+
+:program:`mongos` Query Routing
+-------------------------------
+
+A :program:`mongos` instance uses the following process to route a query
+within a :term:`cluster `. The :program:`mongos`:
+
+1. Determines the list of :term:`shards ` that must receive the
+   query.
+
+#. Establishes a cursor on all targeted shards.
+
+The following topics describe the process in more detail.
+
+:program:`mongos` Determines which Shards Receive a Query
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In some cases, when the :term:`shard key` or a prefix of the shard
+key is a part of the query, the :program:`mongos` can route the
+query to a subset of the shards. Otherwise, the :program:`mongos`
+must direct the query to *all* shards that hold documents for that
+collection.
+
+.. example::
+
+   Given the following shard key:
+
+   .. code-block:: javascript
+
+      { zipcode: 1, u_id: 1, c_date: 1 }
+
+   Depending on the distribution of chunks in the cluster, the
+   :program:`mongos` may be able to target the query at a subset of
+   shards, if the query contains the following fields:
+
+   .. code-block:: javascript
+
+      { zipcode: 1 }
+      { zipcode: 1, u_id: 1 }
+      { zipcode: 1, u_id: 1, c_date: 1 }
+
+:program:`mongos` Establishes a Cursor on Targeted Shards
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the first batch of results returns from the cursors:
+
+For a query with sorted results (i.e. using
+:method:`cursor.sort()`), the :program:`mongos` instance performs a merge
+sort of the results from each shard.
+
+For a query with unsorted results, the :program:`mongos` instance returns
+a result cursor that "round robins" results from all cursors on
+the shards.
+
+.. versionchanged:: 2.0.5
+   Before 2.0.5, the :program:`mongos` exhausted each cursor,
+   one by one.
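+To illustrate the difference, a sketch reusing the hypothetical
+``{ zipcode: 1, u_id: 1, c_date: 1 }`` shard key from the example
+above:
+
+.. code-block:: javascript
+
+   // Sorted: mongos merge-sorts the per-shard result streams on c_date.
+   db.records.find( { zipcode: "10001" } ).sort( { c_date: 1 } )
+
+   // Unsorted: mongos interleaves ("round robins") results from the
+   // cursors it opened on each targeted shard.
+   db.records.find( { zipcode: "10001" } )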
diff --git a/source/core/sharded-cluster-security.txt b/source/core/sharded-cluster-security.txt
new file mode 100644
index 00000000000..c1e04176742
--- /dev/null
+++ b/source/core/sharded-cluster-security.txt
@@ -0,0 +1,72 @@
+.. _sharding-security:
+
+=======================================
+Security Practices for Sharded Clusters
+=======================================
+
+.. default-domain:: mongodb
+
+MongoDB controls access to :term:`sharded clusters `
+with key files that store authentication credentials. The components
+of sharded clusters use the secret stored in the key files when
+authenticating to each other. Create key files and then point your
+:program:`mongos` and :program:`mongod` instances to the files, as
+described later in this section.
+
+Beyond the :setting:`auth` mechanisms described in this section,
+always run your sharded clusters in trusted networking environments
+that limit access to the cluster with network rules. Your networking
+environments should enforce restrictions that ensure only known
+traffic reaches your :program:`mongos` and :program:`mongod`
+instances.
+
+This section describes authentication specific to sharded
+clusters. For information on authentication across MongoDB, see
+:ref:`security-authentication`.
+
+Access Control Privileges in Sharded Clusters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In sharded clusters, MongoDB provides separate administrative
+privileges for the sharded cluster and for each shard. Beyond these
+administration privileges, privileges for sharded cluster deployments
+are functionally the same as any other MongoDB deployment. See
+:ref:`security-authentication` for more information.
+
+For sharded clusters, MongoDB provides these separate administrative
+privileges:
+
+- Administrative privileges for the sharded cluster. These privileges
+  provide read-and-write access to the config servers' :term:`admin
+  database `. These users can run all administrative commands.
+  Administrative privileges also give the user read-and-write access
+  to all the cluster's databases.
+
+  The credentials for administrative privileges on the cluster reside on
+  the config servers. To receive admin access to the cluster, you must
+  authenticate a session while connected to a :program:`mongos` instance
+  using the admin database.
+
+- Administrative privileges for the :program:`mongod` instance, or
+  :term:`replica set`, that provides each individual shard. Each shard
+  has its own admin database that stores administrative credentials
+  and access for that shard only. These credentials are *completely*
+  distinct from the cluster-wide administrative credentials.
+
+For more information on privileges, see :ref:`security-authentication`.
+
+Access a Sharded Cluster with Authentication
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To access a sharded cluster as an authenticated user, use the
+appropriate authentication options in :program:`mongo`.
+
+To access a sharded cluster as an authenticated, non-admin user, see
+either of the following:
+
+- :dbcommand:`authenticate`
+
+- :method:`db.auth()`
+
+To terminate an authenticated session, see the :dbcommand:`logout`
+command.
diff --git a/source/core/sharded-clusters.txt b/source/core/sharded-clusters.txt
index 927f0f25219..fe312bf961e 100644
--- a/source/core/sharded-clusters.txt
+++ b/source/core/sharded-clusters.txt
@@ -18,19 +18,47 @@ provides additional write capacity by distributing the write load over
 a number of :program:`mongod` instances.
 Sharding allows users to
 increase the potential amount of data in the :term:`working set`.
 
-.. index:: shard key
-   single: sharding; shard key
+.. index:: sharding; shards
+.. index:: shards
+.. _sharding-shards:
 
-How Sharding Works
-------------------
+Sharding
+--------
 
-To run sharding, you set up a sharded cluster. For a description of
-sharded clusters, see :doc:`/administration/sharded-clusters`.
+Sharding occurs within a :term:`sharded cluster`. A sharded cluster
+consists of the following components:
+
+- **Shards**. A shard is a container that holds a
+  subset of a collection's data. Each shard is either a single
+  :program:`mongod` instance or a :term:`replica set`. In production,
+  all shards should be replica sets.
+
+  Applications do not access shards directly but instead access
+  :ref:`mongos instances `.
+
+- **Config servers**. Each :ref:`config server `
+  is a :program:`mongod` instance that holds metadata about the cluster.
+  The metadata maps :term:`chunks ` to shards.
+
+- **mongos instances**. The :ref:`mongos instances `
+  route the reads and writes from applications to the shards.
+  Applications do not access the shards directly.
 
 Within a sharded cluster, you enable sharding on a per-database basis.
+Enabling sharding on a database does not by itself redistribute any
+data; it makes it possible to shard the collections within that
+database.
+
 After enabling sharding for a database, you choose which collections to
 shard. For each sharded collection, you specify a :term:`shard key`.
 
+To set up a sharded cluster, see :ref:`sharding-procedure-setup`.
+
+.. _sharding-shard-key:
+.. _shard-key:
+
+Shard Keys
+----------
+
 The shard key determines the distribution of the collection's
 :term:`documents ` among the cluster's :term:`shards `. The
 shard key is a :term:`field` that exists in every document in the
@@ -45,45 +73,7 @@ shard's range. When a chunk grows beyond the :ref:`chunk size
 `, MongoDB :term:`splits ` the chunk into
 smaller chunks, always based on ranges in the shard key.
 
-.. _sharding-shard-key-selection:
-.. _sharding-shard-key:
-.. _shard-key:
-
-Shard Key Selection
--------------------
-
-Choosing the correct shard key can have a great impact on the
-performance, capability, and functioning of your database and cluster.
-Appropriate shard key choice depends on the schema of your data and the
-way that your application queries and writes data to the database.
-
-The ideal shard key:
-
-- is easily divisible which makes it easy for MongoDB to distribute
-  content among the shards. Shard keys that have a limited number of
-  possible values are not ideal as they can result in some chunks that
-  are "unsplitable." See the :ref:`sharding-shard-key-cardinality`
-  section for more information.
-
-- will distribute write operations among the cluster, to prevent any
-  single shard from becoming a bottleneck. Shard keys that have a high
-  correlation with insert time are poor choices for this reason;
-  however, shard keys that have higher "randomness" satisfy this
-  requirement better. See the :ref:`sharding-shard-key-write-scaling`
-  section for additional background.
-
-- will make it possible for the :program:`mongos` to return most query
-  operations directly from a single *specific* :program:`mongod`
-  instance. Your shard key should be the primary field used by your
-  queries, and fields with a high degree of "randomness" are poor
-  choices for this reason. See the :ref:`sharding-shard-key-query-isolation`
-  section for specific examples.
-
-The challenge when selecting a shard key is that there is not always
-an obvious choice. Often, an existing field in your collection may not be
-the optimal key. In those situations, computing a special purpose
-shard key into an additional field or using a compound shard key may
-help produce one that is more ideal.
+To select a shard key, see :doc:`/tutorial/select-shard-key`.
 
 .. _sharding-hashed-sharding:
 
@@ -96,18 +86,6 @@ Hashed Sharding
   :ref:`hashed index ` of a single field as the
   :term:`shard key` to partition data across your sharded cluster.
 
-.. example::
-
-   To shard a collection using a hashed shard key, issue an operation in
-   the :program:`mongo` shell that resembles the following:
-
-   .. code-block:: javascript
-
-      sh.shardCollection( "records.active", { a: "hashed" } )
-
-   This operation shards the ``active`` collection in the ``records``
-   database, using a hash of the ``a`` field as the shard key.
-
 The field you choose as your hashed shard key should have a good
 cardinality, or large number of different values. Hashed keys work
 well with fields that increase monotonically like :term:`ObjectId`
@@ -119,16 +97,8 @@ that each shard has two chunks. You can control how many chunks MongoDB
 will create with the ``numInitialChunks`` parameter to
 :dbcommand:`shardCollection`.
 
-See :ref:`index-hashed-index` for limitations on hashed indexes.
-
-.. include:: /includes/warning-hashed-index-floating-point.rst
-
-.. warning::
-
-   Hashed shard keys are only supported by the MongoDB 2.4 and greater
-   versions of the :program:`mongos` program. After sharding a
-   collection with a hashed shard key, you must use the MongoDB 2.4 or
-   greater :program:`mongos` instances in your sharded cluster.
+To shard a collection using a hashed shard key, see
+:doc:`/tutorial/shard-collection-with-a-hashed-shard-key`.
 
 .. index:: balancing
 .. _sharding-balancing:
 
@@ -156,8 +126,9 @@ You may disable the balancer on a temporary basis for maintenance
 and limit the window during which it runs to prevent the balancing
 process from impacting production traffic.
 
-.. seealso:: :doc:`/tutorial/manage-sharded-cluster-balancer` and
-   :doc:`/core/sharded-cluster-internals`.
+To disable the balancer, see :ref:`sharding-balancing-disable-temporally`.
+
+.. seealso:: :doc:`/tutorial/manage-sharded-cluster-balancer`.
 
 .. note::
 
@@ -170,13 +141,12 @@ When to Use Sharding
 --------------------
 
 While sharding is a powerful and compelling feature, it comes with
-significant :ref:`sharding-requirements-infrastructure`
+significant infrastructure requirements and some limited
 complexity costs. As a result, use
-sharding only as necessary, and when indicated by actual operational
-requirements. Consider the following overview of indications it may be
-time to consider sharding.
+sharding only as necessary and when indicated by actual operational
+requirements.
 
-You should consider deploying a :term:`sharded cluster`, if:
+You should consider deploying a :term:`sharded cluster` if:
 
 - your data set approaches or exceeds the storage capacity of a single
   node in your system.
@@ -207,85 +177,8 @@ the corresponding shard keys.
   in the future, **do not** wait until your system is overcapacity
   to enable sharding.
 
-.. _sharding-requirements-infrastructure:
-
-Infrastructure Requirements for Sharded Clusters
-------------------------------------------------
-
-A :term:`sharded cluster` has the following components:
-
-- Three :term:`config servers `.
-
-  These special :program:`mongod` instances store the metadata for the
The :program:`mongos` instances cache this data and use it - to determine which :term:`shard` is responsible for which - :term:`chunk`. - - For development and testing purposes you may deploy a cluster with a single - configuration server process, but always use exactly three config - servers for redundancy and safety in production. - -- Two or more shards. Each shard consists of one or more :program:`mongod` - instances that store the data for the shard. - - These "normal" :program:`mongod` instances hold all of the - actual data for the cluster. - - Typically each shard is a :term:`replica sets `. Each - replica set consists of multiple :program:`mongod` instances. The members - of the replica set provide redundancy and high available for the data in each shard. - - .. warning:: - - MongoDB enables data :term:`partitioning `, or - sharding, on a *per collection* basis. You *must* access all data - in a sharded cluster via the :program:`mongos` instances as below. - If you connect directly to a :program:`mongod` in a sharded cluster - you will see its fraction of the cluster's data. The data on any - given shard may be somewhat random: MongoDB provides no guarantee - that any two contiguous chunks will reside on a single shard. - -- One or more :program:`mongos` instances. - - These instance direct queries from the application layer to the - shards that hold the data. The :program:`mongos` instances have no - persistent state or data files and only cache metadata in RAM from - the config servers. - - .. note:: - - In most situations :program:`mongos` instances use minimal - resources, and you can run them on your application servers - without impacting application performance. However, if you use - the :term:`aggregation framework` some processing may occur on - the :program:`mongos` instances, causing that :program:`mongos` - to require more system resources. - -.. _sharding-requirements-data: - -Data Quantity Requirements for Sharded Clusters ------------------------------------------------ - -Your cluster must manage a significant quantity of data for sharding -to have an effect on your collection. The default :term:`chunk` size -is 64 megabytes, and the :ref:`balancer -` will not begin moving data until the imbalance -of chunks in the cluster exceeds the :ref:`migration threshold -`. - -Practically, this means that unless your cluster has many hundreds of -megabytes of data, chunks will remain on a single shard. +For information on requirements, see the following: -While there are some exceptional situations where you may need to -shard a small collection of data, most of the time the additional -complexity added by sharding the small collection is not worth the additional -complexity and overhead unless -you need additional concurrency or capacity for some reason. If you -have a small data set, usually a properly configured -single MongoDB instance or replica set will be more than sufficient -for your persistence layer needs. +- :ref:`sharding-requirements-infrastructure` -:term:`Chunk ` size is :option:`user configurable `. -However, the default value is of 64 megabytes is ideal -for most deployments. See the :ref:`sharding-chunk-size` section in the -:doc:`/core/sharded-cluster-internals` document for more information. 
+- :ref:`sharding-requirements-data`
diff --git a/source/core/shell-types.txt b/source/core/shell-types.txt
index c7c3bbacbd4..0770a6e23e6 100644
--- a/source/core/shell-types.txt
+++ b/source/core/shell-types.txt
@@ -133,7 +133,7 @@ the following operation in the :program:`mongo` shell:
 
    new ObjectId
 
-.. see:: :doc:`/core/object-id` for full documentation of ObjectIds in
+.. see:: :doc:`/reference/object-id` for full documentation of ObjectIds in
    MongoDB.
 
 .. _shell-type-long:
diff --git a/source/core/tag-aware-sharding.txt b/source/core/tag-aware-sharding.txt
new file mode 100644
index 00000000000..cd0a3c52626
--- /dev/null
+++ b/source/core/tag-aware-sharding.txt
@@ -0,0 +1,83 @@
+.. _tag-aware-sharding:
+
+==================
+Tag Aware Sharding
+==================
+
+.. default-domain:: mongodb
+
+For sharded clusters, MongoDB makes it possible to associate specific
+ranges of a :term:`shard key` with a specific :term:`shard` or subset
+of shards. This association dictates the policy of the cluster
+balancer process as it balances the :term:`chunks ` around the
+cluster. This capability enables the following deployment patterns:
+
+- isolating a specific subset of data on a specific set of shards.
+
+- controlling the balancing policy so that in a geographically
+  distributed cluster the most relevant portions of the data set
+  reside on the shards with greatest proximity to the application
+  servers.
+
+This document describes the behavior, operation, and use of tag aware
+sharding in MongoDB deployments.
+
+.. note::
+
+   Shard key range tags are entirely distinct from :ref:`replica set member
+   tags `.
+
+:term:`Hash-based sharding ` does not support
+tag-aware sharding.
+
+Behavior and Operations
+-----------------------
+
+Tags in a sharded cluster are pieces of metadata that dictate the
+policy and behavior of the cluster :term:`balancer`. Using
+tags, you may associate individual shards in a cluster with one or
+more tags. Then, you can assign a tag string to a range
+of :term:`shard key` values for a sharded collection. When migrating a
+chunk, the balancer will select a destination shard based on the
+configured tag ranges.
+
+The balancer migrates chunks in tagged ranges to shards with those
+tags, if tagged shards are not balanced. [#specific-tagged-migrations]_
+
+.. note::
+
+   Because a single chunk may span different tagged shard key ranges,
+   the balancer may migrate chunks to tagged shards that contain
+   values that exceed the upper bound of the selected tag range.
+
+.. example::
+
+   Given a sharded collection with two configured tag ranges, such
+   that:
+
+   - :term:`Shard key` values between ``100`` and ``200`` have tags to
+     direct corresponding chunks to shards tagged ``NYC``.
+
+   - Shard key values between ``200`` and ``300`` have tags to direct
+     corresponding chunks to shards tagged ``SFO``.
+
+   In this cluster, the balancer will migrate a chunk with shard key
+   values ranging between ``150`` and ``220`` to a shard tagged
+   ``NYC``, since the chunk's lower bound of ``150`` falls within the
+   ``NYC`` tag range.
+
+After configuring tags on shards and ranges of the shard key, the
+cluster may take some time to reach the proper distribution of data,
+depending on the division of chunks (i.e. splits) and the current
+distribution of data in the cluster. Once configured, the balancer
+will respect tag ranges during future :ref:`balancing rounds
+`.
+
+.. 
[#specific-tagged-migrations] To migrate chunks in a tagged + environment, the balancer selects a target shard with a tag range + that has an *upper* bound that is *greater than* the migrating + chunk's *lower* bound. If a shard with a matching tagged range + exists, the balancer will migrate the chunk to that shard. + +.. seealso:: + + :doc:`/tutorial/administer-shard-tags` diff --git a/source/core/text-indexes.txt b/source/core/text-indexes.txt deleted file mode 100644 index 11f17f54379..00000000000 --- a/source/core/text-indexes.txt +++ /dev/null @@ -1,188 +0,0 @@ -============== -``text`` Index -============== - -.. default-domain:: mongodb - -This document provides details on some of the options available when -creating ``text`` indexes. - -Specify a Name for the ``text`` Index -------------------------------------- - -The default name for the index consists of each indexed field name -concatenated with ``_text``. For example, the following command creates -a ``text`` index on the fields ``content``, ``users.comments``, and -``users.profiles``: - -.. code-block:: javascript - - db.collection.ensureIndex( - { - content: "text", - "users.comments": "text", - "users.profiles": "text" - } - ) - -The default name for the index is: - -.. code-block:: javascript - - "content_text_users.comments_text_users.profiles_text" - -To avoid creating an index with a name that exceeds the :limit:`index -name length limit `, you can pass the ``name`` -option to the :method:`db.collection.ensureIndex()` method: - -.. code-block:: javascript - - db.collection.ensureIndex( - { - content: "text", - "users.comments": "text", - "users.profiles": "text" - }, - { - name: "MyTextIndex" - } - ) - -.. note:: - - To drop the ``text`` index, use the index name. To get the name of - an index, use :method:`db.collection.getIndexes()`. - -Index All Fields ----------------- - -To allow for text search on all fields with string content, use the -wildcard specifier (``$**``) to index all fields that contain string -content. - -The following example indexes any string value in the data of every -field of every document in ``collection`` and names the index -``TextIndex``: - -.. code-block:: javascript - - db.collection.ensureIndex( - { "$**": "text" }, - { name: "TextIndex" } - ) - -.. _text-index-default-language: - -Specify Languages for Text Index --------------------------------- - -The default language associated with the indexed data determines the -list of stop words and the rules for the stemmer and tokenizer. The -default language for the indexed data is ``english``. - -To specify a different language, use the ``default_language`` option -when creating the ``text`` index. See :ref:`text-search-languages` for -the languages available for ``default_language``. - -The following example creates a ``text`` index on the -``content`` field and sets the ``default_language`` to -``spanish``: - -.. code-block:: javascript - - db.collection.ensureIndex( - { content : "text" }, - { default_language: "spanish" } - ) - -.. seealso:: - - :doc:`/tutorial/create-text-index-on-multi-language-collection` - -.. _text-index-internals-weights: - -Control Results of Text Search with Weights -------------------------------------------- - -By default, the :dbcommand:`text` command returns matching documents -based on scores, from highest to lowest. For a ``text`` index, the -*weight* of an indexed field denotes the significance of the field -relative to the other indexed fields in terms of the score. 
The score -for a given word in a document is derived from the weighted sum of the -frequency for each of the indexed fields in that document. - -The default weight is 1 for the indexed fields. To adjust the weights -for the indexed fields, include the ``weights`` option in the -:method:`db.collection.ensureIndex()` method. - -.. warning:: - - Choose the weights carefully in order to prevent the need to reindex. - -A collection ``blog`` has the following documents: - -.. code-block:: javascript - - { _id: 1, - content: "This morning I had a cup of coffee.", - about: "beverage", - keywords: [ "coffee" ] - } - - { _id: 2, - content: "Who doesn't like cake?", - about: "food", - keywords: [ "cake", "food", "dessert" ] - } - -To create a ``text`` index with different field weights for the -``content`` field and the ``keywords`` field, include the ``weights`` -option to the :method:`~db.collection.ensureIndex()` method. For -example, the following command creates an index on three fields and -assigns weights to two of the fields: - -.. code-block:: javascript - - db.blog.ensureIndex( - { - content: "text", - keywords: "text", - about: "text" - }, - { - weights: { - content: 10, - keywords: 5, - }, - name: "TextIndex" - } - ) - -The ``text`` index has the following fields and weights: - -- ``content`` has a weight of 10, - -- ``keywords`` has a weight of 5, and - -- ``about`` has the default weight of 1. - -These weights denote the relative significance of the indexed fields to -each other. For instance, a term match in the ``content`` field has: - -- ``2`` times (i.e. ``10:5``) the impact as a term match in the - ``keywords`` field and - -- ``10`` times (i.e. ``10:1``) the impact as a term match in the - ``about`` field. - -Tutorials ---------- - -The following tutorials offer additional ``text`` index creation -patterns: - -- :doc:`/tutorial/create-text-index-on-multi-language-collection` - -- :doc:`/tutorial/limit-number-of-items-scanned-for-text-search` - -- :doc:`/tutorial/return-text-queries-using-only-text-index` diff --git a/source/applications/text-search.txt b/source/core/text-search.txt similarity index 53% rename from source/applications/text-search.txt rename to source/core/text-search.txt index ab04dc81805..452a59325f3 100644 --- a/source/applications/text-search.txt +++ b/source/core/text-search.txt @@ -25,6 +25,45 @@ The text search process: By default, :dbcommand:`text` command returns at most the top 100 matching documents as determined by the scores. +.. important:: + + Before you can create a text index or :ref:`run the text command + `, you need to manually enable the text + search. See :doc:`/tutorial/enable-text-search` for information on + how to enable the text search feature. + +.. _text-search-storage-requirements: + +Storage Requirements and Performance Costs +------------------------------------------ + +``text`` indexes have the following storage requirements and +performance costs: + +- ``text`` indexes change the space allocation method for all future + record allocations in a collection to :collflag:`usePowerOf2Sizes`. + +- ``text`` indexes can be large. They contain one index entry for each + unique post-stemmed word in each indexed field for each document + inserted. + +- Building a ``text`` index is very similar to building a large + multi-key index and will take longer than building a simple ordered + (scalar) index on the same data. 
+ +- When building a large ``text`` index on an existing collection, + ensure that you have a sufficiently high limit on open file + descriptors. See the :ref:`recommended settings `. + +- ``text`` indexes will impact insertion throughput because MongoDB + must add an index entry for each unique post-stemmed word in each + indexed field of each new source document. + +- Additionally, ``text`` indexes do not store phrases or information + about the proximity of words in the documents. As a result, phrase + queries will run much more effectively when the entire collection + fits in RAM. + .. _create-text-index: Create a ``text`` Index @@ -38,36 +77,24 @@ the string literal ``text``. .. important:: - - Before you can :ref:`create a text index ` or - :ref:`run the text command `, you need - to manually enable the text search. See - :doc:`/tutorial/enable-text-search` for information on how to - enable the text search feature. - - Text indexes have significant storage requirements and performance - costs. See :ref:`text index feature ` for more + costs. See :ref:`text-search-storage-requirements` for more information. - .. include:: /includes/fact-text-index-limit-one.rst -The following example creates a ``text`` index on the fields -``subject`` and ``content``: +The following tutorials offer additional ``text`` index creation +patterns: + +- :doc:`/tutorial/create-text-index-on-multiple-fields` -.. code-block:: javascript +- :doc:`/tutorial/specify-language-for-text-index` - db.collection.ensureIndex( - { - subject: "text", - content: "text" - } - ) +- :doc:`/tutorial/avoid-text-index-name-limit` -This ``text`` index catalogs all string data in the ``subject`` field -and the ``content`` field, where the field value is either a string or -an array of string elements. +- :doc:`/tutorial/create-text-index-on-multi-language-collection` -See :doc:`/core/text-indexes` for details on the options available when -creating ``text`` indexes. +- :doc:`/tutorial/control-results-of-text-search` Additionally, MongoDB permits :ref:`compound indexes ` that include ``text`` index fields in @@ -89,18 +116,6 @@ document field contains the word ``blueberry``, a search on the term ``blue`` will not match the document. However, a search on either ``blueberry`` or ``blueberries`` will match. -By default, the :dbcommand:`text` command returns the top 100 scoring -documents in descending order, but you can specify a ``limit`` option -to change the maximum number to return. - -Given a collection with a ``text`` index, use the -:method:`~db.runCommand()` method to execute the -:dbcommand:`text` command, as in: - -.. code-block:: javascript - - db.collection.runCommand( "text" , { search: } ) - For information and examples on various text search patterns, see :doc:`/tutorial/search-for-text`. diff --git a/source/applications/update.txt b/source/core/update.txt similarity index 98% rename from source/applications/update.txt rename to source/core/update.txt index 392c1cd13b3..c4b1b216cbc 100644 --- a/source/applications/update.txt +++ b/source/core/update.txt @@ -314,7 +314,7 @@ with the operations from the ```` argument applied. ) See also :ref:`Update Operations with the Upsert Flag -` in the :doc:`/applications/create` document. +` in the :doc:`/core/create` document. .. [#upsert-update-operators] If the ```` argument includes only field and value pairs, the new document contains the fields and @@ -366,7 +366,7 @@ replaces the document with the ```` argument: ) .. 
seealso:: :ref:`crud-create-insert-save` and
-   :ref:`crud-create-save` in the :doc:`/applications/create` section.
+   :ref:`crud-create-save` in the :doc:`/core/create` section.
 
 .. _crud-update-operators:
diff --git a/source/core/write-concern.txt b/source/core/write-concern.txt
new file mode 100644
index 00000000000..1381da2c805
--- /dev/null
+++ b/source/core/write-concern.txt
@@ -0,0 +1,216 @@
+=============
+Write Concern
+=============
+
+.. default-domain:: mongodb
+
+After the :doc:`driver write concern change
+`, all officially supported
+MongoDB drivers enable write concern by default.
+
+.. _replica-set-write-concern:
+
+Write Concern for Replica Sets
+------------------------------
+
+MongoDB's built-in :term:`write concern` confirms the success of write
+operations to a :term:`replica set's ` :term:`primary`.
+Write concern uses the :dbcommand:`getLastError` command after write
+operations to return an object with error information or confirmation
+that there are no errors.
+
+From the perspective of a client application, whether a MongoDB
+instance is running as a single server (i.e. "standalone") or a
+:term:`replica set` is transparent. However, replica sets offer some
+configuration options for write and read operations. [#sharded-clusters]_
+
+.. [#sharded-clusters] :term:`Sharded clusters ` where the
+   shards are also replica sets provide the same configuration options
+   with regard to write and read operations.
+
+Verify Write Operations
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The default write concern confirms write operations only on the
+primary. You can configure write concern to confirm write operations
+to additional replica set members as well by issuing the
+:dbcommand:`getLastError` command with the ``w`` option.
+
+The ``w`` option confirms that write operations have replicated to the
+specified number of replica set members, including the primary. You can
+either specify a number or specify ``majority``, which ensures the write
+propagates to a majority of set members. The following example ensures
+the operation has replicated to two members (the primary and one other
+member):
+
+.. code-block:: javascript
+
+   db.runCommand( { getLastError: 1, w: 2 } )
+
+The following example ensures the write operation has replicated to a
+majority of the configured members of the set.
+
+.. code-block:: javascript
+
+   db.runCommand( { getLastError: 1, w: "majority" } )
+
+If you specify a ``w`` value greater than the number of members that
+hold a copy of the data (i.e., greater than the number of
+non-:term:`arbiter` members), the operation blocks until those members
+become available. This can cause the operation to block forever. To
+specify a timeout threshold for the :dbcommand:`getLastError` operation,
+use the ``wtimeout`` argument. The following example sets the timeout to
+5000 milliseconds:
+
+.. code-block:: javascript
+
+   db.runCommand( { getLastError: 1, w: 2, wtimeout: 5000 } )
+
+Modify Default Write Concern
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can configure your own "default" :dbcommand:`getLastError`
+behavior for a replica set. Use the
+:data:`~local.system.replset.settings.getLastErrorDefaults` setting in
+the :doc:`replica set configuration
+`. The following sequence of
+commands creates a configuration that waits for the write operation to
+complete on a majority of the set members before returning:
+
+.. 
code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.settings = {}
+   cfg.settings.getLastErrorDefaults = {w: "majority"}
+   rs.reconfig(cfg)
+
+The :data:`~local.system.replset.settings.getLastErrorDefaults`
+setting affects only those :dbcommand:`getLastError` commands that
+have *no* other arguments.
+
+.. note::
+
+   Use of insufficient write concern can lead to :ref:`rollbacks
+   ` in the case of :ref:`replica set failover
+   `. Always ensure that your operations have
+   specified the required write concern for your application.
+
+.. seealso:: :ref:`write-operations-write-concern` and
+   :ref:`connections-write-concern`
+
+Custom Write Concerns
+~~~~~~~~~~~~~~~~~~~~~
+
+You can use replica set tags to create custom write concerns using the
+:data:`~local.system.replset.settings.getLastErrorDefaults` and
+:data:`~local.system.replset.settings.getLastErrorModes` replica set
+settings.
+
+.. note::
+
+   Custom write concern modes specify the field name and a number of
+   *distinct* values for that field. By contrast, read preferences use
+   the value of fields in the tag document to direct read operations.
+
+   In some cases, you may be able to use the same tags for read
+   preferences and write concerns; however, you may need to create
+   additional tags for write concerns depending on the requirements of
+   your application.
+
+Single Tag Write Concerns
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a five member replica set, where each member has one of the
+following tag sets:
+
+.. code-block:: javascript
+
+   { "use": "reporting" }
+   { "use": "backup" }
+   { "use": "application" }
+   { "use": "application" }
+   { "use": "application" }
+
+You could create a custom write concern mode that will ensure that
+applicable write operations will not return until members with two
+different values of the ``use`` tag have acknowledged the write
+operation. Create the mode with the following sequence of operations
+in the :program:`mongo` shell:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.settings = { getLastErrorModes: { use2: { "use": 2 } } }
+   rs.reconfig(cfg)
+
+.. these examples need to be better so that they avoid overwriting
+   getLastErrorModes upon repetition (i.e. they don't $push documents
+   to getLastErrorModes.)
+
+To use this mode pass the string ``use2`` to the ``w`` option of
+:dbcommand:`getLastError` as follows:
+
+.. code-block:: javascript
+
+   db.runCommand( { getLastError: 1, w: "use2" } )
+
+Specific Custom Write Concerns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you have a three member replica set with the following tag sets:
+
+.. code-block:: javascript
+
+   { "disk": "ssd" }
+   { "disk": "san" }
+   { "disk": "spinning" }
+
+You cannot specify a custom
+:data:`~local.system.replset.settings.getLastErrorModes` value to
+ensure that the write propagates to the ``san`` member before
+returning. However, you may implement this write concern policy by
+creating the following additional tags, so that the set resembles the
+following:
+
+.. code-block:: javascript
+
+   { "disk": "ssd" }
+   { "disk": "san", "disk.san": "san" }
+   { "disk": "spinning" }
+
+Then, create a custom
+:data:`~local.system.replset.settings.getLastErrorModes` value, as
+follows:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.settings = { getLastErrorModes: { san: { "disk.san": 1 } } }
+   rs.reconfig(cfg)
+
+.. these examples need to be better so that they avoid overwriting
+   getLastErrorModes upon repetition (i.e. they don't $push documents
+   to getLastErrorModes.)
+
+To use this mode pass the string ``san`` to the ``w`` option of
+:dbcommand:`getLastError` as follows:
+
+.. code-block:: javascript
+
+   db.runCommand( { getLastError: 1, w: "san" } )
+
+This operation will not return until a replica set member with the tag
+``disk.san`` acknowledges the write operation.
+
+You may set a custom write concern mode as the default write concern
+mode by using the
+:data:`~local.system.replset.settings.getLastErrorDefaults` replica set
+setting, as in the following:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.settings.getLastErrorDefaults = { w: "san" }
+   rs.reconfig(cfg)
+
+.. seealso:: :ref:`replica-set-configuration-tag-sets` for further
+   information about replica set reconfiguration and tag sets.
diff --git a/source/core/write-operations.txt b/source/core/write-operations.txt
index a90868bebfd..e07a4797f58 100644
--- a/source/core/write-operations.txt
+++ b/source/core/write-operations.txt
@@ -24,9 +24,9 @@ Write Operators
 For information on write operators and how to write data to a MongoDB
 database, see the following pages:
 
-- :doc:`/applications/create`
-- :doc:`/applications/update`
-- :doc:`/applications/delete`
+- :doc:`/core/create`
+- :doc:`/core/update`
+- :doc:`/core/delete`
 
 For information on specific methods used to perform write operations in
 the :program:`mongo` shell, see the following:
@@ -297,7 +297,7 @@ For more information see your :doc:`driver documentation
 your application. Also consider the following resources:
 :ref:`write-operations-sharded-clusters`,
 :ref:`sharding-bulk-inserts`, and
-:doc:`/administration/import-export`.
+:doc:`/core/import-export`.
 
 .. _write-operations-indexing:
 
@@ -361,7 +361,7 @@ more efficient than those updates that cause document growth. Use
 document growth when possible.
 
 For complete examples of update operations, see
-:doc:`/applications/update`.
+:doc:`/core/update`.
 
 .. _write-operations-padding-factor:
 
@@ -497,7 +497,7 @@ can maintain a largely current state with respect to the primary.
 
 For more information on replica sets and write operations, see
 :ref:`replica-set-write-concern`, :ref:`replica-set-oplog-sizing`,
-:ref:`replica-set-oplog`, and :ref:`replica-set-procedure-change-oplog-size`.
+:ref:`replica-set-oplog`, and :doc:`/tutorial/change-oplog-size`.
 
 .. [#write-concern-throttling] Calling :dbcommand:`getLastError`
    intermittently with a ``w`` value of ``2`` or ``majority`` will
diff --git a/source/crud.txt b/source/crud.txt
index 5c415829e1d..c50e64bfc78 100644
--- a/source/crud.txt
+++ b/source/crud.txt
@@ -27,8 +27,8 @@ deployments. The :doc:`/core/document` provides an overview of
    core/read-operations
    core/write-operations
 
-Document Orientation Concepts
------------------------------
+Fundamental Concepts for Document Databases
+-------------------------------------------
 
 .. todo insert signposting blurb here.
 
@@ -37,9 +37,10 @@ Document Orientation Concepts
 
    core/data-modeling
    core/document
-   core/object-id
-   applications/database-references
-   applications/gridfs
+   reference/object-id
+   core/gridfs
+   reference/gridfs
+   reference/database-references
 
 .. _crud-documents:
 .. _crud-operations:
 
@@ -53,14 +54,17 @@ operations, i.e. CRUD, in MongoDB.
 .. toctree::
    :maxdepth: 1
 
-   applications/create
-   applications/read
-   applications/update
-   applications/delete
+   core/create
+   core/read
+   core/update
+   core/delete
 
 Data Modeling Patterns
 ----------------------
 
+See :doc:`/core/data-modeling` for background on data modeling
+practices in MongoDB.
+
 .. 
toctree::
    :maxdepth: 1
 
diff --git a/source/data-center-awareness.txt b/source/data-center-awareness.txt
index e8535c0d807..4b42d780bf5 100644
--- a/source/data-center-awareness.txt
+++ b/source/data-center-awareness.txt
@@ -23,11 +23,11 @@ Consider the following documents:
 .. toctree::
    :maxdepth: 1
 
-   /administration/operational-segregation
-   /administration/tag-aware-sharding
+   /core/operational-segregation
+   /core/tag-aware-sharding
+   /tutorial/administer-shard-tags
    /tutorial/deploy-geographically-distributed-replica-set
 
-Additionally, consider the :ref:`replica-set-write-concern` and
-:ref:`replica-set-read-preference` sections of the
-:doc:`/applications/replication` document, which addresses
+Additionally, consider the :doc:`/core/write-concern` and
+:doc:`/core/read-preference` documents, which address
 capabilities related to data center awareness.
diff --git a/source/faq/developers.txt b/source/faq/developers.txt
index 7ad7e522057..11e0097edca 100644
--- a/source/faq/developers.txt
+++ b/source/faq/developers.txt
@@ -115,7 +115,7 @@ How do you aggregate data with MongoDB?
 ---------------------------------------
 
 In version 2.1 and later, you can use the new ":doc:`aggregation
-framework `," with the
+framework `," with the
 :dbcommand:`aggregate` command.
 
 MongoDB also supports :term:`map-reduce` with the
@@ -263,7 +263,7 @@ within a single document. You may use the BinData data type to store
 the binary data.
 
 See your :doc:`drivers ` documentation for details on using
 BinData.
 
-For more information on GridFS, see :doc:`/applications/gridfs`.
+For more information on GridFS, see :doc:`/core/gridfs`.
 
 How does MongoDB address SQL or Query injection?
 ------------------------------------------------
@@ -597,7 +597,7 @@ explicitly force the query to use that index.
 
 .. [#duplicate-document-in-result-set] As a cursor returns documents
    other operations may interleave with the query: if some of these
-   operations are :doc:`updates ` that cause the
+   operations are :doc:`updates ` that cause the
    document to move (in the case of a table scan, caused by document
    growth,) or that change the indexed field on the index used by the
    query; then the cursor will return the same document more than
diff --git a/source/faq/diagnostics.txt b/source/faq/diagnostics.txt
index 0c467d367c8..edaee95c783 100644
--- a/source/faq/diagnostics.txt
+++ b/source/faq/diagnostics.txt
@@ -170,7 +170,7 @@ limited to:
 
 - The operating system's cache strategy for LRU (Least Recently Used)
 
-- The impact of :doc:`journaling `
+- The impact of :doc:`journaling `
 
 - The number or rate of page faults and other MMS gauges to detect
   when you need more RAM
diff --git a/source/faq/fundamentals.txt b/source/faq/fundamentals.txt
index 14516dd77cf..aa928959831 100644
--- a/source/faq/fundamentals.txt
+++ b/source/faq/fundamentals.txt
@@ -187,7 +187,7 @@ directly from the indexes and/or data files.
 Are writes written to disk immediately, or lazily?
 --------------------------------------------------
 
-Writes are physically written to the :doc:`journal ` within 100
+Writes are physically written to the :doc:`journal ` within 100
 milliseconds. At that point, the write is "durable" in the sense that
 after a pull-plug-from-wall event, the data will still be recoverable
 after a hard restart.
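+
+For example, to confirm that a write has been committed to the journal
+before the application continues, a client can pass the ``j`` option to
+:dbcommand:`getLastError`. A minimal sketch, assuming journaling is
+enabled on the :program:`mongod`:
+
+.. code-block:: javascript
+
+   // wait for the journal commit; requires journaling to be enabled
+   db.runCommand( { getLastError: 1, j: true } )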
diff --git a/source/faq/replica-sets.txt b/source/faq/replica-sets.txt index 9a9d78c6efa..a4f9d1c9d92 100644 --- a/source/faq/replica-sets.txt +++ b/source/faq/replica-sets.txt @@ -163,7 +163,7 @@ to construct this majority without the overhead of adding replicating nodes to the system. There are many possible replica set :doc:`architectures -`. +`. If you have a three node replica set, you don't need an arbiter. @@ -179,7 +179,7 @@ facilities are possible. In these cases, the arbiter will break the tie between the two facilities and allow the set to elect a new primary. -.. seealso:: :doc:`/administration/replica-set-architectures` +.. seealso:: :doc:`/core/replica-set-architectures` What information do arbiters exchange with the rest of the replica set? ----------------------------------------------------------------------- @@ -196,8 +196,9 @@ following data with the rest of the replica set: If your MongoDB deployment uses SSL, then all communications between arbiters and the other members of the replica set are secure. See the -documentation for :doc:`/administration/ssl` for more information. Run -all arbiters on secure networks, as with all MongoDB components. +documentation for :doc:`/tutorial/configure-ssl` for more +information. Run all arbiters on secure networks, as with all MongoDB +components. .. see:: The overview of :ref:`Arbiter Members of Replica Sets `. diff --git a/source/faq/sharding.txt b/source/faq/sharding.txt index b7fda932b94..4a9dbb0c360 100644 --- a/source/faq/sharding.txt +++ b/source/faq/sharding.txt @@ -156,7 +156,7 @@ field unique. It is problematic for collections to have duplicate If you're not sharding your collection by the ``_id`` field, then you should be sure to store a globally unique identifier in that -field. The default :doc:`BSON ObjectID ` works well in +field. The default :doc:`BSON ObjectID ` works well in this case. I've enabled sharding and added a second shard, but all the data is still on one server. Why? @@ -202,7 +202,7 @@ pool. See the :ref:`System Resource Utilization ` section of the -:doc:`/administration/ulimit` document. +:doc:`/reference/ulimit` document. Why does ``mongos`` hold connections open? ------------------------------------------ diff --git a/source/faq/storage.txt b/source/faq/storage.txt index f28c80d6f37..124ee52a91a 100644 --- a/source/faq/storage.txt +++ b/source/faq/storage.txt @@ -160,7 +160,7 @@ inserted into the database. Consider the following possible causes: The data directory contains the journal files, which store write operations on disk prior to MongoDB applying them to databases. See - :doc:`/administration/journaling`. + :doc:`/core/journaling`. - Empty records. diff --git a/source/includes/admonitions-eval.rst b/source/includes/admonitions-eval.rst index 7521415043b..a88a14bf5cc 100644 --- a/source/includes/admonitions-eval.rst +++ b/source/includes/admonitions-eval.rst @@ -8,7 +8,7 @@ - Do not use |object| for long running operations as |object| blocks all other operations. Consider using :doc:`other server side code execution options - `. + `. - You can not use |object| with :term:`sharded ` data. 
In general, you should avoid using
diff --git a/source/includes/fact-group-map-reduce-where-limitations-in-24.rst b/source/includes/fact-group-map-reduce-where-limitations-in-24.rst
index 14d4a246792..3e8debfbf65 100644
--- a/source/includes/fact-group-map-reduce-where-limitations-in-24.rst
+++ b/source/includes/fact-group-map-reduce-where-limitations-in-24.rst
@@ -1,19 +1,17 @@
-In MongoDB 2.4, :doc:`map-reduce operations
-`, the :dbcommand:`group` command, and
-:operator:`$where` operator expressions **cannot** access certain
-global functions or properties, such as ``db``, that are available
-in the :program:`mongo` shell.
+In MongoDB 2.4, :dbcommand:`map-reduce operations `, the
+:dbcommand:`group` command, and :operator:`$where` operator expressions
+**cannot** access certain global functions or properties, such as
+``db``, that are available in the :program:`mongo` shell.
 
 When upgrading to MongoDB 2.4, you will need to refactor your code if
-your :doc:`map-reduce operations `,
-:dbcommand:`group` commands, or :operator:`$where` operator expressions
-include any global shell functions or properties that are no longer
-available, such as ``db``.
+your :dbcommand:`map-reduce operations `, :dbcommand:`group`
+commands, or :operator:`$where` operator expressions include any global
+shell functions or properties that are no longer available, such as
+``db``.
 
 The following shell functions and properties **are available** to
-:doc:`map-reduce operations `, the
-:dbcommand:`group` command, and :operator:`$where` operator expressions
-in MongoDB 2.4:
+:dbcommand:`map-reduce operations `, the :dbcommand:`group`
+command, and :operator:`$where` operator expressions in MongoDB 2.4:
 
 .. list-table::
    :header-rows: 1
diff --git a/source/includes/fact-text-search-beta.rst b/source/includes/fact-text-search-beta.rst
index e976d454f6b..1476a9e1094 100644
--- a/source/includes/fact-text-search-beta.rst
+++ b/source/includes/fact-text-search-beta.rst
@@ -1,4 +1,4 @@
-The :doc:`text search ` is currently a
+The :doc:`text search ` is currently a
 *beta* feature. As a beta feature:
 
 - You need to explicitly enable the feature before :ref:`creating a text
diff --git a/source/includes/index-tutorials-considerations.rst b/source/includes/index-tutorials-considerations.rst
new file mode 100644
index 00000000000..89cbe97142e
--- /dev/null
+++ b/source/includes/index-tutorials-considerations.rst
@@ -0,0 +1,12 @@
+If your collection holds a large amount of data, and your application
+needs to be able to access the data while building the index, consider
+building the index in the background, as described in
+:ref:`index-creation-background`. To build indexes on replica sets,
+see the :ref:`index-build-on-replica-sets` section for more
+information.
+
+.. include:: /includes/note-build-indexes-on-replica-sets.rst
+
+Some drivers may specify indexes using ``NumberLong(1)`` rather than
+``1`` as the specification. This does not have any effect on the
+resulting index.
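+
+For example, the following sketch builds an index in the background.
+The collection and field names here are illustrative only and are not
+part of the original include:
+
+.. code-block:: javascript
+
+   // hypothetical collection and field; background builds do not
+   // block other database operations while the index builds
+   db.records.ensureIndex( { username: 1 }, { background: true } )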
diff --git a/source/includes/list-administration-tutorials.rst b/source/includes/list-administration-tutorials.rst deleted file mode 100644 index 356f1e2419a..00000000000 --- a/source/includes/list-administration-tutorials.rst +++ /dev/null @@ -1,47 +0,0 @@ -Replica Sets -~~~~~~~~~~~~ - -- :doc:`/tutorial/deploy-replica-set` -- :doc:`/tutorial/convert-standalone-to-replica-set` -- :doc:`/tutorial/expand-replica-set` -- :doc:`/tutorial/deploy-geographically-distributed-replica-set` -- :doc:`/tutorial/change-oplog-size` -- :doc:`/tutorial/force-member-to-be-primary` -- :doc:`/tutorial/change-hostnames-in-a-replica-set` -- :doc:`/tutorial/convert-secondary-into-arbiter` -- :doc:`/tutorial/reconfigure-replica-set-with-unavailable-members` - -Sharding -~~~~~~~~ - -- :doc:`/tutorial/deploy-shard-cluster` -- :doc:`/tutorial/convert-replica-set-to-replicated-shard-cluster` -- :doc:`/tutorial/add-shards-to-shard-cluster` -- :doc:`/tutorial/remove-shards-from-cluster` -- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump` -- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots` -- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps` -- :doc:`/tutorial/restore-single-shard` -- :doc:`/tutorial/restore-sharded-cluster` -- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters` - -Basic Operations -~~~~~~~~~~~~~~~~ - -- :doc:`/tutorial/use-database-commands` -- :doc:`/tutorial/recover-data-following-unexpected-shutdown` -- :doc:`/tutorial/copy-databases-between-instances` -- :doc:`/tutorial/expire-data` -- :doc:`/tutorial/manage-the-database-profiler` -- :doc:`/tutorial/rotate-log-files` -- :doc:`/tutorial/roll-back-to-v1.8-index` -- :doc:`/tutorial/manage-mongodb-processes` -- :doc:`/tutorial/backup-databases-with-binary-database-dumps` -- :doc:`/tutorial/backup-databases-with-filesystem-snapshots` - -Security -~~~~~~~~ - -- :doc:`/tutorial/configure-linux-iptables-firewall` -- :doc:`/tutorial/configure-windows-netsh-firewall` -- :doc:`/tutorial/control-access-to-mongodb-with-authentication` diff --git a/source/includes/note-general-ssl-support.rst b/source/includes/note-general-ssl-support.rst index 56e04e8ab63..26a5bd19e60 100644 --- a/source/includes/note-general-ssl-support.rst +++ b/source/includes/note-general-ssl-support.rst @@ -2,6 +2,6 @@ The `default distribution of MongoDB `_ does **not** contain support - for SSL. To use SSL you can either compile MongoDB with SSL - support or use |ent-build|. See :doc:`/administration/ssl` for + for SSL. To use SSL you can either compile MongoDB with SSL support + or use |ent-build|. See :doc:`/tutorial/configure-ssl` for more information about SSL and MongoDB. diff --git a/source/includes/note-ssl-tool.rst b/source/includes/note-ssl-tool.rst index 9bf236de999..33690e987d6 100644 --- a/source/includes/note-ssl-tool.rst +++ b/source/includes/note-ssl-tool.rst @@ -5,8 +5,9 @@ .. note:: SSL support in |tool-binary| is not compiled into the default - distribution of MongoDB. See :doc:`/administration/ssl` for more - information on SSL and MongoDB. + distribution of MongoDB. See + :doc:`/tutorial/configure-ssl` for more information on SSL + and MongoDB. 
Additionally, |tool-binary| does not support connections to :program:`mongod` instances that require client certificate diff --git a/source/includes/table-index-nav.yaml b/source/includes/table-index-nav.yaml index 21d31019c01..b11d87ae217 100644 --- a/source/includes/table-index-nav.yaml +++ b/source/includes/table-index-nav.yaml @@ -27,7 +27,7 @@ getting_started: | developers: | :doc:`Database Operations` - :doc:`Aggregation ` + :doc:`Aggregation ` :doc:`/indexes` diff --git a/source/includes/warning-rs-reconfig.rst b/source/includes/warning-rs-reconfig.rst new file mode 100644 index 00000000000..1343425db7a --- /dev/null +++ b/source/includes/warning-rs-reconfig.rst @@ -0,0 +1,9 @@ +.. warning:: + + The :method:`rs.reconfig()` shell method can force the current + primary to step down, which causes an :ref:`election `. + When the primary steps down, the :program:`mongod` closes all client + connections. While this typically takes 10-20 seconds, attempt to + make these changes during scheduled maintenance periods. To + successfully reconfigure a replica set, a majority of the members + must be accessible. diff --git a/source/index.txt b/source/index.txt index bd09c66c55c..831733828bf 100644 --- a/source/index.txt +++ b/source/index.txt @@ -51,7 +51,7 @@ more information on the MongoDB Documentation project. - :doc:`Database Operations` - :doc:`Aggregation ` + :doc:`Aggregation ` :doc:`/indexes` diff --git a/source/indexes.txt b/source/indexes.txt index 02379f8796e..3a94e8b5d80 100644 --- a/source/indexes.txt +++ b/source/indexes.txt @@ -13,15 +13,29 @@ and operational concerns, see :doc:`/administration/indexes`. For information on how applications might use indexes, see :doc:`/applications/indexes`. -Core MongoDB Indexing Background --------------------------------- +Index Concepts +-------------- .. toctree:: :maxdepth: 1 - core/indexes - administration/indexes - applications/indexes + /core/indexes + +Indexing Strategies for Applications +------------------------------------ + +.. toctree:: + :maxdepth: 1 + + /applications/indexes + +Index Tutorials +--------------- + +.. toctree:: + :maxdepth: 2 + + /administration/indexes Geospatial Indexing ------------------- @@ -30,15 +44,9 @@ See :doc:`/applications/geospatial-indexes` for an introduction to geospatial indexing. .. toctree:: - :maxdepth: 1 + :maxdepth: 2 /applications/geospatial-indexes - /applications/2d - /applications/2dsphere - /applications/geohaystack - /reference/geospatial-queries - /tutorial/calculate-distances-using-spherical-geometry-with-2d-geospatial-indexes - /core/geospatial-indexes Text Indexing @@ -50,5 +58,11 @@ Text Indexing .. 
toctree:: :maxdepth: 1 - applications/text-search - core/text-indexes + core/text-search + tutorial/create-text-index-on-multiple-fields + tutorial/specify-language-for-text-index + tutorial/avoid-text-index-name-limit + tutorial/create-text-index-on-multi-language-collection + tutorial/control-results-of-text-search + tutorial/limit-number-of-items-scanned-for-text-search + tutorial/return-text-queries-using-only-text-index diff --git a/source/installation.txt b/source/installation.txt index 99d417dde63..b65ea2a59f2 100644 --- a/source/installation.txt +++ b/source/installation.txt @@ -37,10 +37,10 @@ you begin to learn about MongoDB: tutorial/getting-started - :doc:`/tutorial/getting-started` -- :doc:`/applications/create` -- :doc:`/applications/read` -- :doc:`/applications/update` -- :doc:`/applications/delete` +- :doc:`/core/create` +- :doc:`/core/read` +- :doc:`/core/update` +- :doc:`/core/delete` Release Notes ------------- diff --git a/source/meta/reference.txt b/source/meta/reference.txt index ec03d11183c..02aa54c6979 100644 --- a/source/meta/reference.txt +++ b/source/meta/reference.txt @@ -104,5 +104,5 @@ General System Reference /reference/glossary .. include:: /release-notes.txt - :start-after: :orphan: + :start-after: start-include-here :end-before: end-include-here diff --git a/source/reference.txt b/source/reference.txt index 8f21f9584ab..ace6ec3bc5a 100644 --- a/source/reference.txt +++ b/source/reference.txt @@ -29,6 +29,9 @@ concepts and statements and SQL concepts and statements. reference/sql-comparison reference/sql-aggregation-comparison +.. TODO Should reference/database-references go here instead of under +.. General Reference + Quick Reference Material ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,6 +101,7 @@ Internal Metadata reference/config-database reference/local-database reference/system-collections + reference/gridfs General Reference ----------------- @@ -108,14 +112,8 @@ General Reference reference/limits reference/mongodb-extended-json reference/text-search + reference/database-references reference/glossary .. seealso:: The :ref:`genindex` may provide useful insight into the reference material in this manual. - -.. The following include, inlines the release notes page where all -.. release-related material should live. - -.. include:: /release-notes.txt - :start-after: start-include-here - :end-before: end-include-here diff --git a/source/reference/aggregation.txt b/source/reference/aggregation.txt index c4ab8915807..7136a54766e 100644 --- a/source/reference/aggregation.txt +++ b/source/reference/aggregation.txt @@ -33,7 +33,7 @@ This documentation provides an overview of all aggregation operators available for use in the aggregation pipeline as well as details regarding their use and behavior. -.. seealso:: :doc:`/applications/aggregation` overview, the +.. seealso:: :doc:`/core/aggregation` overview, the :ref:`Aggregation Framework Documentation Index `, and the :doc:`/tutorial/aggregation-examples` for more information on the diff --git a/source/reference/command.txt b/source/reference/command.txt index 2e802df645c..30e42ccf9c0 100644 --- a/source/reference/command.txt +++ b/source/reference/command.txt @@ -2,6 +2,14 @@ Database Commands ================= +For an introduction to database commands and their use, see the +:doc:`/tutorial/use-database-commands` document. + +.. toctree:: + :hidden: + + /tutorial/use-database-commands + .. 
toctree:: :maxdepth: 1 :glob: diff --git a/source/reference/command/aggregate.txt b/source/reference/command/aggregate.txt index 35a0f0c1415..4201abc6b7a 100644 --- a/source/reference/command/aggregate.txt +++ b/source/reference/command/aggregate.txt @@ -19,7 +19,7 @@ aggregate contains the data that you wish to aggregate. The ``pipeline`` argument holds an array that contains the specification for the aggregation operation. Consider the following example from the - :doc:`aggregation documentation `. + :doc:`aggregation documentation `. .. code-block:: javascript @@ -57,6 +57,6 @@ aggregate For more aggregation documentation, please see: - - :doc:`/applications/aggregation` + - :doc:`/core/aggregation` - :doc:`/reference/aggregation` - :doc:`/tutorial/aggregation-examples` diff --git a/source/reference/command/authenticate.txt b/source/reference/command/authenticate.txt index 9e1a17a749f..a8d0ae05e58 100644 --- a/source/reference/command/authenticate.txt +++ b/source/reference/command/authenticate.txt @@ -14,7 +14,7 @@ authenticate db.auth( "username", "password" ) - .. see:: :method:`db.auth()` and :doc:`/administration/security` + .. see:: :method:`db.auth()` and :doc:`/core/security` for more information. .. read-lock, slave-ok diff --git a/source/reference/command/eval.txt b/source/reference/command/eval.txt index a0e4dff500e..afd0a19d1cd 100644 --- a/source/reference/command/eval.txt +++ b/source/reference/command/eval.txt @@ -87,7 +87,7 @@ eval .. \|nolockobject| defined in included parameters-eval .. .. include:: /includes/admonitions-eval.rst - .. seealso:: :doc:`/applications/server-side-javascript` + .. seealso:: :doc:`/core/server-side-javascript` .. [#eval-shell-helper] .. include:: /includes/fact-eval-helper-method.rst diff --git a/source/reference/command/getLastError.txt b/source/reference/command/getLastError.txt index aadaa158466..c276b43b4e5 100644 --- a/source/reference/command/getLastError.txt +++ b/source/reference/command/getLastError.txt @@ -51,6 +51,6 @@ getLastError timeframe, the :dbcommand:`getLastError` command will return with an error status. - .. seealso:: :ref:`Write Concern `, :ref:`Replica Set - Write Concern `, and + .. seealso:: :ref:`Write Concern `, :doc:`Replica Set + Write Concern `, and :method:`db.getLastError()`. diff --git a/source/reference/command/group.txt b/source/reference/command/group.txt index 0922d9181bb..79002a2ed41 100644 --- a/source/reference/command/group.txt +++ b/source/reference/command/group.txt @@ -268,6 +268,6 @@ group "keys" : 3, "ok" : 1 } - .. seealso:: :doc:`/applications/aggregation` + .. seealso:: :doc:`/core/aggregation` .. read-lock diff --git a/source/reference/command/hostInfo.txt b/source/reference/command/hostInfo.txt index 098ad6269a1..670fcb62afb 100644 --- a/source/reference/command/hostInfo.txt +++ b/source/reference/command/hostInfo.txt @@ -178,7 +178,7 @@ hostInfo .. data:: hostInfo.extra.maxOpenFiles Reports the current system limits on open file handles. See - :doc:`/administration/ulimit` for more information. + :doc:`/reference/ulimit` for more information. :data:`~hostInfo.extra.maxOpenFiles` only appears on Linux systems. diff --git a/source/reference/command/mapReduce.txt b/source/reference/command/mapReduce.txt index 091cd36ebca..662a6b77913 100644 --- a/source/reference/command/mapReduce.txt +++ b/source/reference/command/mapReduce.txt @@ -74,13 +74,18 @@ following examples use the :method:`db.collection.mapReduce()` method: .. 
include:: /includes/examples-map-reduce.rst :start-after: map-reduce-document-prototype-begin -For more information and examples, see the :doc:`Map-Reduce -` page. +For more information and examples, see the +:doc:`Map-Reduce ` page and +:doc:`/tutorial/perform-incremental-map-reduce`. -.. seealso:: +.. seealso:: - - :term:`map-reduce` and :method:`db.collection.mapReduce()` + - :doc:`/tutorial/troubleshoot-map-function` - - :doc:`/applications/aggregation` + - :doc:`/tutorial/troubleshoot-reduce-function` + + - :method:`db.collection.mapReduce()` + + - :doc:`/core/aggregation` .. slave-ok diff --git a/source/reference/components.txt b/source/reference/components.txt index 36701eedbec..4f762130894 100644 --- a/source/reference/components.txt +++ b/source/reference/components.txt @@ -100,7 +100,18 @@ GridFS :term:`GridFS` storage system. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 mongofiles +Process Management +------------------ + +See :doc:`/tutorial/manage-mongodb-processes` for an introduction to +running :program:`mongod` instances. + +.. toctree:: + :hidden: + + /tutorial/manage-mongodb-processes + /tutorial/rotate-log-files diff --git a/source/reference/configuration-options.txt b/source/reference/configuration-options.txt index 622accab4c3..e5e9bb7be9a 100644 --- a/source/reference/configuration-options.txt +++ b/source/reference/configuration-options.txt @@ -565,7 +565,7 @@ Settings thread's status via the :data:`~serverStatus.backgroundFlushing` field. :setting:`syncdelay` has no effect on the :setting:`journal` - files or :doc:`journaling `. + files or :doc:`journaling `. .. warning:: @@ -913,7 +913,7 @@ Sharded Cluster Options recalculates the average. See the :ref:`replica-set-read-preference-behavior-member-selection` - section of the :ref:`read preference ` + section of the :doc:`read preference ` documentation for more information. .. setting:: noAutoSplit diff --git a/source/reference/connection-string.txt b/source/reference/connection-string.txt index a36d0939b2a..120a2debda8 100644 --- a/source/reference/connection-string.txt +++ b/source/reference/connection-string.txt @@ -149,7 +149,8 @@ Connection Options .. note:: The :data:`~uri.ssl` option is not supported by all drivers. See your :doc:`driver ` - documentation and the :doc:`/administration/ssl` document. + documentation and the :doc:`/tutorial/configure-ssl` + document. .. data:: uri.connectTimeoutMS @@ -313,7 +314,7 @@ the: :doc:`/core/write-operations`: Read Preference Options ~~~~~~~~~~~~~~~~~~~~~~~ -:ref:`Read preferences ` describe the +:doc:`Read preferences ` describe the behavior of read operations with regards to :term:`replica sets `. These parameters allow you to specify read preferences on a per-connection basis in the connection string: diff --git a/source/reference/database-profiler.txt b/source/reference/database-profiler.txt index ce72b6b440c..03504d69f8a 100644 --- a/source/reference/database-profiler.txt +++ b/source/reference/database-profiler.txt @@ -110,7 +110,7 @@ operation. .. data:: system.profile.updateobj The :ref:`update document ` passed in - during an :doc:`update ` operation. + during an :doc:`update ` operation. .. 
data:: system.profile.cursorid diff --git a/source/applications/database-references.txt b/source/reference/database-references.txt similarity index 100% rename from source/applications/database-references.txt rename to source/reference/database-references.txt diff --git a/source/reference/glossary.txt b/source/reference/glossary.txt index d1ebbd1de7c..fde266d4184 100644 --- a/source/reference/glossary.txt +++ b/source/reference/glossary.txt @@ -111,8 +111,10 @@ Glossary :doc:`/replication`. and :doc:`/core/replication`. shard - A single replica set that stores some portion of a sharded cluster's - total data set. See :term:`sharding`. + A single :program:`mongod` instance or a :term:`replica set` + that stores some portion of a sharded cluster's + total data set. In production, all shards should be replica sets. + See :term:`sharding`. .. seealso:: The documents in the :doc:`/sharding` section of manual. @@ -230,10 +232,7 @@ Glossary of the official MongoDB drivers support this convention, as does the ``mongofiles`` program. - .. todo When source/applications/gridfs.txt goes live, - add a link here. - - .. seealso:: :doc:`/reference/mongofiles`. + .. seealso:: :doc:`/reference/mongofiles` and :doc:`/core/gridfs`. md5 ``md5`` is a hashing algorithm used to efficiently provide @@ -290,7 +289,7 @@ Glossary .. include:: /includes/fact-journal-commit-interval-with-gle.rst - .. seealso:: The :doc:`/administration/journaling/` page. + .. seealso:: The :doc:`/core/journaling/` page. pcap A packet capture format used by :program:`mongosniff` to record @@ -375,7 +374,7 @@ Glossary enhance searches by creating "bucket" of objects grouped by a second criterion. For example, you might want all geospatial searches to first select along a non-geospatial dimension and then match - on location. See :doc:`/applications/geohaystack` for more + on location. See :doc:`/core/geohaystack` for more information. oplog @@ -583,8 +582,8 @@ Glossary specified number of members. .. seealso:: :ref:`Write Concern `, - :doc:`/core/write-operations`, and :ref:`Write Concern for - Replica Sets `. + :doc:`/core/write-operations`, and :doc:`Write Concern for + Replica Sets `. priority In the context of :term:`replica sets `, priority @@ -637,7 +636,7 @@ Glossary direct reads to secondaries for :term:`eventually consistent ` reads. - .. seealso:: :ref:`Read Preference ` + .. seealso:: :doc:`Read Preference ` replication lag The length of time between the last operation in the primary's @@ -695,9 +694,9 @@ Glossary transforms the data. In MongoDB, you can run arbitrary aggregations over data using map-reduce. - .. seealso:: The :doc:`/applications/map-reduce` page for + .. seealso:: The :doc:`/core/map-reduce` page for more information regarding MongoDB's map-reduce - implementation, and :doc:`/applications/aggregation` for + implementation, and :doc:`/core/aggregation` for another approach to data aggregation in MongoDB. RDBMS @@ -726,19 +725,19 @@ Glossary The MongoDB aggregation framework provides a means to calculate aggregate values without having to use :term:`map-reduce`. - .. seealso:: :doc:`/applications/aggregation`. + .. seealso:: :doc:`/core/aggregation`. pipeline The series of operations in the :term:`aggregation` process. - .. seealso:: :doc:`/applications/aggregation`. + .. seealso:: :doc:`/core/aggregation`. expression In the context of the :term:`aggregation framework`, expressions are the stateless transformations that operate on the data that passes through the :term:`pipeline`. - .. 
seealso:: :doc:`/applications/aggregation`.
+   .. seealso:: :doc:`/core/aggregation`.
 
 accumulator
     An :term:`expression` in the :term:`aggregation framework` that
diff --git a/source/reference/gridfs.txt b/source/reference/gridfs.txt
new file mode 100644
index 00000000000..48ebd913bd6
--- /dev/null
+++ b/source/reference/gridfs.txt
@@ -0,0 +1,141 @@
+.. index:: GridFS
+
+======
+GridFS
+======
+
+.. default-domain:: mongodb
+
+.. index:: GridFS; collections
+.. _gridfs-collections:
+
+:term:`GridFS` stores files in two collections:
+
+- ``chunks`` stores the binary chunks. For details, see
+  :ref:`gridfs-chunks-collection`.
+
+- ``files`` stores the file's metadata. For details, see
+  :ref:`gridfs-files-collection`.
+
+GridFS places the collections in a common bucket by prefixing each
+with the bucket name. By default, GridFS uses two collections with
+names prefixed by the default bucket name ``fs``:
+
+- ``fs.files``
+- ``fs.chunks``
+
+You can choose a different bucket name than ``fs``, and create
+multiple buckets in a single database.
+
+.. seealso:: :doc:`/core/gridfs` for more information about GridFS.
+
+.. index:: GridFS; chunks collection
+.. _gridfs-chunks-collection:
+
+The ``chunks`` Collection
+-------------------------
+
+Each document in the ``chunks`` collection represents a distinct chunk
+of a file as represented in the :term:`GridFS` store. The following is a
+prototype document from the ``chunks`` collection:
+
+.. code-block:: javascript
+
+   {
+     "_id" : ,
+     "files_id" : ,
+     "n" : ,
+     "data" : 
+   }
+
+A document from the ``chunks`` collection contains the following fields:
+
+.. data:: chunks._id
+
+   The unique :term:`ObjectID` of the chunk.
+
+.. data:: chunks.files_id
+
+   The ``_id`` of the "parent" document, as specified in the ``files``
+   collection.
+
+.. data:: chunks.n
+
+   The sequence number of the chunk. GridFS numbers all chunks,
+   starting with 0.
+
+.. data:: chunks.data
+
+   The chunk's payload as a :term:`BSON` binary type.
+
+The ``chunks`` collection uses a :term:`compound index` on
+``files_id`` and ``n``, as described in :ref:`gridfs-index`.
+
+.. index:: GridFS; files collection
+.. _gridfs-files-collection:
+
+The ``files`` Collection
+------------------------
+
+Each document in the ``files`` collection represents a file in the
+:term:`GridFS` store. Consider the following prototype of a document in
+the ``files`` collection:
+
+.. code-block:: javascript
+
+   {
+     "_id" : ,
+     "length" : ,
+     "chunkSize" : 
+     "uploadDate" : 
+     "md5" : 
+
+     "filename" : ,
+     "contentType" : ,
+     "aliases" : ,
+     "metadata" : ,
+   }
+
+Documents in the ``files`` collection contain some or all of the
+following fields. Applications may create additional arbitrary fields:
+
+.. data:: files._id
+
+   The unique ID for this document. The ``_id`` is of the data type you
+   chose for the original document. The default type for MongoDB
+   documents is :term:`BSON` :term:`ObjectID`.
+
+.. data:: files.length
+
+   The size of the document in bytes.
+
+.. data:: files.chunkSize
+
+   The size of each chunk. GridFS divides the document into chunks of
+   the size specified here. The default size is 256 kilobytes.
+
+.. data:: files.uploadDate
+
+   The date the document was first stored by GridFS. This value has the
+   ``Date`` type.
+
+.. data:: files.md5
+
+   An MD5 hash returned from the filemd5 API. This value has the ``String``
+   type.
+
+.. data:: files.filename
+
+   Optional. A human-readable name for the document.
+
+.. data:: files.contentType
+
+   Optional. A valid MIME type for the document.
+
+.. 
data:: files.aliases + + Optional. An array of alias strings. + +.. data:: files.metadata + + Optional. Any additional information you want to store. diff --git a/source/reference/method/db.collection.aggregate.txt b/source/reference/method/db.collection.aggregate.txt index 30c0d1c8320..b0f81e5f8fc 100644 --- a/source/reference/method/db.collection.aggregate.txt +++ b/source/reference/method/db.collection.aggregate.txt @@ -17,7 +17,7 @@ db.collection.aggregate() these operators. Consider the following example from the :doc:`aggregation - documentation `. + documentation `. .. code-block:: javascript @@ -34,5 +34,5 @@ db.collection.aggregate() ); .. seealso:: ":dbcommand:`aggregate`," - ":doc:`/applications/aggregation`," and + ":doc:`/core/aggregation`," and ":doc:`/reference/aggregation`." diff --git a/source/reference/method/db.collection.ensureIndex.txt b/source/reference/method/db.collection.ensureIndex.txt index 4b11a729033..fdd27f7c93c 100644 --- a/source/reference/method/db.collection.ensureIndex.txt +++ b/source/reference/method/db.collection.ensureIndex.txt @@ -17,7 +17,7 @@ db.collection.ensureIndex() ``1`` specifies ascending and a ``-1`` specifies descending. MongoDB supports several different index types including - :doc:`text `, :doc:`geospatial + :ref:`text `, :doc:`geospatial `, and :ref:`hashed ` indexes. @@ -187,8 +187,8 @@ db.collection.ensureIndex() significance of the field relative to the other indexed fields in terms of the score. You can specify weights for some or all the indexed fields. - See :ref:`text-index-internals-weights` to adjust - the scores. + See :doc:`/tutorial/control-results-of-text-search` + to adjust the scores. :option string default_language: @@ -198,7 +198,8 @@ db.collection.ensureIndex() the indexed data is ``english``. See :ref:`text-search-languages` for the available - languages and :ref:`text-index-default-language` for + languages and + :doc:`/tutorial/specify-language-for-text-index` for more information and example. :option string language_override: diff --git a/source/reference/method/db.collection.group.txt b/source/reference/method/db.collection.group.txt index ac56a6decf5..f9ba5a1c6c4 100644 --- a/source/reference/method/db.collection.group.txt +++ b/source/reference/method/db.collection.group.txt @@ -210,6 +210,6 @@ following prototype: { "day_of_week" : "Friday", "total" : 110, "count" : 6, "avg" : 18 }, { "day_of_week" : "Tuesday", "total" : 70, "count" : 3, "avg" : 23 } ] - .. seealso:: :doc:`/applications/aggregation` + .. seealso:: :doc:`/core/aggregation` .. STUB ":doc:`/applications/simple-aggregation`" diff --git a/source/reference/method/db.collection.mapReduce.txt b/source/reference/method/db.collection.mapReduce.txt index 4c9d0f8c5ef..ddf08c702dd 100644 --- a/source/reference/method/db.collection.mapReduce.txt +++ b/source/reference/method/db.collection.mapReduce.txt @@ -42,11 +42,16 @@ db.collection.mapReduce() .. include:: /includes/examples-map-reduce.rst -For more information and examples, see the :doc:`Map-Reduce -` page. - +For more information and examples, see the +:doc:`Map-Reduce ` page and +:doc:`/tutorial/perform-incremental-map-reduce`. + .. 
.. seealso::
-   - :term:`map-reduce` and :dbcommand:`mapReduce` command
+   - :doc:`/tutorial/troubleshoot-map-function`
+
+   - :doc:`/tutorial/troubleshoot-reduce-function`
+
+   - :dbcommand:`mapReduce` command
-
-   - :doc:`/applications/aggregation`
+   - :doc:`/core/aggregation`
diff --git a/source/reference/method/db.eval.txt b/source/reference/method/db.eval.txt
index ae42a02ae9e..e4ac2d940f7 100644
--- a/source/reference/method/db.eval.txt
+++ b/source/reference/method/db.eval.txt
@@ -70,4 +70,4 @@ db.eval()
 .. seealso::
-   :doc:`/applications/server-side-javascript`
+   :doc:`/core/server-side-javascript`
diff --git a/source/reference/method/db.fsyncLock.txt b/source/reference/method/db.fsyncLock.txt
index 55e7fffd455..628895270e5 100644
--- a/source/reference/method/db.fsyncLock.txt
+++ b/source/reference/method/db.fsyncLock.txt
@@ -21,6 +21,6 @@ db.fsyncLock()
   { fsync: 1, lock: true }
   This function locks the database and creates a window for
-  :doc:`backup operations </administration/backups>`.
+  :doc:`backup operations </core/backups>`.
 .. include:: /includes/note-disable-profiling-fsynclock.rst
diff --git a/source/reference/method/db.fsyncUnlock.txt b/source/reference/method/db.fsyncUnlock.txt
index f428f2bd670..b523a34cb20 100644
--- a/source/reference/method/db.fsyncUnlock.txt
+++ b/source/reference/method/db.fsyncUnlock.txt
@@ -9,6 +9,6 @@ db.fsyncUnlock()
   Unlocks a :program:`mongod` instance to allow writes and reverses the
   operation of a :method:`db.fsyncLock()` operation. Typically you will
   use :method:`db.fsyncUnlock()` following a database :doc:`backup
-  operation </administration/backups>`.
+  operation </core/backups>`.
   :method:`db.fsyncUnlock()` is an administrative command.
diff --git a/source/reference/method/db.getLastError.txt b/source/reference/method/db.getLastError.txt
index 21ef1478df9..a37fec37279 100644
--- a/source/reference/method/db.getLastError.txt
+++ b/source/reference/method/db.getLastError.txt
@@ -13,6 +13,6 @@ db.getLastError()
 .. see:: :dbcommand:`getLastError` for all options, :ref:`Write
   Concern <write-concern>` for a conceptual overview,
   :doc:`/core/write-operations` for information about all write
-  operations in MongoDB, and :ref:`Replica Set Write Concern
-  <replica-set-write-concern>` for special considerations related
+  operations in MongoDB, and :doc:`Replica Set Write Concern
+  </core/write-concern>` for special considerations related
   to write concern for replica sets.
diff --git a/source/reference/method/mongo.setSlaveOk.txt b/source/reference/method/mongo.setSlaveOk.txt
index a5dbfe0512d..9795dcfa1b8 100644
--- a/source/reference/method/mongo.setSlaveOk.txt
+++ b/source/reference/method/mongo.setSlaveOk.txt
@@ -20,5 +20,5 @@ mongo.setSlaveOk()
   :method:`rs.slaveOk()`.
   See the :method:`readPref() <cursor.readPref>` method for more
-  fine-grained control over :ref:`read preference <replica-set-read-preference>`
-  in the :program:`mongo` shell.
+  fine-grained control over :doc:`read preference
+  </core/read-preference>` in the :program:`mongo` shell.
diff --git a/source/reference/method/rs.slaveOk.txt b/source/reference/method/rs.slaveOk.txt
index bfd98161568..4bdce990116 100644
--- a/source/reference/method/rs.slaveOk.txt
+++ b/source/reference/method/rs.slaveOk.txt
@@ -15,4 +15,5 @@ rs.slaveOk()
   This allows the current connection to allow read operations to run
   on :term:`secondary` nodes. See the :method:`readPref()
   <cursor.readPref>` method for more fine-grained control over
-  :ref:`read preference <replica-set-read-preference>` in the :program:`mongo` shell.
+  :doc:`read preference </core/read-preference>` in the
+  :program:`mongo` shell.
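To see how the two approaches relate, consider a minimal :program:`mongo`
shell sketch; the ``records`` collection and the query here are
hypothetical:

.. code-block:: javascript

   // Connection-wide: permit this connection to read from secondaries.
   rs.slaveOk()

   // Per-query: readPref() sets the read preference for a single cursor,
   // which is the finer-grained control described above.
   db.records.find( { status: "active" } ).readPref( "secondaryPreferred" )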
diff --git a/source/reference/mongo-shell.txt b/source/reference/mongo-shell.txt
index 67b91f41278..b55846ee227 100644
--- a/source/reference/mongo-shell.txt
+++ b/source/reference/mongo-shell.txt
@@ -141,7 +141,7 @@ The following table displays some common JavaScript operations:
   - Find all documents in the collection and returns a cursor.
-    See the :doc:`/applications/read` and
+    See the :doc:`/core/read` and
     :doc:`/core/read-operations` for more information and examples.
     See :ref:`read-operations-cursors` for additional information on
@@ -155,20 +155,20 @@ The following table displays some common JavaScript operations:
   - Update an existing document in the collection.
-    See :doc:`/applications/update` for more information.
+    See :doc:`/core/update` for more information.
 * - :method:`db.collection.save()`
   - Insert either a new document or update an existing document in
     the collection.
-    See :doc:`/applications/update` for more information.
+    See :doc:`/core/update` for more information.
 * - :method:`db.collection.remove()`
   - Delete documents from the collection.
-    See :doc:`/applications/delete` for more information.
+    See :doc:`/core/delete` for more information.
 * - :method:`db.collection.drop()`
@@ -188,13 +188,13 @@ The following table displays some common JavaScript operations:
 For more information on performing operations in the shell, see:
-- :doc:`/applications/create`
+- :doc:`/core/create`
-- :doc:`/applications/read`
+- :doc:`/core/read`
-- :doc:`/applications/update`
+- :doc:`/core/update`
-- :doc:`/applications/delete`
+- :doc:`/core/delete`
 - :doc:`/administration/indexes`
@@ -432,7 +432,7 @@ The following table provides some common read operations in the
 method is the :method:`~db.collection.find()` method with
 a :method:`limit(1) <cursor.limit>`.
-See :doc:`/applications/read` and :doc:`/core/read-operations`
+See :doc:`/core/read` and :doc:`/core/read-operations`
 documentation for more information and examples. See
 :doc:`/reference/operators` to specify other query operators.
diff --git a/source/reference/mongod.txt b/source/reference/mongod.txt
index c81ba9ada44..87baf0765d3 100644
--- a/source/reference/mongod.txt
+++ b/source/reference/mongod.txt
@@ -191,7 +191,7 @@ Options
   will continue to have access to the database until you create
   the first user.
-  See the :doc:`Security and Authentication </administration/security>`
+  See the :doc:`Security and Authentication </core/security>`
   page for more information regarding this functionality.
 .. option:: --cpu
@@ -465,7 +465,7 @@ Options
   field.
   :setting:`syncdelay` has no effect on the :setting:`journal`
-  files or :doc:`journaling </administration/journaling>`.
+  files or :doc:`journaling </core/journaling>`.
 .. warning::
@@ -653,8 +653,8 @@ Sharding Cluster Options
 SSL Options
 ```````````
-.. see:: :doc:`/administration/ssl` for full documentation of
-   MongoDB's support.
+.. see:: :doc:`/tutorial/configure-ssl` for full
+   documentation of MongoDB's support.
 .. |binary-name| replace:: :program:`mongod`
 .. include:: /includes/manpage-options-ssl.rst
diff --git a/source/reference/mongodb-extended-json.txt b/source/reference/mongodb-extended-json.txt
index ab86e6a6410..5093b6447f6 100644
--- a/source/reference/mongodb-extended-json.txt
+++ b/source/reference/mongodb-extended-json.txt
@@ -5,7 +5,7 @@ MongoDB Extended JSON
 .. default-domain:: mongodb
 MongoDB :doc:`import and export utilities
-</administration/import-export>` (i.e. :program:`mongoimport` and
+</core/import-export>` (i.e. :program:`mongoimport` and
 :program:`mongoexport`) and MongoDB :ecosystem:`REST Interfaces
 ` render an approximation of MongoDB :term:`BSON` documents in JSON
 format.
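As a sketch of the approximation described above, a single document
containing an ObjectId, a date, and a binary value might render in
strict-mode Extended JSON as follows; the field names and values are
illustrative only:

.. code-block:: javascript

   {
     "_id" : { "$oid" : "51b1c5c5a9306d4e1c000001" },       // BSON ObjectId
     "created" : { "$date" : 1370604997000 },               // BSON Date, milliseconds since the Unix epoch
     "payload" : { "$binary" : "dGVzdA==", "$type" : "00" } // BSON binary data, base64-encoded
   }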
diff --git a/source/reference/mongodump.txt b/source/reference/mongodump.txt
index ae1265599b7..95d8878a42b 100644
--- a/source/reference/mongodump.txt
+++ b/source/reference/mongodump.txt
@@ -13,7 +13,7 @@ Synopsis
 :program:`mongodump` is a utility for creating a binary export of the
 contents of a database. Consider using this utility as part of an
-effective :doc:`backup strategy </administration/backups>`. Use
+effective :doc:`backup strategy </core/backups>`. Use
 :program:`mongodump` in conjunction with :program:`mongorestore` to
 restore databases.
@@ -25,7 +25,7 @@ without an active :program:`mongod`.
 .. seealso:: :program:`mongorestore`,
    :doc:`/tutorial/backup-sharded-cluster-with-database-dumps`
-   and :doc:`/administration/backups`.
+   and :doc:`/core/backups`.
 Options
 -------
diff --git a/source/reference/mongoexport.txt b/source/reference/mongoexport.txt
index e331519b886..96880217379 100644
--- a/source/reference/mongoexport.txt
+++ b/source/reference/mongoexport.txt
@@ -12,7 +12,7 @@ Synopsis
 :program:`mongoexport` is a utility that produces a JSON or CSV export
 of data stored in a MongoDB instance. See the
-":doc:`/administration/import-export`" document for a more in depth
+":doc:`/core/import-export`" document for a more in depth
 usage overview, and the ":doc:`mongoimport`" document for more
 information regarding the :program:`mongoimport` utility, which
 provides the inverse "importing" capability.
@@ -22,7 +22,7 @@ provides the inverse "importing" capability.
   Do not use :program:`mongoimport` and :program:`mongoexport` for
   full-scale backups because they may not reliably capture data type
   information. Use :program:`mongodump` and :program:`mongorestore` as
-  described in ":doc:`/administration/backups`" for this kind of
+  described in ":doc:`/core/backups`" for this kind of
   functionality.
 Options
diff --git a/source/reference/mongoimport.txt b/source/reference/mongoimport.txt
index f77bf7d6fce..de8c8c85e29 100644
--- a/source/reference/mongoimport.txt
+++ b/source/reference/mongoimport.txt
@@ -13,7 +13,7 @@ Synopsis
 The :program:`mongoimport` tool provides a route to import content from a
 JSON, CSV, or TSV export created by :program:`mongoexport`, or
 potentially, another third-party export tool. See the
-":doc:`/administration/import-export`" document for a more in depth
+":doc:`/core/import-export`" document for a more in depth
 usage overview, and the ":doc:`mongoexport`" document for more
 information regarding :program:`mongoexport`, which
 provides the corresponding "exporting" capability.
@@ -23,7 +23,7 @@ provides the corresponding "exporting" capability.
   Do not use :program:`mongoimport` and :program:`mongoexport` for
   full-instance production backups because they will not reliably capture
   data type information. Use :program:`mongodump` and :program:`mongorestore` as
-  described in ":doc:`/administration/backups`" for this kind of
+  described in ":doc:`/core/backups`" for this kind of
   functionality.
 Options
diff --git a/source/reference/mongooplog.txt b/source/reference/mongooplog.txt
index 54396f2a78c..461989d66d3 100644
--- a/source/reference/mongooplog.txt
+++ b/source/reference/mongooplog.txt
@@ -30,7 +30,7 @@ operations to the host ``mongodb1.example.net``.
 If you do not need to keep the :option:`--from <mongooplog --from>` host
 running during the migration, consider using :program:`mongodump` and
 :program:`mongorestore` or another :doc:`backup
-</administration/backups>` operation, which may be better suited to
+</core/backups>` operation, which may be better suited to
 your operation.
 .. note::
   :program:`mongooplog` will not be able to copy oplog entries.
 .. seealso:: :program:`mongodump`, :program:`mongorestore`,
-   ":doc:`/administration/backups`", ":ref:`Oplog Internals Overview
+   ":doc:`/core/backups`", ":ref:`Oplog Internals Overview
    <replica-set-oplog>`", and ":ref:`Replica Set Oplog Sizing
    <replica-set-oplog-sizing>`".
diff --git a/source/reference/mongos.txt b/source/reference/mongos.txt
index d6d5f7e00a4..22cccadf810 100644
--- a/source/reference/mongos.txt
+++ b/source/reference/mongos.txt
@@ -277,7 +277,7 @@ Options
   recalculates the average.
   See the :ref:`replica-set-read-preference-behavior-member-selection`
-  section of the :ref:`read preference <replica-set-read-preference>`
+  section of the :doc:`read preference </core/read-preference>`
   documentation for more information.
 .. option:: --noAutoSplit
@@ -302,8 +302,8 @@ Options
 SSL Options
 ~~~~~~~~~~~
-.. see:: :doc:`/administration/ssl` for full documentation of
-   MongoDB's support.
+.. see:: :doc:`/tutorial/configure-ssl` for full
+   documentation of MongoDB's support.
 .. |binary-name| replace:: :program:`mongos`
 .. include:: /includes/manpage-options-auth.rst
diff --git a/source/core/object-id.txt b/source/reference/object-id.txt
similarity index 100%
rename from source/core/object-id.txt
rename to source/reference/object-id.txt
diff --git a/source/reference/operator/setOnInsert.txt b/source/reference/operator/setOnInsert.txt
index 98b31778eef..000c40eb633 100644
--- a/source/reference/operator/setOnInsert.txt
+++ b/source/reference/operator/setOnInsert.txt
@@ -47,10 +47,10 @@ $setOnInsert
   The :operator:`$setOnInsert` operator only affects
   :method:`~db.collection.update()` operations with the
   :term:`upsert` flag that perform an :doc:`insert
-  </applications/create>`.
+  </core/create>`.
   If the :method:`~db.collection.update()` has the upsert flag and
-  performs an :doc:`update </applications/update>`,
+  performs an :doc:`update </core/update>`,
   :operator:`$setOnInsert` has no effect.
 .. example::
diff --git a/source/reference/parameters.txt b/source/reference/parameters.txt
index bbbd58ede5f..73fc1297a9e 100644
--- a/source/reference/parameters.txt
+++ b/source/reference/parameters.txt
@@ -234,7 +234,7 @@ Parameters
   .. include:: /includes/warning-text-search-not-for-production.rst
-  Enables the :doc:`text search </applications/text-search>` feature.
+  Enables the :doc:`text search </core/text-search>` feature.
   You must enable the feature before creating or accessing a text
   index.
diff --git a/source/reference/replica-configuration.txt b/source/reference/replica-configuration.txt
index 62c0a4f6c2a..d5ebc8b77b3 100644
--- a/source/reference/replica-configuration.txt
+++ b/source/reference/replica-configuration.txt
@@ -242,7 +242,7 @@ Configuration Variables
   appears only when set to ``false``. If not set,
   :data:`~local.system.replset.settings.chainingAllowed` is ``true``.
-  .. seealso:: :ref:`replica-set-chained-replication`
+  .. seealso:: :doc:`/tutorial/manage-chained-replication`
 .. data:: local.system.replset.settings.getLastErrorDefaults
@@ -427,7 +427,7 @@ Tag sets provide custom and configurable :term:`write concern`
 and :term:`read preferences <read preference>` for a :term:`replica set`.
 This section outlines the process for specifying tags for a replica
 set; for more information, see the
-full documentation of the behavior of :ref:`tags sets for write concern
+full documentation of the behavior of :ref:`tag sets for write concern
 <replica-set-configuration-tag-sets>` and :ref:`tag sets for read preference
 <replica-set-read-preference-tag-sets>`.
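As a sketch of the document shape this section describes, a three-member
replica set configuration with tags and a custom ``getLastErrorModes``
entry might resemble the following; the host names and tag values are
hypothetical:

.. code-block:: javascript

   {
     _id : "rs0",
     members : [
       { _id : 0, host : "mongodb0.example.net:27017", tags : { "dc" : "east" } },
       { _id : 1, host : "mongodb1.example.net:27017", tags : { "dc" : "west" } },
       { _id : 2, host : "mongodb2.example.net:27017", tags : { "dc" : "east" } }
     ],
     settings : {
       // "multiDC" is satisfied only after a write propagates to members
       // with two distinct values of the "dc" tag.
       getLastErrorModes : { multiDC : { "dc" : 2 } }
     }
   }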
diff --git a/source/reference/server-status.txt b/source/reference/server-status.txt
index dbf6709b5c8..27f8673e090 100644
--- a/source/reference/server-status.txt
+++ b/source/reference/server-status.txt
@@ -468,7 +468,7 @@ connections
   available connections that the database can provide. Consider this
   value in combination with the value of
   :data:`~serverStatus.connections.current` to understand the connection load on
-  the database, and the :doc:`/administration/ulimit` document for
+  the database, and the :doc:`/reference/ulimit` document for
   more information about system thresholds on available connections.
 .. _server-status-extra-info:
@@ -978,7 +978,7 @@ Journaling (dur)
   interval is configurable using the ``--journalCommitInterval``
   option.
-  .. seealso:: ":doc:`/administration/journaling`" for more information about journaling operations.
+  .. seealso:: ":doc:`/core/journaling`" for more information about journaling operations.
 .. data:: serverStatus.dur.commits
@@ -1303,7 +1303,7 @@ metrics
 .. data:: serverStatus.metrics.operation.fastmod
   :data:`~serverStatus.metrics.operation.fastmod` reports the number
-  of :doc:`update </applications/update>` operations that neither
+  of :doc:`update </core/update>` operations that neither
   cause documents to grow nor require updates to the index. For
   example, this counter would record an update operation that use the
   :operator:`$inc` operator to increment the value of a field that is
diff --git a/source/reference/simple-aggregation.txt b/source/reference/simple-aggregation.txt
index c680d277b87..309f0af8b7c 100644
--- a/source/reference/simple-aggregation.txt
+++ b/source/reference/simple-aggregation.txt
@@ -5,7 +5,7 @@ Simple Aggregation Methods and Commands
 .. default-domain:: mongodb
 In addition to the :doc:`aggregation
-framework </applications/aggregation>` and :term:`map-reduce`, MongoDB
+framework </core/aggregation>` and :term:`map-reduce`, MongoDB
 provides the following methods and commands to perform aggregation:
 Count
diff --git a/source/reference/sql-aggregation-comparison.txt b/source/reference/sql-aggregation-comparison.txt
index 504946a32d9..942535f7d4b 100644
--- a/source/reference/sql-aggregation-comparison.txt
+++ b/source/reference/sql-aggregation-comparison.txt
@@ -8,7 +8,7 @@ SQL to Aggregation Framework Mapping Chart
   files in the includes directory. To change the content of the
   tables, edit those files.
-The :doc:`aggregation framework </applications/aggregation>` allows
+The :doc:`aggregation framework </core/aggregation>` allows
 MongoDB to provide native aggregation capabilities that corresponds to
 many common data aggregation operations in SQL. If you're new to
 MongoDB you might want to consider the :doc:`/faq` section for a
diff --git a/source/reference/system-collections.txt b/source/reference/system-collections.txt
index 9eca0285488..0949fdf6dd2 100644
--- a/source/reference/system-collections.txt
+++ b/source/reference/system-collections.txt
@@ -53,7 +53,8 @@ System collections include these collections stored directly in the database:
 .. data:: <database>.system.js
-   The :data:`<database>.system.js` collection holds special
-   JavaScript code for use in :doc:`server side JavaScript
-   </applications/server-side-javascript>`. See
-   :ref:`storing-server-side-javascript` for more information.
+   The :data:`<database>.system.js` collection holds special JavaScript
+   code for use in :doc:`server side JavaScript
+   </core/server-side-javascript>`. See
+   :doc:`/tutorial/store-javascript-function-on-server` for
+   more information.
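A brief sketch of the pattern, using a hypothetical ``echoFunction``;
``db.system.js.save()`` stores the function in the current database, and
:method:`db.eval()` is one way to invoke it on the server:

.. code-block:: javascript

   // Store a function in <database>.system.js.
   db.system.js.save( {
      _id : "echoFunction",
      value : function ( x ) { return x; }
   } )

   // Invoke the stored function on the server.
   db.eval( "echoFunction( 'hello' )" )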
diff --git a/source/reference/text-search.txt b/source/reference/text-search.txt
index 7228ba72933..4020f91a29b 100644
--- a/source/reference/text-search.txt
+++ b/source/reference/text-search.txt
@@ -83,7 +83,7 @@ The returned document contains the following fields:
   contained the stemmed term or terms. The
   :data:`~text.results.score` field signifies how well the document
   matched the stemmed term or terms. See
-  :ref:`text-index-internals-weights` for how you can
+  :doc:`/tutorial/control-results-of-text-search` for how you can
   adjust the scores for the matching words.
 .. data:: text.stats
diff --git a/source/administration/ulimit.txt b/source/reference/ulimit.txt
similarity index 100%
rename from source/administration/ulimit.txt
rename to source/reference/ulimit.txt
diff --git a/source/release-notes.txt b/source/release-notes.txt
index 64178587d00..183dc98bf2d 100644
--- a/source/release-notes.txt
+++ b/source/release-notes.txt
@@ -1,5 +1,3 @@
-:orphan:
-
 .. start-include-here
 Release Notes
diff --git a/source/release-notes/1.8.txt b/source/release-notes/1.8.txt
index ee31ab1f028..82c016656ae 100644
--- a/source/release-notes/1.8.txt
+++ b/source/release-notes/1.8.txt
@@ -211,7 +211,7 @@ Journaling
 ``````````
 Returning to 1.6 after using 1.8
-:doc:`Journaling </administration/journaling>` works
+:doc:`Journaling </core/journaling>` works
 fine, as journaling does not change anything about the data file
 format. Suppose you are running 1.8.x with journaling enabled and
 you decide to switch back to 1.6. There are two scenarios:
@@ -230,7 +230,7 @@ Changes
 Journaling
 ~~~~~~~~~~
-MongoDB now supports write-ahead :doc:`/administration/journaling` to
+MongoDB now supports write-ahead :doc:`/core/journaling` to
 facilitate fast crash recovery and durability in the storage engine.
 With journaling enabled, a :program:`mongod` can be quickly restarted
 following a crash without needing to repair the :term:`collections
@@ -306,7 +306,7 @@ Additional Changes and Enhancements
 1.7.5
 `````
-- :doc:`Journaling </administration/journaling>`.
+- :doc:`Journaling </core/journaling>`.
 - Extent allocation improvements.
diff --git a/source/release-notes/2.0.txt b/source/release-notes/2.0.txt
index eb2b8a13a5e..1441efa7dd1 100644
--- a/source/release-notes/2.0.txt
+++ b/source/release-notes/2.0.txt
@@ -237,7 +237,7 @@ propagates to a majority of nodes, effectively
 committing it. The value for "majority" will automatically adjust as you
 add or remove nodes from the set.
-For more information, see :ref:`replica-set-write-concern`.
+For more information, see :doc:`/core/write-concern`.
 Reconfiguration with a Minority Up
 ``````````````````````````````````
diff --git a/source/release-notes/2.2.txt b/source/release-notes/2.2.txt
index de53fd267e2..4efba60072a 100644
--- a/source/release-notes/2.2.txt
+++ b/source/release-notes/2.2.txt
@@ -154,7 +154,7 @@ operations without needing to use :term:`map-reduce`. The
 provides an interface to these operations. Consider the following
 resources for background on the aggregation framework and its use:
-- Documentation: :doc:`/applications/aggregation`
+- Documentation: :doc:`/core/aggregation`
 - Reference: :doc:`/reference/aggregation`
@@ -210,8 +210,8 @@ data most frequently.
 write-operations-write-concern
 Shard tagging controls data location, and is complementary but
-separate from replica set tagging, which controls :ref:`read
-preference <replica-set-read-preference>` and :ref:`write concern
+separate from replica set tagging, which controls :doc:`read
+preference </core/read-preference>` and :ref:`write concern
 <write-operations-write-concern>`.
For example, shard tagging can pin all "USA" data to one or more
logical shards, while replica set tagging can control which
:program:`mongod` instances (e.g. "``production``"
@@ -224,13 +224,14 @@ shell that support tagged sharding configuration:
 - :method:`sh.addTagRange()`
 - :method:`sh.removeShardTag()`
-Also, see :doc:`/administration/tag-aware-sharding`.
+Also, see :doc:`/core/tag-aware-sharding` and
+:doc:`/tutorial/administer-shard-tags`.
 Fully Supported Read Preference Semantics
 `````````````````````````````````````````
-All MongoDB clients and drivers now support full :ref:`read
-preferences <replica-set-read-preference>`, including consistent
+All MongoDB clients and drivers now support full :doc:`read
+preferences </core/read-preference>`, including consistent
 support for a full range of :ref:`read preference modes
 <replica-set-read-preference-modes>` and :ref:`tag sets
 <replica-set-read-preference-tag-sets>`. This support extends to the
diff --git a/source/release-notes/2.4.txt b/source/release-notes/2.4.txt
index 46837c2967b..635b4a5fa46 100644
--- a/source/release-notes/2.4.txt
+++ b/source/release-notes/2.4.txt
@@ -52,7 +52,7 @@ databases as a beta feature. With the new :ref:`text
 index <index-feature-text>` and the supporting :dbcommand:`text` command,
 you can search text in data stored in MongoDB, using an index that
 updates in real-time and is always consistent with the data set. See
-:doc:`/applications/text-search` for more information about text
+:doc:`/core/text-search` for more information about text
 search in MongoDB.
 .. _2.4-release-geospatial:
@@ -155,10 +155,11 @@ the legacy privilege documents, which MongoDB continues to support in
 Enhanced SSL Support
 ~~~~~~~~~~~~~~~~~~~~
-In 2.4, MongoDB instances can optionally require clients to provide
-SSL certificates signed by a Certificate Authority. You must use the
+In 2.4, MongoDB instances can optionally require clients to provide SSL
+certificates signed by a Certificate Authority. You must use the
 MongoDB distribution that supports SSL, and your client driver must
-support SSL. See :doc:`/administration/ssl` for more information.
+support SSL. See :doc:`/tutorial/configure-ssl` for more
+information.
 .. _2.4-unique-users:
@@ -377,7 +378,7 @@ Improvements to the Aggregation Framework
 MongoDB 2.4 introduces a number of enhancements and improved
 performance for the :doc:`Aggregation Framework
-</applications/aggregation>`. Consider the following additions in 2.4:
+</core/aggregation>`. Consider the following additions in 2.4:
 - :pipeline:`$match` queries now support the :operator:`$geoWithin`
   operator for bounded geospatial queries.
- Replica Set Use and Operation
 -----------------------------
@@ -27,8 +14,7 @@ Consider these higher level introductions to replica sets:
   :maxdepth: 1
   core/replication
-  administration/replica-sets
-  administration/replica-set-architectures
+  core/replica-set-architectures
   applications/replication
   core/replication-internals
@@ -39,7 +25,10 @@ deployments.
 .. toctree::
   :maxdepth: 1
-  administration/master-slave
+  core/master-slave
+
+For documentation of MongoDB's operational segregation capabilities for
+replica set deployments, see :doc:`/data-center-awareness`.
 .. index:: tutorials; replica sets
 .. _replica-set-tutorials-list:
@@ -50,34 +39,10 @@ Replica Set Tutorials and Procedures
 The following tutorials describe a number of common replica set
 maintenance and operational practices in greater detail.
-.. Updates to this tutorial list should also be made in
-   /source/administration/replica-sets.txt
-   and if appropriate in
-   /source/includes/list-administration-tutorials.rst
-
-Getting Started with Replica Sets
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. toctree::
-   :maxdepth: 1
-
-   tutorial/deploy-replica-set
-   tutorial/convert-standalone-to-replica-set
-   tutorial/expand-replica-set
-   tutorial/deploy-geographically-distributed-replica-set
-
-Replica Set Maintenance and Administration
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. toctree::
-   :maxdepth: 1
+.. toctree::
+   :maxdepth: 3
-   tutorial/change-oplog-size
-   tutorial/force-member-to-be-primary
-   tutorial/change-hostnames-in-a-replica-set
-   tutorial/convert-secondary-into-arbiter
-   tutorial/reconfigure-replica-set-with-unavailable-members
-   tutorial/recover-data-following-unexpected-shutdown
+   /administration/replica-sets
 Replica Set Reference Material
 ------------------------------
diff --git a/source/security.txt b/source/security.txt
index be097390b7e..01d93131f01 100644
--- a/source/security.txt
+++ b/source/security.txt
@@ -13,14 +13,16 @@ for different levels of access to databases and database operations.
 For more information, see the reference pages listed at the bottom of
 this page.
+If you believe you have discovered a vulnerability in MongoDB, please
+see :doc:`/tutorial/create-a-vulnerability-report`.
+
 Strategies and Practices
 ------------------------
 .. toctree::
   :maxdepth: 2
-  /administration/security
-  /administration/vulnerability-notification
+  /core/security
 Tutorials
 ---------
@@ -32,6 +34,7 @@ Tutorials
   /tutorial/configure-windows-netsh-firewall
   /tutorial/control-access-to-mongodb-with-authentication
   /tutorial/control-access-to-mongodb-with-kerberos-authentication
+  /tutorial/create-a-vulnerability-report
 Reference
 ---------
diff --git a/source/sharding.txt b/source/sharding.txt
index cac149adde2..ba469d0f021 100644
--- a/source/sharding.txt
+++ b/source/sharding.txt
@@ -5,92 +5,33 @@ Sharding
 .. _sharding-background:
 Sharding distributes a single logical database system across a cluster
-of machines. Sharding uses range-based portioning to distribute
-:term:`documents <document>` based on a specific :term:`shard key`.
+of machines.
-For a general introduction to sharding, cluster operations, and
-relevant implications and administration see:
-:doc:`/faq/sharding`.
-
-Sharded Cluster Use and Operation
----------------------------------
-
-The documents in this section introduce sharded clusters, their
-operation, functioning, and use. If you are unfamiliar with data
-partitioning, or MongoDB's sharding implementation begin with these
-documents:
+Sharding Concepts
+-----------------
 ..
toctree:: :maxdepth: 1 - core/sharded-clusters - administration/sharded-clusters - administration/sharded-cluster-architectures - core/sharded-cluster-internals - -Sharded Cluster Tutorials and Procedures ----------------------------------------- - -The documents listed in this section address common sharded cluster -operational practices in greater detail. + /core/sharded-clusters + /core/sharded-cluster-architectures + /core/sharded-cluster-query-routing + /core/sharded-cluster-security + /core/sharded-cluster-internals -Getting Started With Sharded Clusters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Administration +-------------- -.. toctree:: - :maxdepth: 1 - - tutorial/deploy-shard-cluster - tutorial/add-shards-to-shard-cluster - tutorial/view-sharded-cluster-configuration - -Sharded Cluster Maintenance and Administration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The following document provides a list of tutorials for administering +sharded clusters. .. toctree:: - :maxdepth: 1 - - tutorial/manage-sharded-cluster-config-server - tutorial/manage-chunks-in-sharded-cluster - tutorial/configure-sharded-cluster-balancer - tutorial/manage-sharded-cluster-balancer - tutorial/remove-shards-from-cluster - -Backup and Restore Sharded Clusters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. toctree:: - :maxdepth: 1 - - tutorial/backup-small-sharded-cluster-with-mongodump - tutorial/backup-sharded-cluster-with-filesystem-snapshots - tutorial/backup-sharded-cluster-with-database-dumps - tutorial/restore-single-shard - tutorial/restore-sharded-cluster - tutorial/schedule-backup-window-for-sharded-clusters - -Application Development Patterns for Sharded Clusters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following documents describe processes that application developers -may find useful when developing applications that use data stored in a -MongoDB sharded cluster. For some cases you will also want to consider -the documentation of :doc:`/data-center-awareness`. - -.. toctree:: - :maxdepth: 1 - - administration/tag-aware-sharding - tutorial/enforce-unique-keys-for-sharded-collections - tutorial/convert-replica-set-to-replicated-shard-cluster + :maxdepth: 2 -Sharded Cluster Reference -------------------------- + /administration/sharded-clusters -Consider the following reference material relevant to sharded cluster -use and administration. +Reference +--------- - :doc:`/reference/sharding-commands` - :doc:`/reference/config-database` @@ -99,7 +40,7 @@ use and administration. .. toctree:: :hidden: - reference/sharding-commands + /reference/sharding-commands .. STUB tutorial/replace-one-configuration-server-in-a-shard-cluster .. STUB tutorial/replace-all-configuration-servers-in-a-shard-cluster diff --git a/source/tutorial.txt b/source/tutorial.txt index ff80ac908c8..af782d07093 100644 --- a/source/tutorial.txt +++ b/source/tutorial.txt @@ -32,21 +32,68 @@ Getting Started Administration -------------- -.. toctree:: - :hidden: - - tutorial/use-database-commands - tutorial/recover-data-following-unexpected-shutdown - tutorial/manage-mongodb-processes - tutorial/convert-replica-set-to-replicated-shard-cluster - tutorial/copy-databases-between-instances - tutorial/backup-databases-with-binary-database-dumps - tutorial/backup-databases-with-filesystem-snapshots - tutorial/manage-the-database-profiler - tutorial/rotate-log-files - tutorial/roll-back-to-v1.8-index - -.. 
include:: /includes/list-administration-tutorials.rst +Replica Sets +~~~~~~~~~~~~ + +- :doc:`/tutorial/deploy-replica-set` +- :doc:`/tutorial/convert-standalone-to-replica-set` +- :doc:`/tutorial/expand-replica-set` +- :doc:`/tutorial/remove-replica-set-member` +- :doc:`/tutorial/replace-replica-set-member` +- :doc:`/tutorial/adjust-replica-set-member-priority` +- :doc:`/tutorial/resync-replica-set-member` +- :doc:`/tutorial/deploy-geographically-distributed-replica-set` +- :doc:`/tutorial/change-oplog-size` +- :doc:`/tutorial/force-member-to-be-primary` +- :doc:`/tutorial/change-hostnames-in-a-replica-set` +- :doc:`/tutorial/add-replica-set-arbiter` +- :doc:`/tutorial/convert-secondary-into-arbiter` +- :doc:`/tutorial/configure-replica-set-secondary-sync-target` +- :doc:`/tutorial/configure-a-delayed-replica-set-member` +- :doc:`/tutorial/configure-a-hidden-replica-set-member` +- :doc:`/tutorial/configure-a-non-voting-replica-set-member` +- :doc:`/tutorial/configure-secondary-only-replica-set-member` +- :doc:`/tutorial/manage-chained-replication` +- :doc:`/tutorial/reconfigure-replica-set-with-unavailable-members` +- :doc:`/tutorial/recover-data-following-unexpected-shutdown` +- :doc:`/tutorial/troubleshoot-replica-sets` + +Sharding +~~~~~~~~ + +- :doc:`/tutorial/deploy-shard-cluster` +- :doc:`/tutorial/convert-replica-set-to-replicated-shard-cluster` +- :doc:`/tutorial/add-shards-to-shard-cluster` +- :doc:`/tutorial/remove-shards-from-cluster` +- :doc:`/tutorial/backup-small-sharded-cluster-with-mongodump` +- :doc:`/tutorial/backup-sharded-cluster-with-filesystem-snapshots` +- :doc:`/tutorial/backup-sharded-cluster-with-database-dumps` +- :doc:`/tutorial/restore-single-shard` +- :doc:`/tutorial/restore-sharded-cluster` +- :doc:`/tutorial/schedule-backup-window-for-sharded-clusters` +- :doc:`/tutorial/administer-shard-tags` + +Basic Operations +~~~~~~~~~~~~~~~~ + +- :doc:`/tutorial/use-database-commands` +- :doc:`/tutorial/recover-data-following-unexpected-shutdown` +- :doc:`/tutorial/copy-databases-between-instances` +- :doc:`/tutorial/expire-data` +- :doc:`/tutorial/manage-the-database-profiler` +- :doc:`/tutorial/rotate-log-files` +- :doc:`/tutorial/roll-back-to-v1.8-index` +- :doc:`/tutorial/manage-mongodb-processes` +- :doc:`/tutorial/backup-databases-with-binary-database-dumps` +- :doc:`/tutorial/backup-databases-with-filesystem-snapshots` + +Security +~~~~~~~~ + +- :doc:`/tutorial/configure-linux-iptables-firewall` +- :doc:`/tutorial/configure-windows-netsh-firewall` +- :doc:`/tutorial/control-access-to-mongodb-with-authentication` + .. index:: tutorials; development patterns .. index:: development tutorials @@ -63,6 +110,10 @@ Development Patterns - :doc:`/tutorial/aggregation-examples` - :doc:`/tutorial/model-data-for-keyword-search` - :doc:`/tutorial/limit-number-of-elements-in-updated-array` +- :doc:`/tutorial/perform-incremental-map-reduce` +- :doc:`/tutorial/troubleshoot-map-function` +- :doc:`/tutorial/troubleshoot-reduce-function` +- :doc:`/tutorial/store-javascript-function-on-server` .. index:: tutorials; application development .. 
index:: application tutorials
@@ -85,6 +85,10 @@ Text Search Patterns
 - :doc:`/tutorial/enable-text-search`
 - :doc:`/tutorial/search-for-text`
+- :doc:`/tutorial/create-text-index-on-multiple-fields`
+- :doc:`/tutorial/specify-language-for-text-index`
+- :doc:`/tutorial/avoid-text-index-name-limit`
+- :doc:`/tutorial/control-results-of-text-search`
 - :doc:`/tutorial/create-text-index-on-multi-language-collection`
 - :doc:`/tutorial/return-text-queries-using-only-text-index`
 - :doc:`/tutorial/limit-number-of-items-scanned-for-text-search`
diff --git a/source/tutorial/add-replica-set-arbiter.txt b/source/tutorial/add-replica-set-arbiter.txt
new file mode 100644
index 00000000000..f9ae68380a6
--- /dev/null
+++ b/source/tutorial/add-replica-set-arbiter.txt
@@ -0,0 +1,60 @@
+=============================
+Add an Arbiter to Replica Set
+=============================
+
+.. default-domain:: mongodb
+
+Arbiters are special :program:`mongod` instances that do not hold a
+copy of the data and thus cannot become primary. Arbiters exist solely
+to participate in :ref:`elections <replica-set-elections>`. Because
+arbiters do not hold copies of collection data, they have minimal
+resource requirements and do not require dedicated hardware.
+
+.. note::
+
+   Because of their minimal system requirements, you may safely deploy an
+   arbiter on a system with another workload, such as an application
+   server or monitoring member.
+
+.. warning::
+
+   Do not run arbiter processes on a system that is an active
+   :term:`primary` or :term:`secondary` of its :term:`replica set`.
+
+Add an Arbiter
+--------------
+
+.. note::
+   To prevent tied :term:`elections <election>`, do not add an arbiter
+   to a set if the set already has an odd number of voting members.
+
+1. Create a data directory for the arbiter. The :program:`mongod` uses
+   this directory for configuration information. It *will not* hold
+   database collection data. The following example creates the
+   ``/data/arb`` data directory:
+
+   .. code-block:: sh
+
+      mkdir /data/arb
+
+#. Start the arbiter, making sure to specify the replica set name and
+   the data directory. Consider the following example:
+
+   .. code-block:: sh
+
+      mongod --port 30000 --dbpath /data/arb --replSet rs
+
+#. In a :program:`mongo` shell connected to the :term:`primary`, add the
+   arbiter to the replica set by issuing the :method:`rs.addArb()`
+   method, which uses the following syntax:
+
+   .. code-block:: javascript
+
+      rs.addArb("<hostname>:<port>")
+
+   For example, if the arbiter runs on ``m1.example.net:30000``, you
+   would issue this command:
+
+   .. code-block:: javascript
+
+      rs.addArb("m1.example.net:30000")
diff --git a/source/tutorial/adjust-replica-set-member-priority.txt b/source/tutorial/adjust-replica-set-member-priority.txt
new file mode 100644
index 00000000000..b301118b519
--- /dev/null
+++ b/source/tutorial/adjust-replica-set-member-priority.txt
@@ -0,0 +1,66 @@
+======================================
+Adjust Priority for Replica Set Member
+======================================
+
+.. default-domain:: mongodb
+
+To change the value of the
+:data:`~local.system.replset.members[n].priority` in the replica set
+configuration, use the following sequence of commands in the
+:program:`mongo` shell:
+
+..
code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[0].priority = 0.5
+   cfg.members[1].priority = 2
+   cfg.members[2].priority = 2
+   rs.reconfig(cfg)
+
+The first operation uses :method:`rs.conf()` to set the local variable
+``cfg`` to the contents of the current replica set configuration, which
+is a :term:`document`. The next three operations change the
+:data:`~local.system.replset.members[n].priority` value in the ``cfg``
+document for the first three members configured in the :data:`members
+<local.system.replset.members>` array. The final operation calls
+:method:`rs.reconfig()` with the argument of ``cfg`` to initialize the
+new configuration.
+
+.. include:: /includes/note-rs-conf-array-index.rst
+
+If a member has :data:`~local.system.replset.members[n].priority` set
+to ``0``, it is ineligible to become :term:`primary` and will not seek
+election. :ref:`Hidden members <replica-set-hidden-members>`,
+:ref:`delayed members <replica-set-delayed-members>`, and
+:ref:`arbiters <replica-set-arbiters>` all have
+:data:`~local.system.replset.members[n].priority` set to ``0``.
+
+All members have a :data:`~local.system.replset.members[n].priority`
+equal to ``1`` by default.
+
+The value of :data:`~local.system.replset.members[n].priority` can be
+any floating point (i.e. decimal) number between ``0`` and ``1000``.
+Priorities are only used to determine the preference in elections. The
+priority value is used only in relation to other members. With the
+exception of members with a priority of ``0``, the absolute value of
+the :data:`~local.system.replset.members[n].priority` value is
+irrelevant.
+
+Replica sets will preferentially elect and maintain the primary status
+of the member with the highest
+:data:`~local.system.replset.members[n].priority` setting.
+
+.. warning::
+
+   Replica set reconfiguration can force the current primary to step
+   down, leading to an election for primary in the replica
+   set. Elections cause the current primary to close all open
+   :term:`client` connections.
+
+   Perform routine replica set reconfiguration during scheduled
+   maintenance windows.
+
+.. seealso:: The :ref:`Replica Reconfiguration Usage
+   <replica-set-reconfiguration-usage>` example revolves around
+   changing the priorities of the :data:`~local.system.replset.members`
+   of a replica set.
diff --git a/source/tutorial/administer-shard-tags.txt b/source/tutorial/administer-shard-tags.txt
new file mode 100644
index 00000000000..c6dc1218804
--- /dev/null
+++ b/source/tutorial/administer-shard-tags.txt
@@ -0,0 +1,113 @@
+================================
+Administer and Manage Shard Tags
+================================
+
+.. default-domain:: mongodb
+
+In a sharded cluster, you can use tags to associate specific ranges of
+a :term:`shard key` with a specific :term:`shard` or subset of shards.
+
+Tag a Shard
+-----------
+
+Associate tags with a particular shard using the
+:method:`sh.addShardTag()` method when connected to a :program:`mongos`
+instance. A single shard may have multiple tags, and multiple shards
+may also have the same tag.
+
+.. example::
+
+   The following example adds the tag ``NYC`` to two shards, and the tags
+   ``SFO`` and ``NRT`` to a third shard:
+
+   .. code-block:: javascript
+
+      sh.addShardTag("shard0000", "NYC")
+      sh.addShardTag("shard0001", "NYC")
+      sh.addShardTag("shard0002", "SFO")
+      sh.addShardTag("shard0002", "NRT")
+
+You may remove tags from a particular shard using the
+:method:`sh.removeShardTag()` method when connected to a
+:program:`mongos` instance, as in the following example, which removes
+the ``NRT`` tag from a shard:
+
+..
code-block:: javascript
+
+   sh.removeShardTag("shard0002", "NRT")
+
+Tag a Shard Key Range
+---------------------
+
+To assign a tag to a range of shard keys, use the
+:method:`sh.addTagRange()` method when connected to a
+:program:`mongos` instance. Any given shard key range may only have
+*one* assigned tag. You cannot overlap defined ranges, or tag the same
+range more than once.
+
+.. example::
+
+   Given a collection named ``users`` in the ``records`` database,
+   sharded by the ``zipcode`` field, the following operations assign:
+
+   - the ``NYC`` tag to two ranges of zip codes in Manhattan and Brooklyn
+
+   - the ``SFO`` tag to one range of zip codes in San Francisco
+
+   .. code-block:: javascript
+
+      sh.addTagRange("records.users", { zipcode: "10001" }, { zipcode: "10281" }, "NYC")
+      sh.addTagRange("records.users", { zipcode: "11201" }, { zipcode: "11240" }, "NYC")
+      sh.addTagRange("records.users", { zipcode: "94102" }, { zipcode: "94135" }, "SFO")
+
+.. note::
+
+   Shard ranges are always inclusive of the lower value and exclusive
+   of the upper boundary.
+
+Remove a Tag From a Shard Key Range
+-----------------------------------
+
+The :program:`mongod` does not provide a helper for removing a tag
+range. You may delete tag assignment from a shard key range by removing
+the corresponding document from the :data:`~config.tags` collection of
+the ``config`` database.
+
+Each document in the :data:`~config.tags` collection holds the
+:term:`namespace` of the sharded collection and a minimum shard key value.
+
+.. example::
+
+   The following example removes the ``NYC`` tag assignment for the
+   range of zip codes within Manhattan:
+
+   .. code-block:: javascript
+
+      use config
+      db.tags.remove({ _id: { ns: "records.users", min: { zipcode: "10001" }}, tag: "NYC" })
+
+View Existing Shard Tags
+------------------------
+
+The output from :method:`sh.status()` lists tags associated with a
+shard, if any, for each shard. A shard's tags exist in the shard's
+document in the :data:`~config.shards` collection of the ``config``
+database. To return all shards with a specific tag, use a sequence of
+operations that resemble the following, which will return only those
+shards tagged with ``NYC``:
+
+.. code-block:: javascript
+
+   use config
+   db.shards.find({ tags: "NYC" })
+
+You can find tag ranges for all :term:`namespaces <namespace>` in the
+:data:`~config.tags` collection of the ``config`` database. The output
+of :method:`sh.status()` displays all tag ranges. To return all shard
+key ranges tagged with ``NYC``, use the following sequence of
+operations:
+
+.. code-block:: javascript
+
+   use config
+   db.tags.find({ tag: "NYC" })
diff --git a/source/tutorial/aggregation-examples.txt b/source/tutorial/aggregation-examples.txt
index b8c87137af7..ad7c3a0ac2e 100644
--- a/source/tutorial/aggregation-examples.txt
+++ b/source/tutorial/aggregation-examples.txt
@@ -8,7 +8,7 @@ MongoDB provides flexible data aggregation functionality
 with the :dbcommand:`aggregate` command.
For additional information about aggregation consider the following resources: -- :doc:`/applications/aggregation` +- :doc:`/core/aggregation` - :doc:`/reference/aggregation` - :doc:`/reference/sql-aggregation-comparison` diff --git a/source/tutorial/avoid-text-index-name-limit.txt b/source/tutorial/avoid-text-index-name-limit.txt new file mode 100644 index 00000000000..48f30003eae --- /dev/null +++ b/source/tutorial/avoid-text-index-name-limit.txt @@ -0,0 +1,48 @@ +====================================================== +Specify ``text`` Index Name to Avoid Name Length Limit +====================================================== + +.. default-domain:: mongodb + +The default name for the index consists of each indexed field name +concatenated with ``_text``. For example, the following command creates +a ``text`` index on the fields ``content``, ``users.comments``, and +``users.profiles``: + +.. code-block:: javascript + + db.collection.ensureIndex( + { + content: "text", + "users.comments": "text", + "users.profiles": "text" + } + ) + +The default name for the index is: + +.. code-block:: javascript + + "content_text_users.comments_text_users.profiles_text" + +To avoid creating an index with a name that exceeds the :limit:`index +name length limit `, you can pass the ``name`` +option to the :method:`db.collection.ensureIndex()` method: + +.. code-block:: javascript + + db.collection.ensureIndex( + { + content: "text", + "users.comments": "text", + "users.profiles": "text" + }, + { + name: "MyTextIndex" + } + ) + +.. note:: + + To drop the ``text`` index, use the index name. To get the name of + an index, use :method:`db.collection.getIndexes()`. diff --git a/source/tutorial/backup-databases-with-binary-database-dumps.txt b/source/tutorial/backup-databases-with-binary-database-dumps.txt index 9bd97eafeb2..9605fbfafcf 100644 --- a/source/tutorial/backup-databases-with-binary-database-dumps.txt +++ b/source/tutorial/backup-databases-with-binary-database-dumps.txt @@ -13,7 +13,7 @@ capabilities, consider the backup methods described in .. seealso:: - - :doc:`/administration/backups` + - :doc:`/core/backups` - :doc:`/reference/mongodump` - :doc:`/reference/mongorestore` diff --git a/source/tutorial/backup-databases-with-filesystem-snapshots.txt b/source/tutorial/backup-databases-with-filesystem-snapshots.txt index bb569234cbe..e76b6175bc9 100644 --- a/source/tutorial/backup-databases-with-filesystem-snapshots.txt +++ b/source/tutorial/backup-databases-with-filesystem-snapshots.txt @@ -13,7 +13,7 @@ level tools to create copies of the device that holds MongoDB's data files. These methods complete quickly and work reliably, but require more system configuration outside of MongoDB. -.. seealso:: :doc:`/administration/backups` and +.. seealso:: :doc:`/core/backups` and :doc:`/tutorial/backup-databases-with-binary-database-dumps`. .. _snapshots-overview: diff --git a/source/tutorial/build-indexes-in-the-background.txt b/source/tutorial/build-indexes-in-the-background.txt new file mode 100644 index 00000000000..7854d23560b --- /dev/null +++ b/source/tutorial/build-indexes-in-the-background.txt @@ -0,0 +1,32 @@ +.. index:: index; create in background +.. _index-create-in-background: + +=============================== +Build Indexes in the Background +=============================== + +.. default-domain:: mongodb + +By default, MongoDB builds indexes in the foreground, which means that +these indexes block all other read and write operations to the +database while the index builds. 
:ref:`Background index construction
+<index-creation-background>` allows read and write operations to
+continue while building the index; however, these index builds take
+longer to complete and result in a larger index.
+
+.. seealso:: :doc:`/core/indexes` and :doc:`/administration/indexes`
+   for more information.
+
+Procedure
+---------
+To create an index in the background, add the ``background`` argument
+to the :method:`~db.collection.ensureIndex()` operation, as in the
+following index:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1 }, { background: true } )
+
+Consider the section on :ref:`background index construction
+<index-creation-background>` for more information about these indexes
+and their implications.
diff --git a/source/tutorial/build-indexes-on-replica-sets.txt b/source/tutorial/build-indexes-on-replica-sets.txt
new file mode 100644
index 00000000000..8869b0fa66d
--- /dev/null
+++ b/source/tutorial/build-indexes-on-replica-sets.txt
@@ -0,0 +1,142 @@
+.. index:: index; replica set
+.. index:: replica set; index
+.. _index-build-on-replica-sets:
+.. _index-building-replica-sets:
+
+=============================
+Build Indexes on Replica Sets
+=============================
+
+.. default-domain:: mongodb
+
+:ref:`Background index creation operations
+<index-creation-background>` become *foreground* indexing operations
+on :term:`secondary` members of replica sets. The foreground index
+building process blocks all replication and read operations on the
+secondaries while they build the index.
+
+Secondaries will begin building indexes *after* the
+:term:`primary` finishes building the index. In :term:`sharded clusters
+<sharded cluster>`, the :program:`mongos` will send :method:`ensureIndex()
+<db.collection.ensureIndex>` to the primary members of the replica
+set for each shard, which then replicate to the secondaries after the
+primary finishes building the index.
+
+To minimize the impact of building an index on your replica set, use
+the following procedure to build indexes on secondaries:
+
+.. see:: :doc:`/administration/indexes` and :doc:`/core/indexes` for
+   more information.
+
+Considerations
+--------------
+
+.. warning::
+
+   Ensure that your :term:`oplog` is large enough to permit the
+   indexing or re-indexing operation to complete without falling
+   too far behind to catch up. See the ":ref:`oplog sizing
+   <replica-set-oplog-sizing>`" documentation for additional
+   information.
+
+.. note::
+
+   This procedure *does* take one member out of the replica set at a
+   time. However, this procedure will only affect one member of the
+   set at a time rather than *all* secondaries at the same time.
+
+Procedure
+---------
+
+.. note::
+
+   If you need to build an index in a :term:`sharded cluster`, repeat
+   the following procedure for each replica set that provides each
+   :term:`shard`.
+
+.. _tutorial-index-on-replica-sets-stop-one-member:
+
+Stop One Secondary
+~~~~~~~~~~~~~~~~~~
+
+Stop the :program:`mongod` process on one secondary. Restart the
+:program:`mongod` process *without* the :option:`--replSet <mongod --replSet>`
+option and running on a different port. [#different-port]_ This
+instance is now in "standalone" mode.
+
+For example, if your :program:`mongod` *normally* runs on the
+default port of ``27017`` with the :option:`--replSet
+<mongod --replSet>` option you would use the following invocation:
+
+.. code-block:: sh
+
+   mongod --port 47017
+
+.. [#different-port] By running the :program:`mongod` on a different
+   port, you ensure that the other members of the replica set and all
+   clients will not contact the member while you are building the
+   index.
+
+..
_tutorial-index-on-replica-sets-build-index:
+
+Build the Index
+~~~~~~~~~~~~~~~
+
+Create the new index using the :method:`~db.collection.ensureIndex()`
+in the :program:`mongo` shell, or a comparable method in your
+driver. This operation will create or rebuild the index on this
+:program:`mongod` instance.
+
+For example, to create an ascending index on the ``username`` field of
+the ``records`` collection, use the following :program:`mongo` shell
+operation:
+
+.. code-block:: javascript
+
+   db.records.ensureIndex( { username: 1 } )
+
+.. seealso:: :doc:`/tutorial/create-an-index` and
+   :doc:`/tutorial/create-a-compound-index` for more information.
+
+.. _tutorial-index-on-replica-sets-restart-mongod:
+
+Restart the Program ``mongod``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the index build completes, restart the :program:`mongod` instance
+with the :option:`--replSet <mongod --replSet>` option on its usual port:
+
+.. code-block:: sh
+
+   mongod --port 27017 --replSet rs0
+
+Modify the port number (e.g. ``27017``) or the replica set name
+(e.g. ``rs0``) as needed.
+
+Allow replication to catch up on this member.
+
+Build Indexes on all Secondaries
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For each secondary in the set, build an index according to the
+following steps:
+
+#. :ref:`tutorial-index-on-replica-sets-stop-one-member`
+#. :ref:`tutorial-index-on-replica-sets-build-index`
+#. :ref:`tutorial-index-on-replica-sets-restart-mongod`
+
+Build the Index on the Primary
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Finally, to build the index on the :term:`primary`, begin by stepping
+down the primary. Use the :method:`rs.stepDown()` method in the
+:program:`mongo` shell to cause the current primary to become a
+secondary gracefully and allow the set to elect another member as
+primary.
+
+Then repeat the index building procedure, listed below, to build the
+index on the primary:
+
+#. :ref:`tutorial-index-on-replica-sets-stop-one-member`
+#. :ref:`tutorial-index-on-replica-sets-build-index`
+#. :ref:`tutorial-index-on-replica-sets-restart-mongod`
diff --git a/source/tutorial/configure-a-delayed-replica-set-member.txt b/source/tutorial/configure-a-delayed-replica-set-member.txt
new file mode 100644
index 00000000000..2b7b621efcb
--- /dev/null
+++ b/source/tutorial/configure-a-delayed-replica-set-member.txt
@@ -0,0 +1,41 @@
+======================================
+Configure a Delayed Replica Set Member
+======================================
+
+.. default-domain:: mongodb
+
+To configure a :term:`replica set` member with a one hour delay, use the
+following sequence of operations in the :program:`mongo` shell:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[0].priority = 0
+   cfg.members[0].slaveDelay = 3600
+   rs.reconfig(cfg)
+
+.. include:: /includes/warning-rs-reconfig.rst
+
+After the replica set reconfigures, the first member of the set in the
+:data:`~local.system.replset.members` array will have a priority
+of ``0`` and cannot become :term:`primary`. The :data:`slaveDelay
+<local.system.replset.members[n].slaveDelay>` value
+delays both replication and the member's :term:`oplog` by 3600 seconds (1
+hour). Setting :data:`~local.system.replset.members[n].slaveDelay` to a
+non-zero value also sets :data:`~local.system.replset.members[n].hidden` to
+``true`` for this replica set so that it does not receive application
+queries in normal operations.
+
+.. include:: /includes/seealso-elections.rst
+
+.. warning::
+
+   The length of the secondary
+   :data:`~local.system.replset.members[n].slaveDelay` must
+   fit within the window of the oplog.
If the oplog is shorter than
+   the :data:`~local.system.replset.members[n].slaveDelay`
+   window, the delayed member cannot successfully replicate
+   operations.
+
+.. seealso:: :data:`~local.system.replset.members[n].slaveDelay`, :ref:`Replica Set Reconfiguration
+   <replica-set-reconfiguration-usage>`, :ref:`replica-set-oplog-sizing`,
+   and the :doc:`/tutorial/change-oplog-size` tutorial.
diff --git a/source/tutorial/configure-a-hidden-replica-set-member.txt b/source/tutorial/configure-a-hidden-replica-set-member.txt
new file mode 100644
index 00000000000..988b7450220
--- /dev/null
+++ b/source/tutorial/configure-a-hidden-replica-set-member.txt
@@ -0,0 +1,48 @@
+========================================
+Configure a Replica Set Member as Hidden
+========================================
+
+.. default-domain:: mongodb
+
+Hidden members are part of a replica set but cannot become primary and
+are invisible to client applications. *However,* hidden members **do**
+vote in :ref:`elections <replica-set-elections>`.
+
+Hidden members are ideal for instances that will have significantly
+different usage patterns than the other members and require separation
+from normal traffic. Typically, hidden members provide reporting,
+dedicated backups, and dedicated read-only testing and integration
+support.
+
+Hidden members have :data:`~local.system.replset.members[n].priority` set
+to ``0`` and have :data:`~local.system.replset.members[n].hidden` set to ``true``.
+
+To configure a :term:`hidden member`, on the :term:`primary`, use the
+following sequence of operations in the :program:`mongo` shell:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[0].priority = 0
+   cfg.members[0].hidden = true
+   rs.reconfig(cfg)
+
+.. include:: /includes/warning-rs-reconfig.rst
+
+After re-configuring the set, the first member of the set in the
+:data:`~local.system.replset.members` array will have a priority of ``0``
+so that it cannot become primary. The other members in the set will
+not advertise the hidden member in the :dbcommand:`isMaster` or
+:method:`db.isMaster()` output.
+
+.. include:: /includes/seealso-elections.rst
+
+.. versionchanged:: 2.0
+
+   For :term:`sharded clusters <sharded cluster>` running with replica
+   sets before 2.0, if you reconfigured a member as hidden, you *had*
+   to restart :program:`mongos` to prevent queries from reaching the
+   hidden member.
+
+.. seealso:: :ref:`Read Preference <replica-set-read-preference>`
+   and :ref:`Replica Set Reconfiguration <replica-set-reconfiguration-usage>`.
diff --git a/source/tutorial/configure-a-non-voting-replica-set-member.txt b/source/tutorial/configure-a-non-voting-replica-set-member.txt
new file mode 100644
index 00000000000..40802659f03
--- /dev/null
+++ b/source/tutorial/configure-a-non-voting-replica-set-member.txt
@@ -0,0 +1,48 @@
+=========================================
+Configure a Non-Voting Replica Set Member
+=========================================
+
+.. default-domain:: mongodb
+
+You may choose to change the number of votes that each member has in
+:ref:`elections <replica-set-elections>` for :term:`primary`. In general, all
+members should have only 1 vote to prevent intermittent ties, deadlock,
+or the wrong members from becoming :term:`primary`. Use :ref:`replica
+set priorities <replica-set-node-priority>` to control which members
+are more likely to become primary.
+
+To disable a member's ability to vote in elections, use the following
+command sequence in the :program:`mongo` shell.
+
+..
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[3].votes = 0
+   cfg.members[4].votes = 0
+   cfg.members[5].votes = 0
+   rs.reconfig(cfg)
+
+This sequence gives ``0`` votes to the fourth, fifth, and sixth
+members of the set according to the order of the
+:data:`~local.system.replset.members` array in the output of
+:method:`rs.conf()`. This setting allows the set to elect these
+members as :term:`primary` but does not allow them to vote in
+elections. If you have three non-voting members, you can add three
+additional voting members to your set. Place voting members so that
+your designated primary or primaries can reach a majority of votes in
+the event of a network partition.
+
+.. include:: /includes/warning-rs-reconfig.rst
+
+.. note::
+
+   In general and when possible, all members should have only 1 vote. This
+   prevents intermittent ties, deadlocks, or the wrong members from
+   becoming primary. Use :ref:`Replica Set Priorities
+   ` to control which members are more
+   likely to become primary.
+
+.. seealso:: :data:`~local.system.replset.members[n].votes` and :ref:`Replica Set
+   Reconfiguration `.
+
+.. include:: /includes/seealso-elections.rst
diff --git a/source/tutorial/configure-linux-iptables-firewall.txt b/source/tutorial/configure-linux-iptables-firewall.txt
index 648749131c3..42b1b012007 100644
--- a/source/tutorial/configure-linux-iptables-firewall.txt
+++ b/source/tutorial/configure-linux-iptables-firewall.txt
@@ -14,7 +14,7 @@ system.
 This document outlines basic firewall configurations for ``iptables``
 firewalls on Linux. Use these approaches as a starting point for your
 larger networking organization. For a detailed over view of security
-practices and risk management for MongoDB, see :doc:`/administration/security`.
+practices and risk management for MongoDB, see :doc:`/core/security`.
 
 .. seealso:: For MongoDB deployments on Amazon's web services, see the
    :ecosystem:`Amazon EC2 ` page, which addresses Amazon's
diff --git a/source/tutorial/configure-replica-set-secondary-sync-target.txt b/source/tutorial/configure-replica-set-secondary-sync-target.txt
new file mode 100644
index 00000000000..90a4204dd0f
--- /dev/null
+++ b/source/tutorial/configure-replica-set-secondary-sync-target.txt
@@ -0,0 +1,27 @@
+===================================
+Configure a Secondary's Sync Target
+===================================
+
+.. default-domain:: mongodb
+
+To override the default sync target selection logic, you may
+temporarily configure a :term:`secondary` member's sync target for
+pulling :term:`oplog` entries. The following operations provide
+access to this functionality:
+
+- the :dbcommand:`replSetSyncFrom` command, or
+
+- the :method:`rs.syncFrom()` helper in the :program:`mongo` shell
+
+Only modify the default sync logic as needed, and always exercise
+caution. :method:`rs.syncFrom()` will not affect an in-progress
+initial sync operation. To affect the sync target for the initial sync, run
+the :method:`rs.syncFrom()` operation *before* initial sync.
+
+If you run :method:`rs.syncFrom()` during initial sync, MongoDB
+produces no error messages, but the sync target will not change until
+after the initial sync operation.
+
+.. note::
+
+   .. include:: /includes/fact-replica-set-sync-from-is-temporary.rst
diff --git a/source/tutorial/configure-secondary-only-replica-set-member.txt b/source/tutorial/configure-secondary-only-replica-set-member.txt
new file mode 100644
index 00000000000..acf987cd2ac
--- /dev/null
+++ b/source/tutorial/configure-secondary-only-replica-set-member.txt
@@ -0,0 +1,73 @@
+================================================
+Prevent Replica Set Member from Becoming Primary
+================================================
+
+.. default-domain:: mongodb
+
+The secondary-only configuration prevents a :term:`secondary` member in a
+:term:`replica set` from ever becoming a :term:`primary` in a
+:term:`failover`. You can set secondary-only mode for any member of
+the set except the current primary.
+
+For example, you may want to configure all members of a replica set
+located outside of the main data centers as secondary-only to prevent
+these members from ever becoming primary.
+
+To configure a member as secondary-only, set its
+:data:`~local.system.replset.members[n].priority` value to ``0``. Any member with a
+:data:`~local.system.replset.members[n].priority` equal to ``0`` will never seek
+:ref:`election ` and cannot become primary in any
+situation. For more information on priority levels, see
+:ref:`replica-set-node-priority`.
+
+.. include:: /includes/note-rs-conf-array-index.rst
+
+As an example of modifying member priorities, assume a four-member
+replica set. Use the following sequence of operations in the
+:program:`mongo` shell to modify member priorities:
+
+.. code-block:: javascript
+
+   cfg = rs.conf()
+   cfg.members[0].priority = 2
+   cfg.members[1].priority = 1
+   cfg.members[2].priority = 0.5
+   cfg.members[3].priority = 0
+   rs.reconfig(cfg)
+
+This reconfigures the set with the following priority settings:
+
+- Member ``0`` to a priority of ``2`` so that it becomes primary under
+  most circumstances.
+
+- Member ``1`` to a priority of ``1``, which is the default value.
+  Member ``1`` becomes primary if no member with a *higher* priority is
+  eligible.
+
+- Member ``2`` to a priority of ``0.5``, which makes it less likely to
+  become primary than other members but doesn't prohibit the
+  possibility.
+
+- Member ``3`` to a priority of ``0``.
+  Member ``3`` cannot become the :term:`primary` member under any
+  circumstances.
+
+.. include:: /includes/warning-rs-reconfig.rst
+
+.. note::
+
+   - If your replica set has an even number of members, add an
+     :doc:`arbiter ` to ensure that
+     members can quickly obtain a majority of votes in an election for
+     primary.
+
+   - MongoDB does not permit the current :term:`primary` to have a
+     :data:`~local.system.replset.members[n].priority` of ``0``. If you
+     want to prevent the current primary from becoming primary, first
+     use :method:`rs.stepDown()` to step down the current primary, and
+     then :ref:`reconfigure the replica set
+     ` with :method:`rs.conf()` and
+     :method:`rs.reconfig()`, as sketched in the example below.
+
+.. seealso:: :data:`~local.system.replset.members[n].priority` and
+   :ref:`Replica Set Reconfiguration `.
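+
+For example, the following sequence is a minimal sketch of the process
+described in the note above, assuming the member to demote is
+``members[0]`` in the configuration:
+
+.. code-block:: javascript
+
+   // on the current primary: step down so the set elects a new primary
+   rs.stepDown()
+
+   // then, connected to the new primary, give the former primary a
+   // priority of 0 so that it cannot become primary again
+   cfg = rs.conf()
+   cfg.members[0].priority = 0
+   rs.reconfig(cfg)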
diff --git a/source/administration/ssl.txt b/source/tutorial/configure-ssl.txt
similarity index 100%
rename from source/administration/ssl.txt
rename to source/tutorial/configure-ssl.txt
diff --git a/source/tutorial/configure-windows-netsh-firewall.txt b/source/tutorial/configure-windows-netsh-firewall.txt
index 3a7ebcad28f..f6d7de45ac1 100644
--- a/source/tutorial/configure-windows-netsh-firewall.txt
+++ b/source/tutorial/configure-windows-netsh-firewall.txt
@@ -15,7 +15,7 @@ Use these approaches as a starting point for your larger networking
 organization.
 
 For a detailed over view of security practices and risk management for
 MongoDB, see
-:doc:`/administration/security`.
+:doc:`/core/security`.
 
 .. seealso:: `Windows Firewall `_ documentation from Microsoft.
diff --git a/source/tutorial/consider-performance-when-creating-indexes.txt b/source/tutorial/consider-performance-when-creating-indexes.txt
new file mode 100644
index 00000000000..8b01727181d
--- /dev/null
+++ b/source/tutorial/consider-performance-when-creating-indexes.txt
@@ -0,0 +1,44 @@
+=======================================================================
+Consider Performance when Creating Indexes for Write-heavy Applications
+=======================================================================
+
+.. default-domain:: mongodb
+
+If your application is write-heavy, then be careful when creating new
+indexes, since each additional index will impose a
+write-performance penalty. In general, don't be careless about adding
+indexes. Add indexes to complement your queries. Always have
+a good reason for adding a new index, and be sure to benchmark
+alternative strategies.
+
+Consider Insert Throughput
+--------------------------
+
+.. todo:: insert link to /source/core/write-operations when that page is complete.
+   Do we want to link to write concern? -bg
+
+MongoDB must update *all* indexes associated with a collection after
+every insert, update, or delete operation. For update operations, if
+the updated document does not move to a new location, then MongoDB only
+modifies the updated fields in the index. Therefore, every index on a
+collection adds some amount of overhead to these write operations. In
+almost every case, the performance gains that indexes realize for read
+operations are worth the insertion penalty. However, in some cases:
+
+- An index to support an infrequent query might incur more
+  insert-related costs than savings in read-time.
+
+  .. todo:: How do you determine if the above is the case?
+     Empirically.
+
+- On a collection with a high insert throughput and a number of
+  related indexes, you may find better overall performance with a
+  smaller number of indexes, even if some queries are less optimally
+  supported by an index.
+
+  .. todo:: The above is unclear. -bg
+
+- If your indexes and queries are not sufficiently :ref:`selective
+  `, the speed improvements for query operations
+  may not offset the costs of maintaining an index. For more
+  information see :ref:`index-selectivity`.
diff --git a/source/tutorial/control-access-to-mongodb-with-authentication.txt b/source/tutorial/control-access-to-mongodb-with-authentication.txt
index 2bb30b81e90..6c8dcb68718 100644
--- a/source/tutorial/control-access-to-mongodb-with-authentication.txt
+++ b/source/tutorial/control-access-to-mongodb-with-authentication.txt
@@ -14,14 +14,14 @@
 MongoDB provides a basic access control system that you can enable with
 the :setting:`auth` and :setting:`keyFile` configuration settings
 [#cli-options]_.
 For an overview, see the :ref:`security-authentication` section of the
-:doc:`/administration/security` document. For additional configuration
+:doc:`/core/security` document. For additional configuration
 settings that affect security, see the :ref:`configuration-security`
 section of the :doc:`/administration/configuration` document.
 
 MongoDB also provides role-based privileges through each database's
 :data:`system.users <.system.users>` collection. For an overview, see
 the :ref:`security-authorization` section of the
-:doc:`/administration/security` document.
+:doc:`/core/security` document.
 
 .. [#cli-options] Use the :option:`--auth` or :option:`--keyFile`
    options on the command
diff --git a/source/tutorial/control-access-to-mongodb-with-kerberos-authentication.txt b/source/tutorial/control-access-to-mongodb-with-kerberos-authentication.txt
index cd764d6cb04..b6ac2ecb585 100644
--- a/source/tutorial/control-access-to-mongodb-with-kerberos-authentication.txt
+++ b/source/tutorial/control-access-to-mongodb-with-kerberos-authentication.txt
@@ -106,7 +106,7 @@ method, as in the following example:
 
    db.system.users.remove( { user: "application/reporting@EXAMPLE.NET" } )
 
-To modify a user document, use :doc:`update `
+To modify a user document, use :doc:`update `
 operations on documents in the
 :data:`system.users <.system.users>` collection.
diff --git a/source/tutorial/control-results-of-text-search.txt b/source/tutorial/control-results-of-text-search.txt
new file mode 100644
index 00000000000..ef5726519d9
--- /dev/null
+++ b/source/tutorial/control-results-of-text-search.txt
@@ -0,0 +1,76 @@
+===========================================
+Control Results of Text Search with Weights
+===========================================
+
+.. default-domain:: mongodb
+
+By default, the :dbcommand:`text` command returns matching documents
+based on scores, from highest to lowest. For a ``text`` index, the
+*weight* of an indexed field denotes the significance of the field
+relative to the other indexed fields in terms of the score. The score
+for a given word in a document is derived from the weighted sum of the
+frequency for each of the indexed fields in that document.
+
+The default weight is 1 for the indexed fields. To adjust the weights
+for the indexed fields, include the ``weights`` option in the
+:method:`db.collection.ensureIndex()` method.
+
+.. warning::
+
+   Choose the weights carefully in order to prevent the need to reindex.
+
+A collection ``blog`` has the following documents:
+
+.. code-block:: javascript
+
+   { _id: 1,
+     content: "This morning I had a cup of coffee.",
+     about: "beverage",
+     keywords: [ "coffee" ]
+   }
+
+   { _id: 2,
+     content: "Who doesn't like cake?",
+     about: "food",
+     keywords: [ "cake", "food", "dessert" ]
+   }
+
+To create a ``text`` index with different field weights for the
+``content`` field and the ``keywords`` field, include the ``weights``
+option in the :method:`~db.collection.ensureIndex()` method. For
+example, the following command creates an index on three fields and
+assigns weights to two of the fields:
+
+.. code-block:: javascript
+
+   db.blog.ensureIndex(
+     {
+       content: "text",
+       keywords: "text",
+       about: "text"
+     },
+     {
+       weights: {
+         content: 10,
+         keywords: 5,
+       },
+       name: "TextIndex"
+     }
+   )
+
+The ``text`` index has the following fields and weights:
+
+- ``content`` has a weight of 10,
+
+- ``keywords`` has a weight of 5, and
+
+- ``about`` has the default weight of 1.
+
+These weights denote the relative significance of the indexed fields to
+each other. For instance, a term match in the ``content`` field has:
+
+- ``2`` times (i.e. ``10:5``) the impact of a term match in the
+  ``keywords`` field and
+
+- ``10`` times (i.e. ``10:1``) the impact of a term match in the
+  ``about`` field.
diff --git a/source/tutorial/convert-standalone-to-replica-set.txt b/source/tutorial/convert-standalone-to-replica-set.txt
index 31fed3ef73e..d78e9c3a518 100644
--- a/source/tutorial/convert-standalone-to-replica-set.txt
+++ b/source/tutorial/convert-standalone-to-replica-set.txt
@@ -16,8 +16,20 @@ For more information on :doc:`replica sets, their use, and
 administration `, see:
 
 - :doc:`/core/replication`,
-- :doc:`/administration/replica-set-architectures`,
-- :doc:`/administration/replica-sets`, and
+- :doc:`/core/replica-set-architectures`,
+- :doc:`/tutorial/expand-replica-set`
+- :doc:`/tutorial/add-replica-set-arbiter`
+- :doc:`/tutorial/remove-replica-set-member`
+- :doc:`/tutorial/replace-replica-set-member`
+- :doc:`/tutorial/adjust-replica-set-member-priority`
+- :doc:`/tutorial/resync-replica-set-member`
+- :doc:`/tutorial/configure-replica-set-secondary-sync-target`
+- :doc:`/tutorial/configure-a-delayed-replica-set-member`
+- :doc:`/tutorial/configure-a-hidden-replica-set-member`
+- :doc:`/tutorial/configure-a-non-voting-replica-set-member`
+- :doc:`/tutorial/configure-secondary-only-replica-set-member`
+- :doc:`/tutorial/manage-chained-replication`
+- :doc:`/tutorial/troubleshoot-replica-sets`, and
 - :doc:`/applications/replication`.
 
 .. note::
diff --git a/source/tutorial/copy-databases-between-instances.txt b/source/tutorial/copy-databases-between-instances.txt
index e52e85c4337..abf2143f766 100644
--- a/source/tutorial/copy-databases-between-instances.txt
+++ b/source/tutorial/copy-databases-between-instances.txt
@@ -26,8 +26,8 @@ for use cases that resemble the following use cases:
 
 - seeding test environments.
 
-Also consider the :doc:`/administration/backups` and
-:doc:`/administration/import-export` documentation for more related
+Also consider the :doc:`/core/backups` and
+:doc:`/core/import-export` documentation for more related
 information.
 
 .. note::
diff --git a/source/tutorial/create-a-compound-index.txt b/source/tutorial/create-a-compound-index.txt
new file mode 100644
index 00000000000..50bdaa1371d
--- /dev/null
+++ b/source/tutorial/create-a-compound-index.txt
@@ -0,0 +1,45 @@
+.. index:: index; create
+.. index:: index; compound
+.. _index-create-compound-index:
+
+=======================
+Create a Compound Index
+=======================
+
+.. default-domain:: mongodb
+
+Indexes allow MongoDB to process and fulfill queries quickly by
+creating small and efficient representations of the documents in a
+:term:`collection`. MongoDB supports indexes that include content on a
+single field, as well as :ref:`compound indexes `
+that include content from multiple fields. Continue reading for
+instructions and examples of building a compound index.
+
+Build a Compound Index
+----------------------
+
+To create a :ref:`compound index `, use an
+operation that resembles the following prototype:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1, b: 1, c: 1 } )
+
+Example
+-------
+
+The following operation will create an index on the
+``item``, ``category``, and ``price`` fields of the ``products``
+collection:
+
+.. code-block:: javascript
+
+   db.products.ensureIndex( { item: 1, category: 1, price: 1 } )
+
+Additional Considerations
+-------------------------
+
+.. include:: /includes/index-tutorials-considerations.rst
+
+.. seealso:: :ref:`index-create-index`, :doc:`/administration/indexes`
+   and :doc:`/core/indexes` for more information.
diff --git a/source/tutorial/create-a-hashed-index.txt b/source/tutorial/create-a-hashed-index.txt
new file mode 100644
index 00000000000..a6b3c4671ec
--- /dev/null
+++ b/source/tutorial/create-a-hashed-index.txt
@@ -0,0 +1,41 @@
+.. index:: index; hashed
+.. _index-hashed-index:
+
+=====================
+Create a Hashed Index
+=====================
+
+.. default-domain:: mongodb
+
+.. versionadded:: 2.4
+
+:ref:`Hashed indexes ` compute a hash of the value
+of a field in a collection and index the hashed value. These indexes
+permit equality queries and may be suitable shard keys for some
+collections.
+
+.. see:: :ref:`sharding-hashed-sharding` for more information about hashed
+   indexes in sharded clusters, as well as :doc:`/core/indexes` and
+   :doc:`/administration/indexes` for more information about indexes.
+
+Operation
+---------
+
+To create a :ref:`hashed index `, specify
+``hashed`` as the value of the index key, as in the following
+prototype:
+
+.. example::
+
+   .. code-block:: javascript
+
+      db.collection.ensureIndex( { a: "hashed" } )
+
+Considerations
+--------------
+
+MongoDB supports ``hashed`` indexes of any single field. The hashing
+function collapses sub-documents and computes the hash for the entire
+value, but does not support multi-key (i.e. arrays) indexes.
+
+You may not create compound indexes that have ``hashed`` index fields.
diff --git a/source/tutorial/create-a-sparse-index.txt b/source/tutorial/create-a-sparse-index.txt
new file mode 100644
index 00000000000..afd19d2f416
--- /dev/null
+++ b/source/tutorial/create-a-sparse-index.txt
@@ -0,0 +1,52 @@
+.. index:: index; sparse
+.. _index-sparse-index:
+
+=====================
+Create a Sparse Index
+=====================
+
+.. default-domain:: mongodb
+
+Sparse indexes are like non-sparse indexes, except that they omit
+references to documents that do not include the indexed field. For
+fields that are only present in some documents, sparse indexes may
+provide a significant space savings. See :ref:`index-type-sparse` for
+more information about sparse indexes and their use.
+
+.. seealso:: :doc:`/core/indexes` and :doc:`/administration/indexes`
+   for more information.
+
+Prototype
+---------
+
+To create a :ref:`sparse index ` on a field, use an
+operation that resembles the following prototype:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1 }, { sparse: true } )
+
+Example
+-------
+
+The following operation creates a sparse index on the ``users``
+collection that *only* includes a document in the index if
+the ``twitter_name`` field exists in a document.
+
+.. code-block:: javascript
+
+   db.users.ensureIndex( { twitter_name: 1 }, { sparse: true } )
+
+The index excludes all documents that do not include the
+``twitter_name`` field.
+
+Considerations
+--------------
+
+.. note::
+
+   Sparse indexes can affect the results returned by the query,
+   particularly with respect to sorts on fields *not* included in the
+   index. See the :ref:`sparse index ` section for
+   more information.
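+
+To illustrate the preceding note, the following sketch forces the
+query to use the sparse index with :method:`~cursor.hint()`; documents
+that lack the ``twitter_name`` field are absent from the index and
+therefore from the sorted results:
+
+.. code-block:: javascript
+
+   // the sorted result set omits documents without a twitter_name field
+   db.users.find().sort( { twitter_name: 1 } ).hint( { twitter_name: 1 } )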
+
diff --git a/source/tutorial/create-a-unique-index.txt b/source/tutorial/create-a-unique-index.txt
new file mode 100644
index 00000000000..c5080299025
--- /dev/null
+++ b/source/tutorial/create-a-unique-index.txt
@@ -0,0 +1,91 @@
+=====================
+Create a Unique Index
+=====================
+
+.. default-domain:: mongodb
+
+MongoDB allows you to specify a :ref:`unique constraint
+` on an index. These constraints prevent
+applications from inserting :term:`documents ` that have
+duplicate values for the inserted fields. Additionally, if you want to
+create an index on a collection that has existing data that might have
+duplicate values for the indexed field, you may choose to combine unique
+enforcement with :ref:`duplicate dropping
+`.
+
+.. index:: index; unique
+.. _index-unique-index:
+
+Unique Indexes
+~~~~~~~~~~~~~~
+
+To create a :ref:`unique index `, consider the
+following prototype:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1 }, { unique: true } )
+
+For example, you may want to create a unique index on the ``tax-id`` field
+of the ``accounts`` collection to prevent storing multiple account
+records for the same legal entity:
+
+.. code-block:: javascript
+
+   db.accounts.ensureIndex( { "tax-id": 1 }, { unique: true } )
+
+The :ref:`_id index ` is a unique index. In some
+situations you may consider using the ``_id`` field itself for this kind
+of data rather than using a unique index on another field.
+
+In many situations you will want to combine the ``unique`` constraint
+with the ``sparse`` option. When MongoDB indexes a field, if a
+document does not have a value for a field, the index entry for that
+item will be ``null``. Since unique indexes cannot have duplicate
+values for a field, without the ``sparse`` option, MongoDB will reject
+the second document and all subsequent documents without the indexed
+field. Consider the following prototype.
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1 }, { unique: true, sparse: true } )
+
+You can also enforce a unique constraint on :ref:`compound indexes
+`, as in the following prototype:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1, b: 1 }, { unique: true } )
+
+These indexes enforce uniqueness for the *combination* of index keys
+and *not* for either key individually.
+
+.. index:: index; drop duplicates
+.. index:: index; duplicates
+.. _index-drop-duplicates:
+
+Drop Duplicates
+~~~~~~~~~~~~~~~
+
+To force the creation of a :ref:`unique index `
+on a collection with duplicate values in the field you are
+indexing, you can use the ``dropDups`` option. This will force MongoDB
+to create a *unique* index by deleting documents with duplicate values
+when building the index. Consider the following prototype invocation
+of :method:`db.collection.ensureIndex()`:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex( { a: 1 }, { dropDups: true } )
+
+See the full documentation of :ref:`duplicate dropping
+` for more information.
+
+.. warning::
+
+   Specifying ``{ dropDups: true }`` may delete data from your
+   database. Use with extreme caution.
+
+Refer to the :method:`ensureIndex() `
+documentation for additional index creation options.
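+
+As an illustration of the unique constraint, consider the following
+sketch, which reuses the ``accounts`` example from above; the
+``tax-id`` value is invented for the example, and the second insert
+fails with a duplicate key error:
+
+.. code-block:: javascript
+
+   db.accounts.insert( { "tax-id": "12-3456789" } )
+
+   // rejected: an accounts document with this tax-id already exists
+   db.accounts.insert( { "tax-id": "12-3456789" } )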
+
diff --git a/source/tutorial/create-a-vulnerability-report.txt b/source/tutorial/create-a-vulnerability-report.txt
new file mode 100644
index 00000000000..9f997311c2e
--- /dev/null
+++ b/source/tutorial/create-a-vulnerability-report.txt
@@ -0,0 +1,87 @@
+=============================
+Create a Vulnerability Report
+=============================
+
+.. default-domain:: mongodb
+
+If you believe you have discovered a vulnerability in MongoDB or a
+related product or have experienced a security incident related to
+MongoDB, please report the issue so that `10gen `_ can respond
+appropriately and work to prevent additional issues in the
+future.
+
+To report an issue, use either `jira.mongodb.org
+`_ (preferred) or email. 10gen responds to
+vulnerability notifications within 48 hours.
+
+Information to Provide
+----------------------
+
+All vulnerability reports should contain as much information
+as possible so 10gen can move quickly to resolve the issue.
+In particular, please include the following:
+
+- The name of the product.
+
+- *Common Vulnerability* information, if applicable, including:
+
+  - CVSS (Common Vulnerability Scoring System) Score.
+
+  - CVE (Common Vulnerability and Exposures) Identifier.
+
+- Contact information, including an email address and/or phone number,
+  if applicable.
+
+Create the Report in Jira
+-------------------------
+
+10gen prefers `jira.mongodb.org `_ for all
+communication regarding MongoDB and related products.
+
+Submit a ticket in the :issue:`Core Server Security `
+project at:
+`https://jira.mongodb.org/browse/SECURITY/ `_.
+The ticket
+number will become the reference identification for the issue for the
+lifetime of the issue. You can use this identifier for tracking
+purposes.
+
+Send the Report via Email
+-------------------------
+
+While Jira is preferred, you may also report
+vulnerabilities via email to `security@10gen.com `_.
+
+You may encrypt email using the 10gen public key at
+`http://docs.mongodb.org/10gen-gpg-key.asc `_.
+
+10gen responds to vulnerability reports sent via
+email with a response email that contains a reference number for a Jira ticket
+posted to the :issue:`SECURITY` project.
+
+Evaluation of a Vulnerability Report
+------------------------------------
+
+10gen validates all submitted vulnerabilities and uses Jira
+to track all communications regarding a vulnerability,
+including requests for clarification or additional information. If
+needed, 10gen representatives set up a conference call to exchange
+information regarding the vulnerability.
+
+Disclosure
+----------
+
+10gen requests that you do *not* publicly disclose any information
+regarding the vulnerability or exploit the issue until 10gen has had the
+opportunity to analyze the vulnerability, to respond to the notification,
+and to notify key users, customers, and partners.
+
+The amount of time required to validate a reported vulnerability
+depends on the complexity and severity of the issue. 10gen takes all
+reported vulnerabilities very seriously and will always ensure that
+there is a clear and open channel of communication with the reporter.
+
+After validating an issue, 10gen coordinates public disclosure of
+the issue with the reporter in a mutually agreed timeframe and
+format. If required or requested, the reporter of a vulnerability will
+receive credit in the published security bulletin.
diff --git a/source/tutorial/create-an-index.txt b/source/tutorial/create-an-index.txt
new file mode 100644
index 00000000000..d7a4f92a7cd
--- /dev/null
+++ b/source/tutorial/create-an-index.txt
@@ -0,0 +1,65 @@
+.. index:: index; create
+.. _index-create-index:
+
+===============
+Create an Index
+===============
+
+.. default-domain:: mongodb
+
+Indexes allow MongoDB to process and fulfill queries quickly by
+creating small and efficient representations of the documents in a
+:term:`collection`. MongoDB creates an index on the ``_id`` field of
+every collection by default, but allows users to create indexes for
+any collection on any field in a :term:`document`.
+
+This tutorial describes how to create an index on a single
+field. MongoDB also supports :ref:`compound indexes
+`, which are indexes on multiple fields. See
+:ref:`index-create-compound-index` for instructions on building
+compound indexes.
+
+Build a Foreground Index on a Single Field
+------------------------------------------
+
+To create an index, use :method:`db.collection.ensureIndex()` or a similar
+:api:`method from your driver <>`. For example,
+the following operation creates an index on the ``phone-number`` field
+of the ``people`` collection:
+
+.. code-block:: javascript
+
+   db.people.ensureIndex( { "phone-number": 1 } )
+
+:method:`ensureIndex() ` only creates an
+index if an index of the same specification does not already exist.
+
+An index supports and optimizes the performance of queries that select
+on the indexed field. For queries that cannot use an index, MongoDB must scan
+all documents in a collection for documents that match the query.
+
+Examples
+--------
+
+If you create an index on the ``user_id`` field of the ``records``
+collection, the index will support the following query:
+
+.. code-block:: javascript
+
+   db.records.find( { user_id: 2 } )
+
+However, the following query, on the ``profile_url`` field, is not
+supported by this index:
+
+.. code-block:: javascript
+
+   db.records.find( { profile_url: 2 } )
+
+Additional Considerations
+-------------------------
+
+.. include:: /includes/index-tutorials-considerations.rst
+
+.. seealso:: :ref:`index-create-compound-index`,
+   :doc:`/administration/indexes` and :doc:`/core/indexes` for more
+   information.
diff --git a/source/tutorial/create-indexes-to-support-queries.txt b/source/tutorial/create-indexes-to-support-queries.txt
new file mode 100644
index 00000000000..8faa2cf4d22
--- /dev/null
+++ b/source/tutorial/create-indexes-to-support-queries.txt
@@ -0,0 +1,193 @@
+======================================
+Create Indexes to Support Your Queries
+======================================
+
+.. default-domain:: mongodb
+
+An index supports a query when the index contains all the fields scanned
+by the query. The query scans the index and not the collection. Creating indexes
+that support queries results in greatly increased query performance.
+
+This document describes strategies for creating indexes that support queries.
+
+Create a Single-Key Index if All Queries Use the Same, Single Key
+-----------------------------------------------------------------
+
+If you only ever query on a single key in a given collection, then you need
+to create just one single-key index for that collection. For example, you
+might create an index on ``category`` in the ``products`` collection:
+
+.. code-block:: javascript
+
+   db.products.ensureIndex( { "category": 1 } )
+
+.. _compound-key-indexes:
+
+Create Compound Indexes to Support Several Different Queries
+------------------------------------------------------------
+
+If you sometimes query on only one key and at other times query on that
+key combined with a second key, then creating a compound index is more
+efficient than creating a single-key index. MongoDB will use the
+compound index for both queries. For example, you might create an index
+on both ``category`` and ``item``.
+
+.. code-block:: javascript
+
+   db.products.ensureIndex( { "category": 1, "item": 1 } )
+
+This allows you both options. You can query on just ``category``, and
+you also can query on ``category`` combined with ``item``.
+A single :ref:`compound index ` on multiple fields
+can support all the queries that search a "prefix" subset of those fields.
+
+.. note:: With the exception of queries that use the :operator:`$or`
+   operator, a query does not use multiple indexes. A query uses only one
+   index.
+
+.. example::
+
+   The following index on a collection:
+
+   .. code-block:: javascript
+
+      { x: 1, y: 1, z: 1 }
+
+   Can support queries that the following indexes support:
+
+   .. code-block:: javascript
+
+      { x: 1 }
+      { x: 1, y: 1 }
+
+   There are some situations where the prefix indexes may offer better
+   query performance: for example if ``z`` is a large array.
+
+   The ``{ x: 1, y: 1, z: 1 }`` index can also support many of the same
+   queries as the following index:
+
+   .. code-block:: javascript
+
+      { x: 1, z: 1 }
+
+   Also, ``{ x: 1, z: 1 }`` has an additional use. Given the following
+   query:
+
+   .. code-block:: javascript
+
+      db.collection.find( { x: 5 } ).sort( { z: 1} )
+
+   The ``{ x: 1, z: 1 }`` index supports both the query and the sort
+   operation, while the ``{ x: 1, y: 1, z: 1 }`` index only supports
+   the query. For more information on sorting, see
+   :ref:`sorting-with-indexes`.
+
+.. _covered-queries:
+.. _indexes-covered-queries:
+
+Create Indexes that Support Covered Queries
+-------------------------------------------
+
+A covered query is a query in which:
+
+- all the fields in the :ref:`query `
+  are part of an index, **and**
+
+- all the fields returned in the results are in the same index.
+
+Because the index "covers" the query, MongoDB can both match the
+:ref:`query conditions ` **and** return
+the results using only the index; MongoDB does not need to look at
+the documents, only the index, to fulfill the query.
+
+Querying *only* the index can be much faster than querying documents
+outside of the index. Index keys are typically smaller than the
+documents they catalog, and indexes are typically available in RAM or
+located sequentially on disk.
+
+MongoDB automatically uses an index that covers a query when possible.
+To ensure that an index can *cover* a query, create an index that
+includes all the fields listed in the :ref:`query document
+` and in the query result. You can
+specify the fields to return in the query results with a
+:ref:`projection ` document. By default, MongoDB includes
+the ``_id`` field in the query result. So, if the index does **not**
+include the ``_id`` field, then you must exclude the ``_id`` field
+(i.e. ``_id: 0``) from the query results.
+
+.. example::
+
+   Given collection ``users`` with an index on the fields ``user`` and
+   ``status``, as created by the following operation:
+
+   .. code-block:: javascript
+
+      db.users.ensureIndex( { status: 1, user: 1 } )
+
+   Then, this index will cover the following query which selects on
+   the ``status`` field and returns only the ``user`` field:
+
+   .. code-block:: javascript
+
+      db.users.find( { status: "A" }, { user: 1, _id: 0 } )
+
+   In the operation, the projection document explicitly specifies
+   ``_id: 0`` to exclude the ``_id`` field from the result since the
+   index is only on the ``status`` and the ``user`` fields.
+
+   If the projection document does not specify the exclusion of the
+   ``_id`` field, the query returns the ``_id`` field. The following
+   query is **not** covered by the index on the ``status`` and the
+   ``user`` fields because with the projection document ``{ user: 1
+   }``, the query returns both the ``user`` field and the ``_id`` field:
+
+   .. code-block:: javascript
+
+      db.users.find( { status: "A" }, { user: 1 } )
+
+An index **cannot** cover a query if:
+
+- any of the indexed fields in any of the documents in the collection
+  includes an array. If an indexed field is an array, the index becomes
+  a :ref:`multi-key index ` and cannot
+  support a covered query.
+
+- any of the indexed fields are fields in subdocuments. To index fields
+  in subdocuments, use :term:`dot notation`. For example, consider
+  a collection ``users`` with documents of the following form:
+
+  .. code-block:: javascript
+
+     { _id: 1, user: { login: "tester" } }
+
+  The collection has the following indexes:
+
+  .. code-block:: none
+
+     { user: 1 }
+
+     { "user.login": 1 }
+
+  The ``{ user: 1 }`` index covers the following query:
+
+  .. code-block:: none
+
+     db.users.find( { user: { login: "tester" } }, { user: 1, _id: 0 } )
+
+  However, the ``{ "user.login": 1 }`` index does **not** cover the
+  following query:
+
+  .. code-block:: none
+
+     db.users.find( { "user.login": "tester" }, { "user.login": 1, _id: 0 } )
+
+  The query, however, does use the ``{ "user.login": 1 }`` index to
+  find matching documents.
+
+To determine whether a query is a covered query, use the
+:method:`~cursor.explain()` method. If the :method:`~cursor.explain()`
+output displays ``true`` for the :data:`~explain.indexOnly` field, the query is
+covered by an index, and MongoDB queries only that index to match the
+query **and** return the results.
+
+For more information see :ref:`indexes-measuring-use`.
diff --git a/source/tutorial/create-queries-that-ensure-selectivity.txt b/source/tutorial/create-queries-that-ensure-selectivity.txt
new file mode 100644
index 00000000000..da4107c5aa8
--- /dev/null
+++ b/source/tutorial/create-queries-that-ensure-selectivity.txt
@@ -0,0 +1,85 @@
+.. _index-selectivity:
+
+======================================
+Create Queries that Ensure Selectivity
+======================================
+
+.. default-domain:: mongodb
+
+Selectivity is the ability of a query to narrow results using the index.
+Effective indexes are more selective and allow MongoDB to use the index
+for a larger portion of the work associated with fulfilling the query.
+
+To ensure selectivity,
+write queries that limit the number of possible documents with the
+indexed field. Write queries that are appropriately selective relative
+to your indexed data.
+
+.. example::
+
+   Suppose you have a field called ``status`` where the possible values
+   are ``new`` and ``processed``. If you add an index on ``status``
+   you've created a low-selectivity index. The index will
+   be of little help in locating records.
+ + A better strategy, depending on your queries, would be to create a + :ref:`compound index ` that includes the + low-selectivity field and another field. For example, you could + create a compound index on ``status`` and ``created_at.`` + + Another option, again depending on your use case, might be to use + separate collections, one for each status. + +.. example:: + + Consider an index ``{ a : 1 }`` (i.e. an index on the key ``a`` + sorted in ascending order) on a collection where ``a`` has three + values evenly distributed across the collection: + + .. code-block:: javascript + + { _id: ObjectId(), a: 1, b: "ab" } + { _id: ObjectId(), a: 1, b: "cd" } + { _id: ObjectId(), a: 1, b: "ef" } + { _id: ObjectId(), a: 2, b: "jk" } + { _id: ObjectId(), a: 2, b: "lm" } + { _id: ObjectId(), a: 2, b: "no" } + { _id: ObjectId(), a: 3, b: "pq" } + { _id: ObjectId(), a: 3, b: "rs" } + { _id: ObjectId(), a: 3, b: "tv" } + + If you query for ``{ a: 2, b: "no" }`` MongoDB must scan 3 + :term:`documents ` in the collection to return the one + matching result. Similarly, a query for ``{ a: { $gt: 1}, b: "tv" }`` + must scan 6 documents, also to return one result. + + Consider the same index on a collection where ``a`` has *nine* values + evenly distributed across the collection: + + .. code-block:: javascript + + { _id: ObjectId(), a: 1, b: "ab" } + { _id: ObjectId(), a: 2, b: "cd" } + { _id: ObjectId(), a: 3, b: "ef" } + { _id: ObjectId(), a: 4, b: "jk" } + { _id: ObjectId(), a: 5, b: "lm" } + { _id: ObjectId(), a: 6, b: "no" } + { _id: ObjectId(), a: 7, b: "pq" } + { _id: ObjectId(), a: 8, b: "rs" } + { _id: ObjectId(), a: 9, b: "tv" } + + If you query for ``{ a: 2, b: "cd" }``, MongoDB must scan only one + document to fulfill the query. The index and query are more selective + because the values of ``a`` are evenly distributed *and* the query + can select a specific document using the index. + + However, although the index on ``a`` is more selective, a query such + as ``{ a: { $gt: 5 }, b: "tv" }`` would still need to scan 4 + documents. + + .. todo:: is there an answer to that last "However" paragraph? + +If overall selectivity is low, and if MongoDB must read a number of +documents to return results, then some queries may perform faster +without indexes. To determine performance, see +:ref:`indexes-measuring-use`. diff --git a/source/tutorial/create-sorted-indexes.txt b/source/tutorial/create-sorted-indexes.txt new file mode 100644 index 00000000000..bb105022610 --- /dev/null +++ b/source/tutorial/create-sorted-indexes.txt @@ -0,0 +1,60 @@ +.. _index-sort: +.. _sorting-with-indexes: + +================================= +Use Indexes to Sort Query Results +================================= + +.. default-domain:: mongodb + +For the fastest performance when sorting query results by a given field, +create a sorted index on that field. + +To sort query results on multiple fields, create a :ref:`compound +index `. MongoDB sorts results based on the field +order in the index. For queries that include a sort that uses a +compound index, ensure that all fields before the first sorted field +are equality matches. + +.. example:: + + If you create the following index: + + .. code-block:: javascript + + { a: 1, b: 1, c: 1, d: 1 } + + The following query and sort operations can use the index: + + .. 
code-block:: javascript
+
+      db.collection.find().sort( { a:1 } )
+      db.collection.find().sort( { a:1, b:1 } )
+
+      db.collection.find( { a:4 } ).sort( { a:1, b:1 } )
+      db.collection.find( { b:5 } ).sort( { a:1, b:1 } )
+
+      db.collection.find( { a:5 } ).sort( { b:1, c:1 } )
+
+      db.collection.find( { a:5, c:4, b:3 } ).sort( { d:1 } )
+
+      db.collection.find( { a: { $gt:4 } } ).sort( { a:1, b:1 } )
+      db.collection.find( { a: { $gt:5 } } ).sort( { a:1, b:1 } )
+
+      db.collection.find( { a:5, b:3, d:{ $gt:4 } } ).sort( { c:1 } )
+      db.collection.find( { a:5, b:3, c:{ $lt:2 }, d:{ $gt:4 } } ).sort( { c:1 } )
+
+   However, the following queries cannot sort the results using the
+   index:
+
+   .. code-block:: javascript
+
+      db.collection.find().sort( { b:1 } )
+      db.collection.find( { b:5 } ).sort( { b:1 } )
+
+.. note::
+
+   For in-memory sorts that do not use an index, the :method:`sort()
+   ` operation is significantly slower. The
+   :method:`~cursor.sort()` operation will abort when it uses 32
+   megabytes of memory.
diff --git a/source/tutorial/create-text-index-on-multiple-fields.txt b/source/tutorial/create-text-index-on-multiple-fields.txt
new file mode 100644
index 00000000000..452d3a8adb5
--- /dev/null
+++ b/source/tutorial/create-text-index-on-multiple-fields.txt
@@ -0,0 +1,47 @@
+========================================
+Create ``text`` Index on Multiple Fields
+========================================
+
+.. default-domain:: mongodb
+
+You can create a ``text`` index on the field or fields whose value is a
+string or an array of string elements. When creating a ``text`` index
+on multiple fields, you can specify the individual fields or you can
+use the wildcard specifier (``$**``).
+
+Index Specific Fields
+---------------------
+
+The following example creates a ``text`` index on the fields
+``subject`` and ``content``:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex(
+     {
+       subject: "text",
+       content: "text"
+     }
+   )
+
+This ``text`` index catalogs all string data in the ``subject`` field
+and the ``content`` field, where the field value is either a string or
+an array of string elements.
+
+Index All Fields
+----------------
+
+To allow for text search on all fields with string content, use the
+wildcard specifier (``$**``) to index all fields that contain string
+content.
+
+The following example indexes any string value in the data of every
+field of every document in ``collection`` and names the index
+``TextIndex``:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex(
+     { "$**": "text" },
+     { name: "TextIndex" }
+   )
diff --git a/source/tutorial/deploy-geographically-distributed-replica-set.txt b/source/tutorial/deploy-geographically-distributed-replica-set.txt
index 7c70a3590cd..4e77b900d46 100644
--- a/source/tutorial/deploy-geographically-distributed-replica-set.txt
+++ b/source/tutorial/deploy-geographically-distributed-replica-set.txt
@@ -9,7 +9,7 @@ in multiple locations. The tutorial addresses three-member sets,
 four-member sets, and sets with more than four members.
 
 For appropriate background, see :doc:`/core/replication` and
-:doc:`/administration/replica-set-architectures`. For related
+:doc:`/core/replica-set-architectures`. For related
 tutorials, see :doc:`/tutorial/deploy-replica-set` and
 :doc:`/tutorial/expand-replica-set`.
diff --git a/source/tutorial/deploy-replica-set.txt b/source/tutorial/deploy-replica-set.txt
index e2deece2cd5..cd2550beeb6 100644
--- a/source/tutorial/deploy-replica-set.txt
+++ b/source/tutorial/deploy-replica-set.txt
@@ -12,7 +12,7 @@ a one for production systems. To instead deploy a replica set from a
 single standalone MongoDB instance, see
 :doc:`/tutorial/convert-standalone-to-replica-set`. For additional
 information regarding replica set deployments, see
-:doc:`/core/replication` and :doc:`/administration/replica-set-architectures`.
+:doc:`/core/replication` and :doc:`/core/replica-set-architectures`.
 
 Overview
 --------
diff --git a/source/tutorial/deploy-shard-cluster.txt b/source/tutorial/deploy-shard-cluster.txt
index 62c09876433..2e4dd5454d1 100644
--- a/source/tutorial/deploy-shard-cluster.txt
+++ b/source/tutorial/deploy-shard-cluster.txt
@@ -10,7 +10,7 @@ The topics on this page present an ordered sequence of the tasks
 required to set up a :term:`sharded cluster`.
 
 Before deploying a sharded cluster for the first time, consider the
 :doc:`/core/sharded-clusters` and
-:doc:`/administration/sharded-cluster-architectures` documents.
+:doc:`/core/sharded-cluster-architectures` documents.
 
 To set up a sharded cluster, complete the following sequence of tasks
 in the order defined below:
diff --git a/source/tutorial/enable-authentication-in-sharded-cluster.txt b/source/tutorial/enable-authentication-in-sharded-cluster.txt
new file mode 100644
index 00000000000..1ca16bcb83b
--- /dev/null
+++ b/source/tutorial/enable-authentication-in-sharded-cluster.txt
@@ -0,0 +1,52 @@
+==========================================
+Enable Authentication in a Sharded Cluster
+==========================================
+
+.. default-domain:: mongodb
+
+.. versionadded:: 2.0
+   Support for authentication with sharded clusters.
+
+To control access to a sharded cluster, create key files and then set
+the :setting:`keyFile` option on *all* components of the sharded
+cluster, including all :program:`mongos` instances, all config server
+:program:`mongod` instances, and all shard :program:`mongod`
+instances. The content of the key file is arbitrary but must be the
+same on all cluster members.
+
+.. note:: For an overview of authentication, see
+   :ref:`security-authentication`. For an overview of security, see
+   :doc:`/security`.
+
+Procedure
+---------
+
+To enable authentication, do the following:
+
+1. Generate a key file to store authentication information, as described
+   in the :ref:`generate-key-file` section; a brief example follows
+   this procedure.
+
+#. On each component in the sharded cluster, enable authentication by
+   doing one of the following:
+
+   - In the configuration file, set the :setting:`keyFile` option to the
+     key file's path and then start the component, as in the following
+     example:
+
+     .. code-block:: cfg
+
+        keyFile = /srv/mongodb/keyfile
+
+   - When starting the component, set the :option:`--keyFile` option,
+     which is an option for both :program:`mongos` instances and
+     :program:`mongod` instances. Set :option:`--keyFile`
+     to the key file's path.
+
+     .. note::
+
+        The :setting:`keyFile` setting implies :setting:`auth`, which
+        means in most cases you do not need to set :setting:`auth`
+        explicitly.
+
+#. Add the first administrative user and then add subsequent users. See
+   :ref:`control-access-add-users`.
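+
+One way to generate the content for the key file, shown here as a
+sketch that assumes the ``/srv/mongodb/keyfile`` path from the example
+above, uses ``openssl``:
+
+.. code-block:: sh
+
+   # write random base64 content to the key file
+   openssl rand -base64 741 > /srv/mongodb/keyfile
+
+   # restrict permissions so only the database user can read the file
+   chmod 600 /srv/mongodb/keyfile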
diff --git a/source/tutorial/ensure-indexes-fit-ram.txt b/source/tutorial/ensure-indexes-fit-ram.txt
new file mode 100644
index 00000000000..fbf1138e484
--- /dev/null
+++ b/source/tutorial/ensure-indexes-fit-ram.txt
@@ -0,0 +1,48 @@
+.. _indexes-ensure-indexes-fit-ram:
+
+======================
+Ensure Indexes Fit RAM
+======================
+
+.. default-domain:: mongodb
+
+For the fastest processing, ensure that your indexes fit entirely in RAM so
+that the system can avoid reading the index from disk.
+
+To check the size of your indexes, use the
+:method:`db.collection.totalIndexSize()` helper, which returns data in
+bytes:
+
+.. code-block:: javascript
+
+   > db.collection.totalIndexSize()
+   4294976499
+
+The above example shows an index size of almost 4.3 gigabytes. To ensure
+this index fits in RAM, you must not only have more than that much RAM
+available but also must have RAM available for the rest of the
+:term:`working set`. Also remember:
+
+If you have and use multiple collections, you must consider the size
+of all indexes on all collections. The indexes and the working set must be able to
+fit in memory at the same time.
+
+There are some limited cases where indexes do not need
+to fit in memory. See :ref:`indexing-right-handed`.
+
+.. seealso:: For additional :doc:`collection statistics
+   `, use :dbcommand:`collStats` or
+   :method:`db.collection.stats()`.
+
+.. _indexing-right-handed:
+
+Indexes that Hold Only Recent Values in RAM
+-------------------------------------------
+
+Indexes do not have to fit *entirely* into RAM in all cases. If the
+value of the indexed field increments with every insert, and most queries
+select recently added documents, then MongoDB only needs to keep the
+parts of the index that hold the most recent or "right-most" values in
+RAM. This allows for efficient index use for read and write
+operations and minimizes the amount of RAM required to support the
+index.
diff --git a/source/tutorial/evaluate-operational-performance.txt b/source/tutorial/evaluate-operational-performance.txt
new file mode 100644
index 00000000000..ed82c5be892
--- /dev/null
+++ b/source/tutorial/evaluate-operational-performance.txt
@@ -0,0 +1,48 @@
+==========================================
+Evaluate Performance of Current Operations
+==========================================
+
+.. default-domain:: mongodb
+
+The following sections describe techniques for evaluating operational
+performance.
+
+Use the Database Profiler to Evaluate Operations Against the Database
+---------------------------------------------------------------------
+
+.. todo Add link below: :doc:`database profiler `
+
+MongoDB provides a database profiler that shows performance
+characteristics of each operation against the database. Use the profiler
+to locate any queries or write operations that are running slow. You can
+use this information, for example, to determine what indexes to create.
+
+.. todo Add below: , see :doc:`/tutorial/manage-the-database-profiler` and ...
+
+For more information, see :ref:`database-profiling`.
+
+Use ``db.currentOp()`` to Evaluate ``mongod`` Operations
+--------------------------------------------------------
+
+The :method:`db.currentOp()` method reports on current operations
+running on a :program:`mongod` instance. For documentation of the
+output of :method:`db.currentOp()`, see :doc:`/reference/current-op`.
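+
+For example, the following sketch prints only the operations that have
+been running for more than three seconds; the threshold is
+illustrative:
+
+.. code-block:: javascript
+
+   // inspect in-progress operations and print the long-running ones
+   db.currentOp().inprog.forEach( function ( op ) {
+      if ( op.secs_running > 3 ) printjson( op )
+   } )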
+
+Use ``$explain`` to Evaluate Query Performance
+----------------------------------------------
+
+The :method:`explain() ` method returns statistics
+on a query, and reports the index MongoDB selected to fulfill the
+query, as well as information about the internal operation of the
+query.
+
+.. example:: To use :method:`explain() ` on a query
+   for documents matching the expression ``{ a: 1 }``, in the
+   collection ``records``, use an operation that resembles the
+   following in the :program:`mongo` shell:
+
+   .. code-block:: javascript
+
+      db.records.find( { a: 1 } ).explain()
+
+.. todo Link to Kay's new explain doc
diff --git a/source/tutorial/expand-replica-set.txt b/source/tutorial/expand-replica-set.txt
index 7cd21dcd4ae..abadf09f4c9 100644
--- a/source/tutorial/expand-replica-set.txt
+++ b/source/tutorial/expand-replica-set.txt
@@ -10,12 +10,37 @@ Overview
 This tutorial explains how to add an additional member to an
 existing replica set.
 
-Before adding a new member, see the
-:ref:`replica-set-admin-procedure-add-member`
-topic in the :doc:`/administration/replica-sets` document.
+Before adding a new member to an existing :term:`replica set`, do one of
+the following to prepare the new member's :term:`data directory `:
+
+- Make sure the new member's data directory *does not* contain data. The
+  new member will copy the data from an existing member.
+
+  If the new member is in a :term:`recovering` state, it must exit and
+  become a :term:`secondary` before MongoDB
+  can copy all data as part of the replication process. This process
+  takes time but does not require administrator intervention.
+
+- Manually copy the data directory from an existing member. The new
+  member becomes a secondary member and will catch up to the current
+  state of the replica set after a short interval. Copying the data over
+  manually shortens the amount of time for the new member to become
+  current.
+
+  Ensure that you can copy the data directory to the new member and
+  begin replication within the :ref:`window allowed by the oplog
+  `. If the amount of time between the most recent
+  operation in the copied data and the most recent operation applied to
+  the database exceeds the length of the :term:`oplog` on the existing
+  members, then the new instance will have to perform an initial sync,
+  which completely resynchronizes the data, as described in
+  :doc:`resync-replica-set-member`.
+
+  Use :method:`db.printReplicationInfo()` to check the current state of
+  replica set members with regard to the oplog.
 
 For background on replication deployment patterns, see the
-:doc:`/administration/replica-set-architectures` document.
+:doc:`/core/replica-set-architectures` document.
 
 Requirements
 ------------
@@ -187,7 +212,7 @@ Production Notes
 
 - clean: the existing dataset must be from a consistent copy of the
   database from a member of the same replica set. See the
-  :doc:`/administration/backups` document for more information.
+  :doc:`/core/backups` document for more information.
 
 - recent: the copy must be more recent than the oldest operation in the
   :term:`primary` member's :term:`oplog`. The new secondary must be
diff --git a/source/tutorial/expire-data.txt b/source/tutorial/expire-data.txt
index ecc1ac2e93d..755a3ffc454 100644
--- a/source/tutorial/expire-data.txt
+++ b/source/tutorial/expire-data.txt
@@ -37,7 +37,7 @@ delete operations and leads to more predictable storage use patterns.
 
 ..
note:: When the TTL thread is active, you will see a :doc:`delete - ` operation in the output of + ` operation in the output of :method:`db.currentOp()` or in the data collected by the :ref:`database profiler `. diff --git a/source/tutorial/getting-started-with-the-mongo-shell.txt b/source/tutorial/getting-started-with-the-mongo-shell.txt index 0d8cdca8b40..1eaf3561c72 100644 --- a/source/tutorial/getting-started-with-the-mongo-shell.txt +++ b/source/tutorial/getting-started-with-the-mongo-shell.txt @@ -122,10 +122,10 @@ For more documentation of basic MongoDB operations in the - :doc:`/tutorial/getting-started` - :doc:`/reference/mongo-shell` -- :doc:`/applications/create` -- :doc:`/applications/read` -- :doc:`/applications/update` -- :doc:`/applications/delete` +- :doc:`/core/create` +- :doc:`/core/read` +- :doc:`/core/update` +- :doc:`/core/delete` - :doc:`/administration/indexes` - :doc:`/core/read-operations` - :doc:`/core/write-operations` diff --git a/source/tutorial/getting-started.txt b/source/tutorial/getting-started.txt index 409f0567035..861cc765833 100644 --- a/source/tutorial/getting-started.txt +++ b/source/tutorial/getting-started.txt @@ -181,7 +181,7 @@ Insert Individual Documents db.things.find() This operation returns the following results. The :doc:`ObjectId - ` values will be unique: + ` values will be unique: .. code-block:: javascript @@ -191,7 +191,7 @@ Insert Individual Documents All MongoDB documents must have an ``_id`` field with a unique value. These operations do not explicitly specify a value for the ``_id`` field, so :program:`mongo` creates a unique :doc:`ObjectId - ` value for the field before inserting it into the + ` value for the field before inserting it into the collection. Insert Multiple Documents Using a For Loop @@ -211,7 +211,7 @@ Insert Multiple Documents Using a For Loop db.things.find() The :program:`mongo` shell displays the first 20 documents in the - collection. Your :doc:`ObjectId ` values will be + collection. Your :doc:`ObjectId ` values will be different: .. code-block:: javascript @@ -294,7 +294,7 @@ Iterate over the Cursor with a Loop ``printjson()`` method renders the document in a JSON-like format. The result of this operation follows, although if the - :doc:`ObjectId ` values will be unique: + :doc:`ObjectId ` values will be unique: .. code-block:: javascript @@ -363,7 +363,7 @@ Query for Specific Documents MongoDB has a rich query system that allows you to select and filter the documents in a collection along specific fields and values. See -:ref:`read-operations-query-document` and :doc:`/applications/read` +:ref:`read-operations-query-document` and :doc:`/core/read` for a full account of queries in MongoDB. In this procedure, you query for specific documents in the ``things`` @@ -383,7 +383,7 @@ To query for specific documents, do the following: db.things.find( { name : "mongo" } ) MongoDB returns one document that fits this criteria. The - :doc:`ObjectId ` value will be different: + :doc:`ObjectId ` value will be different: .. code-block:: javascript @@ -422,7 +422,7 @@ To query for specific documents, do the following: { "_id" : ObjectId("4c220a42f3924d31102bd868"), "x" : 4, "j" : 19 } { "_id" : ObjectId("4c220a42f3924d31102bd869"), "x" : 4, "j" : 20 } - :doc:`ObjectId ` values are always unique. + :doc:`ObjectId ` values are always unique. #. 
Query for all documents where ``x`` has a value of ``4``, as in the @@ -479,7 +479,7 @@ following command: db.things.findOne() For more information on querying for documents, see the -:doc:`/applications/read` and :doc:`/core/read-operations` documentation. +:doc:`/core/read` and :doc:`/core/read-operations` documentation. Limit the Number of Documents in the Result Set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -497,7 +497,7 @@ following command: db.things.find().limit(3) MongoDB will return the following result, with different -:doc:`ObjectId ` values: +:doc:`ObjectId ` values: .. code-block:: javascript diff --git a/source/tutorial/install-mongodb-on-windows.txt b/source/tutorial/install-mongodb-on-windows.txt index 43e1d964c29..f7190031659 100644 --- a/source/tutorial/install-mongodb-on-windows.txt +++ b/source/tutorial/install-mongodb-on-windows.txt @@ -151,7 +151,7 @@ indicates that the mongod.exe process is running successfully. networks. All users should select ``Private Networks, such as my home or work network`` and click ``Allow access``. For additional information on security and MongoDB, please read the - :doc:`/administration/security` page. + :doc:`/core/security` page. .. warning:: diff --git a/source/tutorial/list-indexes.txt b/source/tutorial/list-indexes.txt new file mode 100644 index 00000000000..f480f5ab6fd --- /dev/null +++ b/source/tutorial/list-indexes.txt @@ -0,0 +1,50 @@ +============================ +Return a List of All Indexes +============================ + +.. default-domain:: mongodb + +When performing maintenance you may want to check which indexes exist +on a collection. Every index on a collection has a corresponding +:term:`document` in the :data:`system.indexes +<.system.indexes>` collection, and you can use standard +queries (i.e. :method:`~db.collection.find()`) to list the indexes, or +in the :program:`mongo` shell, the +:method:`~db.collection.getIndexes()` method to return a list of the +indexes on a collection, as in the following examples. + +.. seealso:: :doc:`/core/indexes` and :doc:`/administration/indexes` + for more information about indexes in MongoDB and common index + management operations. + +.. index:: index; list indexes +.. _index-list-indexes-for-collection: + +List all Indexes on a Collection +-------------------------------- + +To return a list of all indexes on a collection, use the, use the +:method:`db.collection.getIndexes()` method or a similar +:api:`method for your driver <>`. + +For example, to view all indexes on the ``people`` collection: + +.. code-block:: javascript + + db.people.getIndexes() + +.. index:: index; list indexes +.. _index-list-indexes-for-database: + +List all Indexes for a Database +------------------------------- + +To return a list of all indexes on all collections in a database, use +the following operation in the :program:`mongo` shell: + +.. code-block:: javascript + + db.system.indexes.find() + +See :data:`system.indexes <.system.indexes>` for more +information about these documents. diff --git a/source/tutorial/manage-chained-replication.txt b/source/tutorial/manage-chained-replication.txt new file mode 100644 index 00000000000..90ad0fadae0 --- /dev/null +++ b/source/tutorial/manage-chained-replication.txt @@ -0,0 +1,85 @@ +========================== +Manage Chained Replication +========================== + +.. default-domain:: mongodb + +Starting in version 2.0, MongoDB supports chained replication. 
A +chained replication occurs when a :term:`secondary` member replicates +from another secondary member instead of from the :term:`primary`. This +might be the case, for example, if a secondary selects its replication +target based on ping time and if the closest member is another +secondary. + +Chained replication can reduce load on the primary. But chained +replication can also result in increased replication lag, depending on +the topology of the network. + +.. versionadded:: 2. + +You can use the :data:`~local.system.replset.settings.chainingAllowed` +setting in :doc:`/reference/replica-configuration` to disable chained +replication for situations where chained replication is causing lag. + +MongoDB enables chained replication by default. This procedure +describes how to disable it and how to re-enable it. + +.. note:: + + If chained replication is disabled, you still can use + :dbcommand:`replSetSyncFrom` to specify that a secondary replicates + from another secondary. But that configuration will last only until the + secondary recalculates which member to sync from. + +Disable Chained Replication +--------------------------- + +To disable chained replication, set the +:data:`~local.system.replset.settings.chainingAllowed` +field in :doc:`/reference/replica-configuration` to ``false``. + +You can use the following sequence of commands to set +:data:`~local.system.replset.settings.chainingAllowed` to +``false``: + +1. Copy the configuration settings into the ``cfg`` object: + + .. code-block:: javascript + + cfg = rs.config() + +#. Take note of whether the current configuration settings contain the + ``settings`` sub-document. If they do, skip this step. + + .. warning:: To avoid data loss, skip this step if the configuration + settings contain the ``settings`` sub-document. + + If the current configuration settings **do not** contain the + ``settings`` sub-document, create the sub-document by issuing the + following command: + + .. code-block:: javascript + + cfg.settings = { } + +#. Issue the following sequence of commands to set + :data:`~local.system.replset.settings.chainingAllowed` to + ``false``: + + .. code-block:: javascript + + cfg.settings.chainingAllowed = false + rs.reconfig(cfg) + +Re-enable Chained Replication +----------------------------- + +To re-enable chained replication, set +:data:`~local.system.replset.settings.chainingAllowed` to ``true``. +You can use the following sequence of commands: + +.. code-block:: javascript + + cfg = rs.config() + cfg.settings.chainingAllowed = true + rs.reconfig(cfg) diff --git a/source/tutorial/manage-in-progress-indexing-operations.txt b/source/tutorial/manage-in-progress-indexing-operations.txt new file mode 100644 index 00000000000..bc99ac6b316 --- /dev/null +++ b/source/tutorial/manage-in-progress-indexing-operations.txt @@ -0,0 +1,26 @@ +.. index:: index; monitor index building +.. _index-monitor-index-building: +.. _indexes-admin-stop-in-progress-build: + +========================================================== +Monitor and Manage In Progress Index Creation and Building +========================================================== + +.. default-domain:: mongodb + +To see the status of the indexing processes, you can use the +:method:`db.currentOp()` method in the :program:`mongo` shell. The value +of the ``query`` field and the ``msg`` field will indicate if the +operation is an index build. The ``msg`` field also indicates the +percent of the build that is complete. 
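+
+For example, the following operation, a sketch you may need to adapt
+(the exact ``msg`` text varies by server version), prints the opid and
+progress message of every in-progress operation that reports a ``msg``
+field:
+
+.. code-block:: javascript
+
+   db.currentOp().inprog.forEach(
+      function ( op ) {
+         // index builds report progress in msg, e.g.
+         // "index: (1/3) external sort Progress: 787380/993272 79%"
+         if ( op.msg ) {
+            print( "op " + op.opid + ": " + op.msg );
+         }
+      }
+   )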
+
+To terminate an ongoing index build, use the
+:method:`db.killOp()` method in the :program:`mongo` shell.
+
+For more information about the output of :method:`db.currentOp()`, see
+the :doc:`/reference/current-op` document.
+
+.. versionchanged:: 2.4
+   Before MongoDB 2.4, you could *only* terminate *background* index
+   builds. After 2.4, you can terminate any index build, including
+   foreground index builds.
diff --git a/source/tutorial/manage-sharded-cluster-balancer.txt b/source/tutorial/manage-sharded-cluster-balancer.txt
index 958e8262bec..73733e6b487 100644
--- a/source/tutorial/manage-sharded-cluster-balancer.txt
+++ b/source/tutorial/manage-sharded-cluster-balancer.txt
@@ -198,7 +198,7 @@ Disable Balancing During Backups
 --------------------------------
 
 If MongoDB migrates a :term:`chunk` during a :doc:`backup
-</administration/backups>`, you can end with an inconsistent snapshot
+</core/backups>`, you can end up with an inconsistent snapshot
 of your :term:`sharded cluster`. Never run a backup while the
 balancer is active. To ensure that the balancer is inactive during
 your backup operation:
diff --git a/source/tutorial/manage-sharded-cluster-config-server.txt b/source/tutorial/manage-sharded-cluster-config-server.txt
index 3633394055b..5500830526a 100644
--- a/source/tutorial/manage-sharded-cluster-config-server.txt
+++ b/source/tutorial/manage-sharded-cluster-config-server.txt
@@ -237,7 +237,7 @@ of the cluster metadata from the config database is straight forward:
    :ref:`sharding-balancing-disable-temporally` section for more
    information on managing the balancer process.
 
-.. seealso:: :doc:`/administration/backups`.
+.. seealso:: :doc:`/core/backups`.
 
 .. [#read-only] While one of the three config servers is unavailable,
    the cluster cannot split any chunks nor can it migrate chunks
diff --git a/source/tutorial/map-reduce-examples.txt b/source/tutorial/map-reduce-examples.txt
new file mode 100644
index 00000000000..7587700d920
--- /dev/null
+++ b/source/tutorial/map-reduce-examples.txt
@@ -0,0 +1,12 @@
+===================
+Map-Reduce Examples
+===================
+
+.. default-domain:: mongodb
+
+In the :program:`mongo` shell, the :method:`db.collection.mapReduce()`
+method is a wrapper around the :dbcommand:`mapReduce` command. The
+following examples use the :method:`db.collection.mapReduce()` method:
+
+.. include:: /includes/examples-map-reduce.rst
+   :start-after: map-reduce-document-prototype-begin
diff --git a/source/tutorial/measure-index-use.txt b/source/tutorial/measure-index-use.txt
new file mode 100644
index 00000000000..2b95edceefb
--- /dev/null
+++ b/source/tutorial/measure-index-use.txt
@@ -0,0 +1,80 @@
+.. index:: index; measure use
+.. _index-measure-index-use:
+.. _indexes-measuring-use:
+
+=================
+Measure Index Use
+=================
+
+.. default-domain:: mongodb
+
+Synopsis
+--------
+
+Query performance is a good general indicator of index use;
+however, for more precise insight into index use, MongoDB provides a
+number of tools that allow you to study query operations and observe
+index use for your database.
+
+.. seealso:: :doc:`/core/indexes`,
+   :doc:`/applications/indexes`, and
+   :doc:`/administration/indexes` for more information.
+
+Operations
+----------
+
+Return Query Plan with ``explain()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Append the :method:`explain() <cursor.explain()>` method to any cursor
+(e.g. query) to return a document with statistics about the query
+process, including the index used, the number of documents scanned,
+and the time the query takes to process in milliseconds.
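+
+For example, the following operation, a sketch using the same
+``people`` collection as the examples below, returns the query plan.
+In the output, fields such as ``cursor``, ``n``, ``nscanned``, and
+``millis`` report the index used, the documents returned and scanned,
+and the query time:
+
+.. code-block:: javascript
+
+   db.people.find( { zipcode: { $gt: 63000 } } ).explain()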
+
+Control Index Use with ``hint()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Append the :method:`hint() <cursor.hint()>` method to any cursor (e.g.
+query) with the index as the argument to *force* MongoDB
+to use a specific index to fulfill the query. Consider the following
+example:
+
+.. code-block:: javascript
+
+   db.people.find( { name: "John Doe", zipcode: { $gt: 63000 } } ).hint( { zipcode: 1 } )
+
+You can use :method:`hint() <cursor.hint()>` and :method:`explain()
+<cursor.explain()>` in conjunction with each other to compare the
+effectiveness of a specific index. Specify the ``$natural`` operator
+to the :method:`hint() <cursor.hint()>` method to prevent MongoDB from
+using *any* index:
+
+.. code-block:: javascript
+
+   db.people.find( { name: "John Doe", zipcode: { $gt: 63000 } } ).hint( { $natural: 1 } )
+
+Instance Index Use Reporting
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MongoDB provides a number of metrics of index use and operation that
+you may want to consider when analyzing index use for your database:
+
+- In the output of :dbcommand:`serverStatus`:
+
+  - :data:`~serverStatus.indexCounters`
+
+  - :data:`~serverStatus.metrics.queryExecutor.scanned`
+
+  - :data:`~serverStatus.metrics.operation.scanAndOrder`
+
+- In the output of :dbcommand:`collStats`:
+
+  - :data:`~collStats.totalIndexSize`
+
+  - :data:`~collStats.indexSizes`
+
+- In the output of :dbcommand:`dbStats`:
+
+  - :data:`dbStats.indexes`
+
+  - :data:`dbStats.indexSize`
diff --git a/source/tutorial/model-data-for-keyword-search.txt b/source/tutorial/model-data-for-keyword-search.txt
index 79d93f8d4ec..9225f61c78b 100644
--- a/source/tutorial/model-data-for-keyword-search.txt
+++ b/source/tutorial/model-data-for-keyword-search.txt
@@ -12,7 +12,7 @@ Model Data to Support Keyword Search
    information. In 2.4, MongoDB provides a text search feature. See
-   :doc:`/applications/text-search` for more information.
+   :doc:`/core/text-search` for more information.
 
 If your application needs to perform queries on the content of a field
 that holds text you can perform exact matches on the text or use
diff --git a/source/administration/snmp.txt b/source/tutorial/monitor-with-snmp.txt
similarity index 100%
rename from source/administration/snmp.txt
rename to source/tutorial/monitor-with-snmp.txt
diff --git a/source/tutorial/perform-incremental-map-reduce.txt b/source/tutorial/perform-incremental-map-reduce.txt
new file mode 100644
index 00000000000..7fb47799e6c
--- /dev/null
+++ b/source/tutorial/perform-incremental-map-reduce.txt
@@ -0,0 +1,159 @@
+==============================
+Perform Incremental Map-Reduce
+==============================
+
+.. default-domain:: mongodb
+
+Map-reduce operations can handle complex aggregation tasks. To perform
+map-reduce operations, MongoDB provides the :dbcommand:`mapReduce`
+command and, in the :program:`mongo` shell, the
+:method:`db.collection.mapReduce()` wrapper method.
+
+If the map-reduce dataset is constantly growing, then rather than
+performing the map-reduce operation over the entire dataset each time
+you want to run map-reduce, you may want to perform an incremental
+map-reduce.
+
+To perform incremental map-reduce:
+
+#. Run a map-reduce job over the current collection and output the
+   result to a separate collection.
+
+#. When you have more data to process, run a subsequent map-reduce job
+   with:
+
+   - the ``query`` parameter that specifies conditions that match
+     *only* the new documents.
+
+   - the ``out`` parameter that specifies the ``reduce`` action to
+     merge the new results into the existing output collection.
+
+Consider the following example where you schedule a map-reduce
+operation on a ``sessions`` collection to run at the end of each day.
+
+Data Setup
+----------
+
+The ``sessions`` collection contains documents that log users' sessions
+each day, for example:
+
+.. code-block:: javascript
+
+   db.sessions.save( { userid: "a", ts: ISODate('2011-11-03 14:17:00'), length: 95 } );
+   db.sessions.save( { userid: "b", ts: ISODate('2011-11-03 14:23:00'), length: 110 } );
+   db.sessions.save( { userid: "c", ts: ISODate('2011-11-03 15:02:00'), length: 120 } );
+   db.sessions.save( { userid: "d", ts: ISODate('2011-11-03 16:45:00'), length: 45 } );
+
+   db.sessions.save( { userid: "a", ts: ISODate('2011-11-04 11:05:00'), length: 105 } );
+   db.sessions.save( { userid: "b", ts: ISODate('2011-11-04 13:14:00'), length: 120 } );
+   db.sessions.save( { userid: "c", ts: ISODate('2011-11-04 17:00:00'), length: 130 } );
+   db.sessions.save( { userid: "d", ts: ISODate('2011-11-04 15:37:00'), length: 65 } );
+
+Initial Map-Reduce of Current Collection
+----------------------------------------
+
+Run the first map-reduce operation as follows:
+
+#. Define the ``map`` function that maps the ``userid`` to an
+   object that contains the fields ``userid``, ``total_time``, ``count``,
+   and ``avg_time``:
+
+   .. code-block:: javascript
+
+      var mapFunction = function() {
+          var key = this.userid;
+          var value = {
+                        userid: this.userid,
+                        total_time: this.length,
+                        count: 1,
+                        avg_time: 0
+                      };
+
+          emit( key, value );
+      };
+
+#. Define the corresponding ``reduce`` function with two arguments
+   ``key`` and ``values`` to calculate the total time and the count.
+   The ``key`` corresponds to the ``userid``, and the ``values`` is an
+   array whose elements correspond to the individual objects mapped to the
+   ``userid`` in the ``mapFunction``.
+
+   .. code-block:: javascript
+
+      var reduceFunction = function(key, values) {
+
+          var reducedObject = {
+                                userid: key,
+                                total_time: 0,
+                                count:0,
+                                avg_time:0
+                              };
+
+          values.forEach( function(value) {
+                              reducedObject.total_time += value.total_time;
+                              reducedObject.count += value.count;
+                          }
+                        );
+          return reducedObject;
+      };
+
+#. Define the ``finalize`` function with two arguments ``key`` and
+   ``reducedValue``. The function modifies the ``reducedValue`` document
+   to compute the ``avg_time`` field and returns the modified document.
+
+   .. code-block:: javascript
+
+      var finalizeFunction = function (key, reducedValue) {
+
+          if (reducedValue.count > 0)
+              reducedValue.avg_time = reducedValue.total_time / reducedValue.count;
+
+          return reducedValue;
+      };
+
+#. Perform map-reduce on the ``sessions`` collection using the
+   ``mapFunction``, the ``reduceFunction``, and the
+   ``finalizeFunction`` functions. Output the results to a collection
+   ``session_stat``. If the ``session_stat`` collection already exists,
+   the operation will replace the contents:
+
+   .. code-block:: javascript
+
+      db.sessions.mapReduce( mapFunction,
+                             reduceFunction,
+                             {
+                               out: { reduce: "session_stat" },
+                               finalize: finalizeFunction
+                             }
+                           )
+
+Subsequent Incremental Map-Reduce
+---------------------------------
+
+Later as the ``sessions`` collection grows, you can run additional
+map-reduce operations. For example, add new documents to the
+``sessions`` collection:
+
+.. 
code-block:: javascript + + db.sessions.save( { userid: "a", ts: ISODate('2011-11-05 14:17:00'), length: 100 } ); + db.sessions.save( { userid: "b", ts: ISODate('2011-11-05 14:23:00'), length: 115 } ); + db.sessions.save( { userid: "c", ts: ISODate('2011-11-05 15:02:00'), length: 125 } ); + db.sessions.save( { userid: "d", ts: ISODate('2011-11-05 16:45:00'), length: 55 } ); + +At the end of the day, perform incremental map-reduce on the +``sessions`` collection but use the ``query`` field to select only the +new documents. Output the results to the collection ``session_stat``, +but ``reduce`` the contents with the results of the incremental +map-reduce: + +.. code-block:: javascript + + db.sessions.mapReduce( mapFunction, + reduceFunction, + { + query: { ts: { $gt: ISODate('2011-11-05 00:00:00') } }, + out: { reduce: "session_stat" }, + finalize: finalizeFunction + } + ); diff --git a/source/tutorial/rebuild-indexes.txt b/source/tutorial/rebuild-indexes.txt new file mode 100644 index 00000000000..09e6ab7e729 --- /dev/null +++ b/source/tutorial/rebuild-indexes.txt @@ -0,0 +1,56 @@ +.. index:: index; rebuild +.. _index-rebuild-index: + +=============== +Rebuild Indexes +=============== + +.. default-domain:: mongodb + +If you need to rebuild indexes for a collection you can use the +:method:`db.collection.reIndex()` method to rebuild all indexes on a +collection in a single operation. This operation drops all indexes, +including the :ref:`_id index `, and then rebuilds all +indexes. + +.. seealso:: :doc:`/core/indexes` and :doc:`/administration/indexes`. + +Process +------- + +The operation takes the following form: + +.. code-block:: javascript + + db.accounts.reIndex() + +MongoDB will return the following document when the operation +completes: + +.. code-block:: javascript + + { + "nIndexesWas" : 2, + "msg" : "indexes dropped for collection", + "nIndexes" : 2, + "indexes" : [ + { + "key" : { + "_id" : 1, + "tax-id" : 1 + }, + "ns" : "records.accounts", + "name" : "_id_" + } + ], + "ok" : 1 + } + +This shell helper provides a wrapper around the :dbcommand:`reIndex` +:term:`database command`. Your :doc:`client library ` +may have a different or additional interface for this operation. + +Additional Considerations +------------------------- + +.. include:: /includes/note-build-indexes-on-replica-sets.rst diff --git a/source/tutorial/reconfigure-replica-set-with-unavailable-members.txt b/source/tutorial/reconfigure-replica-set-with-unavailable-members.txt index 0eb4a3c54bb..fc1fb095222 100644 --- a/source/tutorial/reconfigure-replica-set-with-unavailable-members.txt +++ b/source/tutorial/reconfigure-replica-set-with-unavailable-members.txt @@ -19,8 +19,7 @@ replica set when a **majority** of members are *not* accessible: You may need to use one of these procedures, for example, in a geographically distributed replica set, where *no* local group of -members can reach a majority. See -:ref:`replica-set-elections-and-network-partitions` for more +members can reach a majority. See :ref:`replica-set-elections` for more information on this situation. .. index:: replica set; reconfiguration diff --git a/source/tutorial/recover-data-following-unexpected-shutdown.txt b/source/tutorial/recover-data-following-unexpected-shutdown.txt index 02f0f609b51..23dccb2f6eb 100644 --- a/source/tutorial/recover-data-following-unexpected-shutdown.txt +++ b/source/tutorial/recover-data-following-unexpected-shutdown.txt @@ -78,9 +78,11 @@ Overview .. warning:: Recovering a member of a replica set. 
- Do not use this procedure to recover a member of a :term:`replica set`. - Instead you should either restore from a :doc:`backup ` - or perform an initial sync using data from an intact member of the set, as described in :ref:`replica-set-resync-stale-member`. + Do not use this procedure to recover a member of a + :term:`replica set`. Instead you should either restore from + a :doc:`backup ` or perform an initial sync using + data from an intact member of the set, as described in + :doc:`/tutorial/resync-replica-set-member`. There are two processes to repair data files that result from an unexpected shutdown: @@ -186,6 +188,6 @@ situations. If you are not running with journaling, and your database shuts down unexpectedly for *any* reason, you should always proceed *as if* your database is in an inconsistent and likely corrupt state. If at all possible restore -from :doc:`backup ` or, if running as a :term:`replica +from :doc:`backup ` or, if running as a :term:`replica set`, restore by performing an initial sync using data from an intact -member of the set, as described in :ref:`replica-set-resync-stale-member`. +member of the set, as described in :doc:`/tutorial/resync-replica-set-member`. diff --git a/source/tutorial/remove-indexes.txt b/source/tutorial/remove-indexes.txt new file mode 100644 index 00000000000..7e501c09423 --- /dev/null +++ b/source/tutorial/remove-indexes.txt @@ -0,0 +1,46 @@ +.. index:: index; remove +.. _index-remove-index: + +============== +Remove Indexes +============== + +.. default-domain:: mongodb + +To remove an index from a collection use the +:method:`~db.collection.dropIndex()` method and the following +procedure. If you simply need to rebuild indexes you can use the +process described in the :doc:`/tutorial/rebuild-indexes` +document. + +.. seealso:: :doc:`/administration/indexes` and :doc:`/core/indexes` + for more information about indexes and indexing operations in + MongoDB. + +Operations +---------- + +To remove an index, use the :method:`db.collection.dropIndex()` method, +as in the following example: + +.. code-block:: javascript + + db.accounts.dropIndex( { "tax-id": 1 } ) + +This will remove the index on the ``"tax-id"`` field in the ``accounts`` +collection. The shell provides the following document after completing +the operation: + +.. code-block:: javascript + + { "nIndexesWas" : 3, "ok" : 1 } + +Where the value of ``nIndexesWas`` reflects the number of indexes +*before* removing this index. You can also use the +:method:`db.collection.dropIndexes()` to remove *all* indexes, except +for the :ref:`_id index ` from a collection. + +These shell helpers provide wrappers around the +:dbcommand:`dropIndexes` :term:`database command`. Your :doc:`client +library ` may have a different or additional +interface for these operations. diff --git a/source/tutorial/remove-replica-set-member.txt b/source/tutorial/remove-replica-set-member.txt new file mode 100644 index 00000000000..d6aab0bce51 --- /dev/null +++ b/source/tutorial/remove-replica-set-member.txt @@ -0,0 +1,38 @@ +=============================== +Remove Members from Replica Set +=============================== + +.. default-domain:: mongodb + +You may remove a member of a replica set at any time; *however*, for best +results always *shut down* the :program:`mongod` instance before +removing it from a replica set. + +.. versionchanged:: 2.2 + Before 2.2, you *had* to shut down the :program:`mongod` instance + before removing it. While 2.2 removes this requirement, it remains + good practice. 
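+
+For example, to shut down the member cleanly before removing it, you
+might connect directly to that member with the :program:`mongo` shell
+and run :method:`db.shutdownServer()` against the ``admin`` database:
+
+.. code-block:: javascript
+
+   use admin
+   db.shutdownServer()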
+ +To remove a member, use the +:method:`rs.remove()` method in the :program:`mongo` shell while +connected to the current :term:`primary`. Issue the +:method:`db.isMaster()` command when connected to *any* member of the +set to determine the current primary. Use a command in either +of the following forms to remove the member: + +.. code-block:: javascript + + rs.remove("mongo2.example.net:27017") + rs.remove("mongo3.example.net") + +This operation disconnects the shell briefly and forces a +re-connection as the :term:`replica set` renegotiates which member +will be primary. The shell displays an error even if this +command succeeds. + +You can re-add a removed member to a replica set at any time using the +:doc:`procedure for adding replica set members `. +Additionally, consider using the :ref:`replica set reconfiguration procedure +` to change the +:data:`~local.system.replset.members[n].host` value to rename a member in a replica set +directly. diff --git a/source/tutorial/replace-replica-set-member.txt b/source/tutorial/replace-replica-set-member.txt new file mode 100644 index 00000000000..b50f03a04a4 --- /dev/null +++ b/source/tutorial/replace-replica-set-member.txt @@ -0,0 +1,32 @@ +============================ +Replace a Replica Set Member +============================ + +.. default-domain:: mongodb + +Use this procedure to replace a member of a replica set when the hostname +has changed. This procedure preserves all existing configuration +for a member, except its hostname/location. + +You may need to replace a replica set member if you want to replace an +existing system and only need to change the hostname rather than +completely replace all configured options related to the previous +member. + +Use :method:`rs.reconfig()` to change the value of the +:data:`~local.system.replset.members[n].host` field to reflect the new hostname or port +number. :method:`rs.reconfig()` will not change the value of +:data:`~local.system.replset.members[n]._id`. + +.. code-block:: javascript + + cfg = rs.conf() + cfg.members[0].host = "mongo2.example.net:27019" + rs.reconfig(cfg) + +.. warning:: + + Any replica set configuration change can trigger the current + :term:`primary` to step down, which forces an :ref:`election `. This + causes the current shell session, and clients connected to this replica set, + to produce an error even when the operation succeeds. diff --git a/source/tutorial/restore-sharded-cluster.txt b/source/tutorial/restore-sharded-cluster.txt index 679f530569f..fd37c30ad4d 100644 --- a/source/tutorial/restore-sharded-cluster.txt +++ b/source/tutorial/restore-sharded-cluster.txt @@ -15,7 +15,7 @@ consider the following tutorials describe backup procedures in detail: - :doc:`/tutorial/backup-sharded-cluster-with-database-dumps` The exact procedure used to restore a database depends on the method -used to capture the backup. See the :doc:`/administration/backups` +used to capture the backup. See the :doc:`/core/backups` document for an overview of backups with MongoDB, as well as :ref:`sharded-cluster-backups` which provides an overview of the high level concepts important for backing up sharded clusters. 
diff --git a/source/tutorial/restore-single-shard.txt b/source/tutorial/restore-single-shard.txt
index dd0fc6b25a5..cc09ef28725 100644
--- a/source/tutorial/restore-single-shard.txt
+++ b/source/tutorial/restore-single-shard.txt
@@ -17,7 +17,7 @@ backup and restoration of sharded clusters specifically:
 
 - :ref:`sharded-cluster-backups`
 - :doc:`/tutorial/restore-sharded-cluster`
-- :doc:`/administration/backups`
+- :doc:`/core/backups`
 
 Procedure
 ---------
@@ -29,7 +29,7 @@ from this shard since the last backup. If that's the case, you must
 manually move those chunks, as described in this procedure.
 
 1. Restore the shard as you would any other :program:`mongod`
-   instance. See :doc:`/administration/backups` for overviews of these
+   instance. See :doc:`/core/backups` for overviews of these
    procedures.
 
 #. For all chunks that migrated away from this shard, you do not need
diff --git a/source/tutorial/resync-replica-set-member.txt b/source/tutorial/resync-replica-set-member.txt
new file mode 100644
index 00000000000..e4e242d9f15
--- /dev/null
+++ b/source/tutorial/resync-replica-set-member.txt
@@ -0,0 +1,92 @@
+================================
+Resync a Member of a Replica Set
+================================
+
+.. default-domain:: mongodb
+
+When a secondary's replication process falls so far behind that the
+:term:`primary` overwrites oplog entries that the secondary has not yet
+replicated, that secondary cannot catch up and becomes "stale." When
+that occurs, you must completely resynchronize the member by removing
+its data and performing an initial sync.
+
+To do so, use one of the following approaches:
+
+- Restart the :program:`mongod` with an empty data directory and let
+  MongoDB's normal initial syncing feature restore the data. This
+  is the simpler option, but may take longer to replace the data.
+
+  See :ref:`replica-set-auto-resync-stale-member`.
+
+- Restart the machine with a copy of a recent data directory from
+  another member in the :term:`replica set`. This procedure can replace
+  the data more quickly but requires more manual steps.
+
+  See :ref:`replica-set-resync-by-copying`.
+
+.. index:: replica set; resync
+.. _replica-set-auto-resync-stale-member:
+
+Automatically Resync a Stale Member
+-----------------------------------
+
+This procedure relies on MongoDB's regular process for initial
+sync. This will restore the data on the stale member to reflect the
+current state of the set. For an overview of the MongoDB initial sync
+process, see the :ref:`replica-set-syncing` section.
+
+To resync the stale member:
+
+1. Stop the stale member's :program:`mongod` instance. On Linux
+   systems you can use :option:`mongod --shutdown`. Set
+   :option:`--dbpath <mongod --dbpath>` to the member's data
+   directory, as in the following:
+
+   .. code-block:: sh
+
+      mongod --dbpath /data/db/ --shutdown
+
+#. Delete all data and sub-directories from the member's data
+   directory. Removing the data from the :setting:`dbpath` forces
+   MongoDB to perform a complete resync. Consider making a backup
+   first.
+
+#. Restart the :program:`mongod` instance on the member. For example:
+
+   .. code-block:: sh
+
+      mongod --dbpath /data/db/ --replSet rsProduction
+
+   At this point, the :program:`mongod` will perform an initial
+   sync. The length of the initial sync process depends on the
+   size of the database and the network connection between members of
+   the replica set.
+
+   Initial sync operations can impact the other members of the set and
+   create additional traffic to the primary, and can only occur if
+   another member of the set is accessible and up to date.
+
+.. index:: replica set; resync
+.. _replica-set-resync-by-copying:
+
+Resync by Copying All Datafiles from Another Member
+---------------------------------------------------
+
+This approach uses a copy of the data files from an existing member of
+the replica set, or a backup of the data files, to "seed" the stale
+member.
+
+The copy or backup of the data files **must** be sufficiently recent
+to allow the new member to catch up with the :term:`oplog`, otherwise
+the member would need to perform an initial sync.
+
+.. note::
+
+   In most cases you cannot copy data files from one running
+   :program:`mongod` instance to another, because the data files will
+   change during the file copy operation. Consider the
+   :doc:`/core/backups` documentation for several methods
+   that you can use to capture a consistent snapshot of a running
+   :program:`mongod` instance.
+
+After you have copied the data files from the "seed" source, start the
+:program:`mongod` instance and allow it to apply all operations from
+the oplog until it reflects the current state of the replica set.
diff --git a/source/tutorial/select-shard-key.txt b/source/tutorial/select-shard-key.txt
new file mode 100644
index 00000000000..a904ba5d3b1
--- /dev/null
+++ b/source/tutorial/select-shard-key.txt
@@ -0,0 +1,54 @@
+.. index:: shard key
+   single: sharding; shard key
+.. _sharding-shard-key-selection:
+
+==================
+Select a Shard Key
+==================
+
+.. default-domain:: mongodb
+
+This document gives guidelines for selecting a shard key. Choosing the
+correct shard key can have a great impact on the performance,
+capability, and functioning of your database and cluster. Appropriate
+shard key choice depends on the schema of your data and the way that
+your application queries and writes data to the database.
+
+Use the following guidelines when creating a shard key.
+
+Create a Shard Key that is Easily Divisible
+-------------------------------------------
+
+An easily divisible shard key makes it easy for MongoDB to distribute
+content among the shards. Shard keys that have a limited number of
+possible values can result in chunks that are "unsplittable." See the
+:ref:`sharding-shard-key-cardinality` section for more information.
+
+Create a Shard Key that has High Randomness
+-------------------------------------------
+
+A shard key with high randomness prevents any single shard from becoming
+a bottleneck and will distribute write operations among the cluster.
+
+Conversely, a shard key that has a high correlation with insert time is
+a poor choice. For more information, see
+:ref:`sharding-shard-key-write-scaling`.
+
+Create a Shard Key that Targets a Single Shard
+----------------------------------------------
+
+A shard key that targets a single shard makes it possible for the
+:program:`mongos` program to return most query operations directly from
+a single *specific* :program:`mongod` instance. Your shard key should be
+the primary field used by your queries. Fields with a high degree of
+"randomness" are poor choices for this reason. For examples, see
+:ref:`sharding-shard-key-query-isolation`.
+
+Create a Special Purpose or Compound Key
+----------------------------------------
+
+The challenge when selecting a shard key is that there is not always
+an obvious choice.
Often, an existing field in your collection may not be +the optimal key. In those situations, computing a special purpose +shard key into an additional field or using a compound shard key may +help produce one that is more ideal. diff --git a/source/tutorial/shard-collection-with-a-hashed-shard-key.txt b/source/tutorial/shard-collection-with-a-hashed-shard-key.txt new file mode 100644 index 00000000000..d32938a2a2a --- /dev/null +++ b/source/tutorial/shard-collection-with-a-hashed-shard-key.txt @@ -0,0 +1,53 @@ +.. todo The "Hashed Sharding" topic was moved with few changes from +.. core/sharded-clusters.txt to here. Changes in wording are eventually +.. needed. + +=============== +Hashed Sharding +=============== + +.. default-domain:: mongodb + +.. versionadded:: 2.4 + +:ref:`Hashed shard keys ` use a +:ref:`hashed index ` of a single field as the +:term:`shard key` to partition data across your sharded cluster. + +Procedure +--------- + +To shard a collection using a hashed shard key, issue an operation in +the :program:`mongo` shell that resembles the following: + +.. code-block:: javascript + + sh.shardCollection( "records.active", { a: "hashed" } ) + +This operation shards the ``active`` collection in the ``records`` +database, using a hash of the ``a`` field as the shard key. + +Additional Considerations +------------------------- + +The field you choose as your hashed shard key should have a good +cardinality, or large number of different values. Hashed keys work +well with fields that increase monotonically like :term:`ObjectId` +values or timestamps. + +If you shard an empty collection using a hashed +shard key, MongoDB will automatically create and migrate chunks so +that each shard has two chunks. You can control how many chunks +MongoDB will create with the ``numInitialChunks`` parameter to +:dbcommand:`shardCollection`. + +See :ref:`index-hashed-index` for limitations on hashed indexes. + +.. include:: /includes/warning-hashed-index-floating-point.rst + +.. warning:: + + Hashed shard keys are only supported by the MongoDB 2.4 and greater + versions of the :program:`mongos` program. After sharding a + collection with a hashed shard key, you must use the MongoDB 2.4 or + greater :program:`mongos` instances in your sharded cluster. diff --git a/source/tutorial/shard-gridfs-data.txt b/source/tutorial/shard-gridfs-data.txt new file mode 100644 index 00000000000..70b7a30a62f --- /dev/null +++ b/source/tutorial/shard-gridfs-data.txt @@ -0,0 +1,54 @@ +======================= +Shard GridFS Data Store +======================= + +.. default-domain:: mongodb + +When sharding a :term:`GridFS` store, consider the following: + +``files`` Collection +-------------------- + +Most deployments will not need to shard the ``files`` +collection. The ``files`` collection is typically small, and only +contains metadata. None of the required keys for GridFS lend +themselves to an even distribution in a sharded situation. If you +*must* shard the ``files`` collection, use the ``_id`` field +possibly in combination with an application field + +Leaving ``files`` unsharded means that all the file metadata +documents live on one shard. For production GridFS stores you *must* +store the ``files`` collection on a replica set. + +``chunks`` Collection +--------------------- + +To shard the ``chunks`` collection by ``{ files_id : 1 , n : 1 }``, +issue commands similar to the following: + +.. 
code-block:: javascript
+
+   db.fs.chunks.ensureIndex( { files_id : 1 , n : 1 } )
+
+   db.runCommand( { shardCollection : "test.fs.chunks" , key : { files_id : 1 , n : 1 } } )
+
+You may also want to shard using just the ``files_id`` field, as in the
+following operation:
+
+.. code-block:: javascript
+
+   db.runCommand( { shardCollection : "test.fs.chunks" , key : { files_id : 1 } } )
+
+.. note::
+
+   .. versionchanged:: 2.2
+
+   Before 2.2, you had to create an additional index on ``files_id``
+   to shard using *only* this field.
+
+The default ``files_id`` value is an :term:`ObjectId`. As a result,
+the values of ``files_id`` are always ascending, and applications
+will insert all new GridFS data to a single chunk and shard. If
+your write load is too high for a single server to handle, consider
+a different shard key or use a different value for ``_id`` in the
+``files`` collection.
diff --git a/source/tutorial/specify-language-for-text-index.txt b/source/tutorial/specify-language-for-text-index.txt
new file mode 100644
index 00000000000..aace439a249
--- /dev/null
+++ b/source/tutorial/specify-language-for-text-index.txt
@@ -0,0 +1,28 @@
+=================================
+Specify a Language for Text Index
+=================================
+
+.. default-domain:: mongodb
+
+The default language associated with the indexed data determines the
+list of stop words and the rules for the stemmer and tokenizer. The
+default language for the indexed data is ``english``.
+
+To specify a different language, use the ``default_language`` option
+when creating the ``text`` index. See :ref:`text-search-languages` for
+the languages available for ``default_language``.
+
+The following example creates a ``text`` index on the
+``content`` field and sets the ``default_language`` to
+``spanish``:
+
+.. code-block:: javascript
+
+   db.collection.ensureIndex(
+      { content : "text" },
+      { default_language: "spanish" }
+   )
+
+.. seealso::
+
+   :doc:`/tutorial/create-text-index-on-multi-language-collection`
diff --git a/source/tutorial/store-javascript-function-on-server.txt b/source/tutorial/store-javascript-function-on-server.txt
new file mode 100644
index 00000000000..3dca5c506ad
--- /dev/null
+++ b/source/tutorial/store-javascript-function-on-server.txt
@@ -0,0 +1,69 @@
+=========================================
+Store a JavaScript Function on the Server
+=========================================
+
+.. default-domain:: mongodb
+
+.. note::
+
+   We do **not** recommend using server-side stored functions if you
+   can avoid them.
+
+There is a special system collection named ``system.js`` that can store
+JavaScript functions for reuse.
+
+To store a function, you can use the :method:`db.collection.save()`
+method, as in the following example:
+
+.. code-block:: javascript
+
+   db.system.js.save(
+      {
+        _id : "myAddFunction" ,
+        value : function (x, y){ return x + y; }
+      }
+   );
+
+- The ``_id`` field holds the name of the function and is unique per
+  database.
+
+- The ``value`` field holds the function definition.
+
+Once you save a function in the ``system.js`` collection, you can use
+the function from any JavaScript context (e.g. the :dbcommand:`eval`
+command, the :program:`mongo` shell method :method:`db.eval()`, the
+:operator:`$where` operator, or the :dbcommand:`mapReduce` command and
+the :program:`mongo` shell method :method:`db.collection.mapReduce()`).
+ +Consider the following example from the :program:`mongo` shell that +first saves a function named ``echoFunction`` to the ``system.js`` +collection and calls the function using :method:`db.eval()` +method: + +.. code-block:: javascript + + db.system.js.save( + { _id: "echoFunction", + value : function(x) { return x; } + } + ) + + db.eval( "echoFunction( 'test' )" ) + +See ``_ for a full example. + +.. versionadded:: 2.1 + In the :program:`mongo` shell, you can use + :method:`db.loadServerScripts()` to load all the scripts saved in + the ``system.js`` collection for the current db. Once loaded, you + can invoke the functions directly in the shell, as in the following + example: + +.. code-block:: javascript + + db.loadServerScripts(); + + echoFunction(3); + + myAddFunction(3, 5); + diff --git a/source/tutorial/troubleshoot-map-function.txt b/source/tutorial/troubleshoot-map-function.txt new file mode 100644 index 00000000000..21d20113cbf --- /dev/null +++ b/source/tutorial/troubleshoot-map-function.txt @@ -0,0 +1,84 @@ +============================= +Troubleshoot the Map Function +============================= + +.. default-domain:: mongodb + +The ``map`` function is a JavaScript function that associates or “maps” +a value with a key and emits the key and value pair during a +:doc:`map-reduce ` operation. + +To verify the ``key`` and ``value`` pairs emitted by the ``map`` +function, write your own ``emit`` function. + +Consider a collection ``orders`` that contains documents of the +following prototype: + +.. code-block:: javascript + + { + _id: ObjectId("50a8240b927d5d8b5891743c"), + cust_id: "abc123", + ord_date: new Date("Oct 04, 2012"), + status: 'A', + price: 250, + items: [ { sku: "mmm", qty: 5, price: 2.5 }, + { sku: "nnn", qty: 5, price: 2.5 } ] + } + +#. Define the ``map`` function that maps the ``price`` to the + ``cust_id`` for each document and emits the ``cust_id`` and ``price`` + pair: + + .. code-block:: javascript + + var map = function() { + emit(this.cust_id, this.price); + }; + +#. Define the ``emit`` function to print the key and value: + + .. code-block:: javascript + + var emit = function(key, value) { + print("emit"); + print("key: " + key + " value: " + tojson(value)); + } + +#. Invoke the ``map`` function with a single document from the ``orders`` + collection: + + .. code-block:: javascript + + var myDoc = db.orders.findOne( { _id: ObjectId("50a8240b927d5d8b5891743c") } ); + map.apply(myDoc); + +#. Verify the key and value pair is as you expected. + + .. code-block:: javascript + + emit + key: abc123 value:250 + +#. Invoke the ``map`` function with multiple documents from the ``orders`` + collection: + + .. code-block:: javascript + + var myCursor = db.orders.find( { cust_id: "abc123" } ); + + while (myCursor.hasNext()) { + var doc = myCursor.next(); + print ("document _id= " + tojson(doc._id)); + map.apply(doc); + print(); + } + +#. Verify the key and value pairs are as you expected. + +.. seealso:: + + The ``map`` function must meet various requirements. For a list of all + the requirements for the ``map`` function, see :dbcommand:`mapReduce`, + or the :program:`mongo` shell helper method + :method:`db.collection.mapReduce()`. 
diff --git a/source/tutorial/troubleshoot-reduce-function.txt b/source/tutorial/troubleshoot-reduce-function.txt new file mode 100644 index 00000000000..762a04f8220 --- /dev/null +++ b/source/tutorial/troubleshoot-reduce-function.txt @@ -0,0 +1,221 @@ +================================ +Troubleshoot the Reduce Function +================================ + +.. default-domain:: mongodb + +The ``reduce`` function is a JavaScript function that “reduces” to a +single object all the values associated with a particular key during a +:doc:`map-reduce ` operation. The ``reduce`` function +must meet various requirements. This tutorial helps verify that the +``reduce`` function meets the following criteria: + +- The ``reduce`` function must return an object whose *type* must be + **identical** to the type of the ``value`` emitted by the ``map`` + function. + +- The order of the elements in the ``valuesArray`` should not affect + the output of the ``reduce`` function. + +- The ``reduce`` function must be *idempotent*. + +For a list of all the requirements for the ``reduce`` function, see +:dbcommand:`mapReduce`, or the :program:`mongo` shell helper method +:method:`db.collection.mapReduce()`. + +Confirm Output Type +------------------- + +You can test that the ``reduce`` function returns a value that is the +same type as the value emitted from the ``map`` function. + +#. Define a ``reduceFunction1`` function that takes the arguments + ``keyCustId`` and ``valuesPrices``. ``valuesPrices`` is an array of + integers: + + .. code-block:: javascript + + var reduceFunction1 = function(keyCustId, valuesPrices) { + return Array.sum(valuesPrices); + }; + +#. Define a sample array of integers: + + .. code-block:: javascript + + var myTestValues = [ 5, 5, 10 ]; + +#. Invoke the ``reduceFunction1`` with ``myTestValues``: + + .. code-block:: javascript + + reduceFunction1('myKey', myTestValues); + +#. Verify the ``reduceFunction1`` returned an integer: + + .. code-block:: javascript + + 20 + +#. Define a ``reduceFunction2`` function that takes the arguments + ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of + documents that contain two fields ``count`` and ``qty``: + + .. code-block:: javascript + + var reduceFunction2 = function(keySKU, valuesCountObjects) { + reducedValue = { count: 0, qty: 0 }; + + for (var idx = 0; idx < valuesCountObjects.length; idx++) { + reducedValue.count += valuesCountObjects[idx].count; + reducedValue.qty += valuesCountObjects[idx].qty; + } + + return reducedValue; + }; + +#. Define a sample array of documents: + + .. code-block:: javascript + + var myTestObjects = [ + { count: 1, qty: 5 }, + { count: 2, qty: 10 }, + { count: 3, qty: 15 } + ]; + +#. Invoke the ``reduceFunction2`` with ``myTestObjects``: + + .. code-block:: javascript + + reduceFunction2('myKey', myTestObjects); + +#. Verify the ``reduceFunction2`` returned a document with exactly the + ``count`` and the ``qty`` field: + + .. code-block:: javascript + + { "count" : 6, "qty" : 30 } + +Ensure Insensitivity to the Order of Mapped Values +-------------------------------------------------- + +The ``reduce`` function takes a ``key`` and a ``values`` array as its +argument. You can test that the result of the ``reduce`` function does +not depend on the order of the elements in the ``values`` array. + +#. Define a sample ``values1`` array and a sample ``values2`` array + that only differ in the order of the array elements: + + .. 
code-block:: javascript + + var values1 = [ + { count: 1, qty: 5 }, + { count: 2, qty: 10 }, + { count: 3, qty: 15 } + ]; + + var values2 = [ + { count: 3, qty: 15 }, + { count: 1, qty: 5 }, + { count: 2, qty: 10 } + ]; + +#. Define a ``reduceFunction2`` function that takes the arguments + ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of + documents that contain two fields ``count`` and ``qty``: + + .. code-block:: javascript + + var reduceFunction2 = function(keySKU, valuesCountObjects) { + reducedValue = { count: 0, qty: 0 }; + + for (var idx = 0; idx < valuesCountObjects.length; idx++) { + reducedValue.count += valuesCountObjects[idx].count; + reducedValue.qty += valuesCountObjects[idx].qty; + } + + return reducedValue; + }; + +#. Invoke the ``reduceFunction2`` first with ``values1`` and then with + ``values2``: + + .. code-block:: javascript + + reduceFunction2('myKey', values1); + reduceFunction2('myKey', values2); + +#. Verify the ``reduceFunction2`` returned the same result: + + .. code-block:: javascript + + { "count" : 6, "qty" : 30 } + +Ensure Reduce Function Idempotence +---------------------------------- + +Because the map-reduce operation may call a ``reduce`` multiple times +for the same key, the ``reduce`` function must return a value of the +same type as the value emitted from the ``map`` function. You can test +that the ``reduce`` function process "reduced" values without +affecting the *final* value. + +#. Define a ``reduceFunction2`` function that takes the arguments + ``keySKU`` and ``valuesCountObjects``. ``valuesCountObjects`` is an array of + documents that contain two fields ``count`` and ``qty``: + + .. code-block:: javascript + + var reduceFunction2 = function(keySKU, valuesCountObjects) { + reducedValue = { count: 0, qty: 0 }; + + for (var idx = 0; idx < valuesCountObjects.length; idx++) { + reducedValue.count += valuesCountObjects[idx].count; + reducedValue.qty += valuesCountObjects[idx].qty; + } + + return reducedValue; + }; + +#. Define a sample key: + + .. code-block:: javascript + + var myKey = 'myKey'; + +#. Define a sample ``valuesIdempotent`` array that contains an element that is a + call to the ``reduceFunction2`` function: + + .. code-block:: javascript + + var valuesIdempotent = [ + { count: 1, qty: 5 }, + { count: 2, qty: 10 }, + reduceFunction2(myKey, [ { count:3, qty: 15 } ] ) + ]; + +#. Define a sample ``values1`` array that combines the values passed to + ``reduceFunction2``: + + .. code-block:: javascript + + var values1 = [ + { count: 1, qty: 5 }, + { count: 2, qty: 10 }, + { count: 3, qty: 15 } + ]; + +#. Invoke the ``reduceFunction2`` first with ``myKey`` and + ``valuesIdempotent`` and then with ``myKey`` and ``values1``: + + .. code-block:: javascript + + reduceFunction2(myKey, valuesIdempotent); + reduceFunction2(myKey, values1); + +#. Verify the ``reduceFunction2`` returned the same result: + + .. code-block:: javascript + + { "count" : 6, "qty" : 30 } diff --git a/source/tutorial/troubleshoot-replica-sets.txt b/source/tutorial/troubleshoot-replica-sets.txt new file mode 100644 index 00000000000..3995228ff9d --- /dev/null +++ b/source/tutorial/troubleshoot-replica-sets.txt @@ -0,0 +1,321 @@ +========================= +Troubleshoot Replica Sets +========================= + +.. default-domain:: mongodb + +This section describes common strategies for troubleshooting +:term:`replica sets `. + +.. 
_replica-set-troubleshooting-check-replication-status: + +Check Replica Set Status +------------------------ + +To display the current state of the replica set and current state of +each member, run the :method:`rs.status()` method in a :program:`mongo` +shell connected to the replica set's :term:`primary`. For descriptions +of the information displayed by :method:`rs.status()`, see +:doc:`/reference/replica-status`. + +.. note:: + + The :method:`rs.status()` method is a wrapper that runs the + :dbcommand:`replSetGetStatus` database command. + +.. _replica-set-replication-lag: + +Check the Replication Lag +------------------------- + +Replication lag is a delay between an operation on the :term:`primary` +and the application of that operation from the :term:`oplog` to the +:term:`secondary`. Replication lag can be a significant issue and can +seriously affect MongoDB :term:`replica set` deployments. Excessive +replication lag makes "lagged" members ineligible to quickly become +primary and increases the possibility that distributed +read operations will be inconsistent. + +To check the current length of replication lag: + +- In a :program:`mongo` shell connected to the primary, call the + :method:`db.printSlaveReplicationInfo()` method. + + The returned document displays the ``syncedTo`` value for each member, + which shows you when each member last read from the oplog, as shown in the following + example: + + .. code-block:: javascript + + source: m1.example.net:30001 + syncedTo: Tue Oct 02 2012 11:33:40 GMT-0400 (EDT) + = 7475 secs ago (2.08hrs) + source: m2.example.net:30002 + syncedTo: Tue Oct 02 2012 11:33:40 GMT-0400 (EDT) + = 7475 secs ago (2.08hrs) + + .. note:: + + The :method:`rs.status()` method is a wrapper around the + :dbcommand:`replSetGetStatus` database command. + +- Monitor the rate of replication by watching the oplog time in the + "replica" graph in the `MongoDB Monitoring Service`_. For more + information see the `documentation for MMS`_. + +.. _`MongoDB Monitoring Service`: http://mms.10gen.com/ +.. _`documentation for MMS`: http://mms.10gen.com/help/ + +Possible causes of replication lag include: + +- **Network Latency** + + Check the network routes between the members of your set to ensure + that there is no packet loss or network routing issue. + + Use tools including ``ping`` to test latency between set + members and ``traceroute`` to expose the routing of packets + network endpoints. + +- **Disk Throughput** + + If the file system and disk device on the secondary is + unable to flush data to disk as quickly as the primary, then + the secondary will have difficulty keeping state. Disk-related + issues are incredibly prevalent on multi-tenant systems, including + vitalized instances, and can be transient if the system accesses + disk devices over an IP network (as is the case with Amazon's + EBS system.) + + Use system-level tools to assess disk status, including + ``iostat`` or ``vmstat``. + +- **Concurrency** + + In some cases, long-running operations on the primary can block + replication on secondaries. For best results, configure + :ref:`write concern ` to require confirmation of + replication to secondaries, as described in + :ref:`replica-set-write-concern`. This prevents write operations from + returning if replication cannot keep up with the write load. + + Use the :term:`database profiler` to see if there are slow queries + or long-running operations that correspond to the incidences of lag. 
+ +- **Appropriate Write Concern** + + If you are performing a large data ingestion or bulk load operation + that requires a large number of writes to the primary, particularly + with :ref:`unacknowledged write concern `, the + secondaries will not be able to read the oplog fast enough to keep + up with changes. + + To prevent this, require :ref:`write acknowledgment or journaled + write concern ` after every 100, + 1,000, or an another interval to provide an opportunity for + secondaries to catch up with the primary. + + For more information see: + + - :ref:`replica-set-write-concern` + - :ref:`replica-set-oplog-sizing` + +.. _replica-set-troubleshooting-check-connection: + +Test Connections Between all Members +------------------------------------ + +All members of a :term:`replica set` must be able to connect to every +other member of the set to support replication. Always verify +connections in both "directions." Networking topologies and firewall +configurations prevent normal and required connectivity, which can +block replication. + +Consider the following example of a bidirectional test of networking: + +.. example:: Given a replica set with three members running on three separate + hosts: + + - ``m1.example.net`` + - ``m2.example.net`` + - ``m3.example.net`` + + 1. Test the connection from ``m1.example.net`` to the other hosts + with the following operation set ``m1.example.net``: + + .. code-block:: sh + + mongo --host m2.example.net --port 27017 + + mongo --host m3.example.net --port 27017 + + #. Test the connection from ``m2.example.net`` to the other two + hosts with the following operation set from ``m2.example.net``, + as in: + + .. code-block:: sh + + mongo --host m1.example.net --port 27017 + + mongo --host m3.example.net --port 27017 + + You have now tested the connection between + ``m2.example.net`` and ``m1.example.net`` in both directions. + + #. Test the connection from ``m3.example.net`` to the other two + hosts with the following operation set from the + ``m3.example.net`` host, as in: + + .. code-block:: sh + + mongo --host m1.example.net --port 27017 + + mongo --host m2.example.net --port 27017 + + If any connection, in any direction fails, check your networking + and firewall configuration and reconfigure your environment to + allow these connections. + +.. _replica-set-troubleshooting-check-oplog-size: + +Check the Size of the Oplog +--------------------------- + +A larger :term:`oplog` can give a replica set a greater tolerance for +lag, and make the set more resilient. + +To check the size of the oplog for a given :term:`replica set` member, +connect to the member in a :program:`mongo` shell and run the +:method:`db.printReplicationInfo()` method. + +The output displays the size of the oplog and the date ranges of the +operations contained in the oplog. In the following example, the oplog +is about 10MB and is able to fit about 26 hours (94400 seconds) of +operations: + +.. code-block:: javascript + + configured oplog size: 10.10546875MB + log length start to end: 94400 (26.22hrs) + oplog first event time: Mon Mar 19 2012 13:50:38 GMT-0400 (EDT) + oplog last event time: Wed Oct 03 2012 14:59:10 GMT-0400 (EDT) + now: Wed Oct 03 2012 15:00:21 GMT-0400 (EDT) + +The oplog should be long enough to hold all transactions for the +longest downtime you expect on a secondary. At a minimum, an oplog +should be able to hold minimum 24 hours of operations; however, many +users prefer to have 72 hours or even a week's work of operations. 
+
+For more information on how oplog size affects operations, see:
+
+- The :ref:`replica-set-oplog-sizing` topic in the :doc:`/core/replication` document.
+- The :ref:`replica-set-delayed-members` topic in this document.
+- The :ref:`replica-set-replication-lag` topic in this document.
+
+.. note:: You normally want the oplog to be the same size on all
+   members. If you resize the oplog, resize it on all members.
+
+To change oplog size, see the :doc:`/tutorial/change-oplog-size`
+tutorial.
+
+Oplog Entry Timestamp Error
+---------------------------
+
+.. todo:: link this topic to assertion 13290 once assertion guide exists.
+
+Consider the following error in :program:`mongod` output and logs:
+
+.. code-block:: javascript
+
+   replSet error fatal couldn't query the local local.oplog.rs collection. Terminating mongod after 30 seconds.
+   [rsStart] bad replSet oplog entry?
+
+Often, an incorrectly typed value in the ``ts`` field in the last
+:term:`oplog` entry causes this error. The correct data type is
+Timestamp.
+
+Check the type of the ``ts`` value using the following two queries
+against the oplog collection:
+
+.. code-block:: javascript
+
+   db = db.getSiblingDB("local")
+   db.oplog.rs.find().sort({$natural:-1}).limit(1)
+   db.oplog.rs.find({ts:{$type:17}}).sort({$natural:-1}).limit(1)
+
+The first query returns the last document in the oplog, while the
+second returns the last document in the oplog where the ``ts`` value
+is a Timestamp. The :operator:`$type` operator allows you to select
+documents by :term:`BSON type `; type ``17`` is the Timestamp
+data type.
+
+If the queries do not return the same document, then the last document
+in the oplog has the wrong data type in the ``ts`` field.
+
+.. example::
+
+   If the first query returns this as the last oplog entry:
+
+   .. code-block:: javascript
+
+      { "ts" : {t: 1347982456000, i: 1},
+        "h" : NumberLong("8191276672478122996"),
+        "op" : "n",
+        "ns" : "",
+        "o" : { "msg" : "Reconfig set", "version" : 4 } }
+
+   And the second query returns this as the last entry where ``ts``
+   has the ``Timestamp`` type:
+
+   .. code-block:: javascript
+
+      { "ts" : Timestamp(1347982454000, 1),
+        "h" : NumberLong("6188469075153256465"),
+        "op" : "n",
+        "ns" : "",
+        "o" : { "msg" : "Reconfig set", "version" : 3 } }
+
+   Then the value for the ``ts`` field in the last oplog entry is of
+   the wrong data type.
+
+To set the proper type for this value and resolve this issue,
+use an update operation that resembles the following:
+
+.. code-block:: javascript
+
+   db.oplog.rs.update( { ts: { t:1347982456000, i:1 } },
+                       { $set: { ts: new Timestamp(1347982456000, 1)}})
+
+Modify the timestamp values as needed based on your oplog entry. This
+operation may take some time to complete because the update must
+scan and pull the entire oplog into memory.
+
+Duplicate Key Error on ``local.slaves``
+---------------------------------------
+
+The *duplicate key on local.slaves* error occurs when a
+:term:`secondary` or :term:`slave` changes its hostname and the
+:term:`primary` or :term:`master` tries to update its ``local.slaves``
+collection with the new name. The update fails because it contains the
+same ``_id`` value as the document containing the previous hostname. The
+error itself resembles the following:
+
+.. code-block:: none
+
+   exception 11000 E11000 duplicate key error index: local.slaves.$_id_ dup key: { : ObjectId('') } 0ms
+
+This is a benign error and does not affect replication operations on
+the :term:`secondary` or :term:`slave`.
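+
+To confirm which hosts the primary or master is currently tracking,
+you can inspect the collection directly; a minimal sketch in the
+:program:`mongo` shell:
+
+.. code-block:: javascript
+
+   use local
+   db.slaves.find()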
+
+To prevent the error from appearing, drop the ``local.slaves``
+collection from the :term:`primary` or :term:`master`, with the
+following sequence of operations in the :program:`mongo` shell:
+
+.. code-block:: javascript
+
+   use local
+   db.slaves.drop()
+
+The next time a :term:`secondary` or :term:`slave` polls the
+:term:`primary` or :term:`master`, the :term:`primary` or :term:`master`
+recreates the ``local.slaves`` collection.
diff --git a/source/tutorial/use-capped-collections-for-fast-writes-and-reads.txt b/source/tutorial/use-capped-collections-for-fast-writes-and-reads.txt
new file mode 100644
index 00000000000..e9283b5ff05
--- /dev/null
+++ b/source/tutorial/use-capped-collections-for-fast-writes-and-reads.txt
@@ -0,0 +1,31 @@
+================================================
+Use Capped Collections for Fast Writes and Reads
+================================================
+
+.. default-domain:: mongodb
+
+Use Capped Collections for Fast Writes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:doc:`/core/capped-collections` are circular, fixed-size collections
+that keep documents well-ordered, even without the use of an
+index. This means that capped collections can support very high-speed
+writes and sequential reads.
+
+These collections are particularly useful for keeping log files but are
+not limited to that purpose. Use capped collections where appropriate.
+
+Use Natural Order for Fast Reads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To return documents in the order they exist on disk, issue sort
+operations using the :operator:`$natural` operator. On a capped
+collection, this also returns the documents in the order in which they
+were written.
+
+:term:`Natural
+order ` does not use indexes but can be fast for
+operations when you want to select the first or last items on
+disk.
+
+.. seealso:: :method:`~cursor.sort()` and :method:`~cursor.limit()`.
diff --git a/source/tutorial/use-index-operators-to-optimize-query-performance.txt b/source/tutorial/use-index-operators-to-optimize-query-performance.txt
new file mode 100644
index 00000000000..f52d7dbc1da
--- /dev/null
+++ b/source/tutorial/use-index-operators-to-optimize-query-performance.txt
@@ -0,0 +1,63 @@
+=================================================================
+Use Index Operators and Projections to Optimize Query Performance
+=================================================================
+
+.. default-domain:: mongodb
+
+Limit the Number of Query Results to Reduce Network Demand
+----------------------------------------------------------
+
+MongoDB :term:`cursors ` return results in groups of multiple
+documents. If you know the number of results you want, you can reduce
+the demand on network resources by using the :method:`cursor.limit()`
+method.
+
+This is typically used in conjunction with sort operations. For example,
+if you need only 10 results from your query to the ``posts``
+collection, you would issue the following command:
+
+.. code-block:: javascript
+
+   db.posts.find().sort( { timestamp : -1 } ).limit(10)
+
+For more information on limiting results, see :method:`cursor.limit()`.
+
+Use Projections to Return Only Necessary Data
+---------------------------------------------
+
+When you need only a subset of fields from documents, you can achieve
+better performance by returning only the fields you need.
+
+For example, if in your query to the ``posts`` collection you need only
+the ``timestamp``, ``title``, ``author``, and ``abstract`` fields, you
+would issue the following command:
+
+.. 
code-block:: javascript
+
+   db.posts.find( {}, { timestamp : 1 , title : 1 , author : 1 , abstract : 1} ).sort( { timestamp : -1 } )
+
+For more information on using projections, see
+:ref:`read-operations-projection`.
+
+Use ``$hint`` to Select a Particular Index
+------------------------------------------
+
+In most cases the :ref:`query optimizer
+` selects the optimal index for a
+specific operation; however, you can force MongoDB to use a specific
+index using the :method:`hint() ` method. Use
+:method:`hint() ` to support performance testing, or on
+some queries where you must select a field or fields included in
+several indexes.
+
+Use the Increment Operator to Perform Operations Server-Side
+------------------------------------------------------------
+
+Use MongoDB's :operator:`$inc` operator to increment or decrement
+values in documents. The operator increments the value of the field on
+the server side, as an alternative to selecting a document, making
+simple modifications in the client, and then writing the entire
+document back to the server. The :operator:`$inc` operator can also
+help avoid race conditions, which would result when two application
+instances queried for a document, manually incremented a field, and
+saved the entire document back at the same time.
diff --git a/source/tutorial/use-indexes-to-optimize-query-performance.txt b/source/tutorial/use-indexes-to-optimize-query-performance.txt
new file mode 100644
index 00000000000..4176081c53e
--- /dev/null
+++ b/source/tutorial/use-indexes-to-optimize-query-performance.txt
@@ -0,0 +1,48 @@
+===================================
+Use Indexes to Optimize Performance
+===================================
+
+.. default-domain:: mongodb
+
+For commonly issued queries, create :doc:`indexes `. If a
+query searches multiple fields, create a :ref:`compound index
+`. Scanning an index is much faster than scanning a
+collection. Index structures are smaller than the documents they
+reference, and they store references in order.
+
+.. example:: If you have a ``posts`` collection containing blog posts,
+   and if you regularly issue a query that sorts on the ``author_name``
+   field, then you can optimize the query by creating an index on the
+   ``author_name`` field:
+
+   .. code-block:: javascript
+
+      db.posts.ensureIndex( { author_name : 1 } )
+
+Indexes also improve efficiency on queries that routinely sort on a
+given field.
+
+.. example:: If you regularly issue a query that sorts on the
+   ``timestamp`` field, then you can optimize the query by creating an
+   index on the ``timestamp`` field:
+
+   Creating this index:
+
+   .. code-block:: javascript
+
+      db.posts.ensureIndex( { timestamp : 1 } )
+
+   Optimizes this query:
+
+   .. code-block:: javascript
+
+      db.posts.find().sort( { timestamp : -1 } )
+
+Because MongoDB can read indexes in both ascending and descending
+order, the direction of a single-key index does not matter.
+
+Indexes support queries, update operations, and some phases of the
+:ref:`aggregation pipeline
+`.
+
+.. include:: /includes/fact-bindata-storage-optimization.rst
diff --git a/source/use-cases/hierarchical-aggregation.txt b/source/use-cases/hierarchical-aggregation.txt
index 23d8379b0d2..e22377f9548 100644
--- a/source/use-cases/hierarchical-aggregation.txt
+++ b/source/use-cases/hierarchical-aggregation.txt
@@ -49,8 +49,9 @@ output of each job:
    different from the operation that aggregates hourly statistics into
    the daily collection.
 
-.. 
seealso:: :term:`map-reduce` and the :doc:`/applications/map-reduce` - page for more information on the Map-reduce data aggregation paradigm. +.. seealso:: :term:`map-reduce`, :dbcommand:`mapReduce`, and the + :doc:`/core/map-reduce` page for more information on the Map-reduce + data aggregation paradigm. Schema ------ diff --git a/source/use-cases/product-catalog.txt b/source/use-cases/product-catalog.txt index 6c5055a5448..d4d8f5b4bcd 100644 --- a/source/use-cases/product-catalog.txt +++ b/source/use-cases/product-catalog.txt @@ -558,4 +558,4 @@ or results = db.product.find(..., read_preference=pymongo.SECONDARY_ONLY) -.. seealso:: ":ref:`Replica Set Read Preference `." +.. seealso:: :doc:`Replica Set Read Preference ` diff --git a/source/use-cases/storing-log-data.txt b/source/use-cases/storing-log-data.txt index c24ff5c44dd..36cd1eecea9 100644 --- a/source/use-cases/storing-log-data.txt +++ b/source/use-cases/storing-log-data.txt @@ -274,7 +274,7 @@ trade-off between safety and speed. penalty incurred by more stringent write concern across a group of inserts. -.. seealso:: ":ref:`Write Concern for Replica Sets `" +.. seealso:: :doc:`Write Concern for Replica Sets ` and :dbcommand:`getLastError`. Finding All Events for a Particular Page @@ -580,10 +580,10 @@ a pipeline that: ":ref:`sharding considerations `." of this document for additional recommendations for using sharding. -.. seealso:: ":doc:`/applications/aggregation`" +.. seealso:: ":doc:`/core/aggregation`" .. [#sql-aggregation-equivalents] To translate statements from the - :doc:`aggregation framework ` to SQL, + :doc:`aggregation framework ` to SQL, you can consider the :pipeline:`$match` equivalent to ``WHERE``, :pipeline:`$project` to ``SELECT``, and :pipeline:`$group` to ``GROUP BY``. diff --git a/themes/mongodb/layout.html b/themes/mongodb/layout.html index d016434f5d7..2fd415f066f 100644 --- a/themes/mongodb/layout.html +++ b/themes/mongodb/layout.html @@ -19,6 +19,28 @@ {%- macro jiralink() -%} https://jira.mongodb.org/secure/CreateIssueDetails!init.jspa?pid=10380&issuetype=4&priority=4&summary=Comment+on%3a+%22{{ pagename | escape }}%2Etxt%22 {%- endmacro -%} +{%- set reldelim1 = reldelim1 is not defined and '' or reldelim1 %} +{%- set reldelim2 = reldelim2 is not defined and ' |' or reldelim2 %} +{%- macro relbar() %} +
+{#

{{ _('Navigation') }}

#} +
    +{# {%- for rellink in rellinks %} +
  • + {{ rellink[3] }} + {%- if not loop.first %}{{ reldelim2 }}{% endif %}
  • + {%- endfor %} +#} {%- block rootrellink %} +{#
  • {{ shorttitle|e }}{{ reldelim1 }}
  • #} + {%- endblock %} + {%- for parent in parents %} +
  • {{ parent.title }}{{ reldelim1 }}
  • + {%- endfor %} + {%- block relbaritems %} {% endblock %} +
+
+{%- endmacro %} {%- macro sidebar() %} {%- if render_sidebar %} @@ -144,6 +166,7 @@
{%- if render_sidebar %}
{%- endif %}
+ {{ relbar() }}
diff --git a/themes/mongodb/pagenav.html b/themes/mongodb/pagenav.html index ad4c9be242f..4c65e3c30cc 100644 --- a/themes/mongodb/pagenav.html +++ b/themes/mongodb/pagenav.html @@ -7,4 +7,4 @@

{{ shorttitle }} {# the following span is replaced by the javascript in versionnav.html #}

Version: {{version}}

{% endif %} -{{ toctree(maxdepth=-1) }} +{{ toctree(maxdepth=-1,titles_only=1) }} diff --git a/themes/mongodb/static/mongodb-docs.css_t b/themes/mongodb/static/mongodb-docs.css_t index f50eae2c7e6..d78a5999cfc 100644 --- a/themes/mongodb/static/mongodb-docs.css_t +++ b/themes/mongodb/static/mongodb-docs.css_t @@ -46,7 +46,7 @@ div.body { div.body > div#cse-results + div.section { margin:0 1.5em; } div.body > div#cse-results + span.target + div.section { margin:0 1.5em; } div.body > div#cse-results + span.target + span.target+ div.section { margin:0 1.5em; } - +div.body > div#cse-results {margin:0;padding:0;margin-left:24px;height:auto;} div.footer { color:white; width:auto; @@ -94,6 +94,7 @@ div.sphinxsidebar { font-size: 0.72em; line-height: 1.4em; width: 280px; + padding-top: .45em; } div.sphinxsidebarwrapper{ @@ -160,7 +161,7 @@ p.searchtip { font-size: 93%; } /* -- body styles ----------------------------------------------------------- */ a { - color: #774212; + color: #005B81; text-decoration: none; } @@ -177,7 +178,7 @@ div.body h1, div.body h2, div.body h3, div.body h4, div.body h5, div.body h6 { } div.body h1 { - border-top: 5px solid white; + border-top: none; margin-top: 0; font-size: 200%; } @@ -283,7 +284,9 @@ tt { font-family: "Courier New", Courier, monospace; } -a>tt { color: #774212; } +a>tt { + color: #005B81; +} .viewcode-back { font-family: helvetica,arial,sans-serif; } @@ -379,16 +382,20 @@ div#mongodb ul.simple { list-style:circle; } +/* div.sphinxsidebar li.toctree-l1 a.reference { font-size: 1.2em; color: black; line-height: 1.5em; } +*/ +/* div.sphinxsidebar ul.current li.toctree-l1 a { font-size: 1.2em; color: black; line-height: 1.5em; } +*/ div.sphinxsidebar ul.current li.toctree-l2 a { font-size: 1.1em; color: black; @@ -414,6 +421,7 @@ div.related li.right { margin-right: 5px; margin-top: 85px; } + div.sphinxsidebar tt.literal .pre { font-weight: normal; } @@ -464,7 +472,7 @@ ol#breadcrumbs { margin-left: -14px; } line-height: 1.4em; } -#breadcrumbs { color:#158f35; } +#breadcrumbs { color: #122A78; } #main-db { padding-top: 2.5em; @@ -837,6 +845,30 @@ div.admonition-example pre { background-color: #FBFBFB; } li p.first { margin-bottom: 0em; } h4#vnd {padding:0;margin:0;line-height:1.5;padding-left:10px;} -ul.current li.toctree-l1 a:active {text-decoration:underline;} -ul.current li.toctree-l1 a:hover {font-weight:900;} -ul.current li.toctree-l1 a:focus {text-decoration:none;font-weight:900;} +div.sphinxsidebarwrapper {padding-top:0.5em; padding-left: 8px;} +div.sphinxsidebarwrapper ul {margin:0;padding:0;margin-left:10px;} +div.sphinxsidebarwrapper ul li {font-size:1.0em;font-family:Helvetica,Arial,Sans-serif;line-height:1.5;} +div.sphinxsidebarwrapper ul li a:active, +div.sphinxsidebarwrapper ul li a:focus, +div.sphinxsidebarwrapper ul li a:hover {font-weight:900;} +div.sphinxsidebarwrapper ul li a tt span {font-family:Helvetica,Arial,sans-serif;font-size:inherit;color: black;} +div.sphinxsidebarwrapper ul li a:active tt span, div.sphinxsidebarwrapper ul li a:hover tt span, div.sphinxsidebarwrapper ul li a:focus tt span {font-weight:900} +#div.sphinxsidebarwrapper ul.current li.current a tt span {font-weight:900;} +#div.sphinxsidebar ul.current li.current tt.literal .pre {font-weight:900;} +div.sphinxsidebar a:active tt.literal .pre, +div.sphinxsidebar a:focus tt.literal .pre, +div.sphinxsidebar a:hover tt.literal .pre {font-weight:900;} +div.body div.bc { + padding-top: .6em; + margin-left:24px; + background:white; +} +div.body div.bc li.jr {float: 
right;display:none;} +div.body div.bc ul {padding:0;margin:0} +div.body div.bc ul li {list-style: none;font-size:0.9em;display:inline;} +div.body div.bc ul li a { + color: #005B81; +} +div.bc ul li:after {content: " >" ; font-weight: bold; font-size:0.9em; color: #343434; } +div.sphinxsidebarwrapper h3 {line-height:2.0;margin:0;padding:0} +div.sphinxsidebarwrapper h3:first-child {line-height:1.0;margin:0;padding:0}