diff --git a/.gitignore b/.gitignore index d9fb5e0c6c..0ab0672302 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +docs/source/operators/events build dist _build @@ -6,15 +7,7 @@ docs/man/*.gz docs/source/api/generated docs/source/config.rst docs/gh-pages -notebook/i18n/*/LC_MESSAGES/*.mo -notebook/i18n/*/LC_MESSAGES/nbjs.json -notebook/static/components -notebook/static/style/*.min.css* -notebook/static/*/js/built/ -notebook/static/*/built/ -notebook/static/built/ -notebook/static/*/js/main.min.js* -notebook/static/lab/*bundle.js +docs/source/events node_modules *.py[co] __pycache__ diff --git a/MANIFEST.in b/MANIFEST.in index 9d4060fc69..b81a6d5536 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,9 @@ include setupbase.py include Dockerfile graft tools +# Event Schemas +graft jupyter_server/event-schemas + # Documentation graft docs exclude docs/\#* diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt index 48b3eda1d0..4167aabf6d 100644 --- a/docs/doc-requirements.txt +++ b/docs/doc-requirements.txt @@ -8,4 +8,5 @@ prometheus_client sphinxcontrib_github_alt sphinxcontrib-openapi sphinxemoji -git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master +jupyter_telemetry_sphinxext \ No newline at end of file diff --git a/docs/environment.yml b/docs/environment.yml index 5d77bc7bb4..1d9c9d3eb8 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -13,4 +13,5 @@ dependencies: - sphinxcontrib_github_alt - sphinxcontrib-openapi - sphinxemoji - - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file + - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master + - sphinx-jsonschema diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000000..63ee6cc74c --- /dev/null +++ 
b/docs/source/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/source/conf.py b/docs/source/conf.py index e105e82d40..4add156c81 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,8 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinxcontrib.openapi', - 'sphinxemoji.sphinxemoji' + 'jupyter_telemetry_sphinxext' ] # Add any paths that contain templates here, relative to this directory. @@ -208,6 +207,12 @@ # since it is needed to properly generate _static in the build directory html_static_path = ['_static'] +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], +} + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. @@ -371,3 +376,8 @@ # import before any doc is built, so _ is guaranteed to be injected import jupyter_server.transutils + +# Jupyter telemetry configuration values. +jupyter_telemetry_schema_source = "../jupyter_server/event-schemas" # Path is relative to conf.py +jupyter_telemetry_schema_output = "source/operators/events" # Path is relative to conf.py +jupyter_telemetry_index_title = "Telemetry Event Schemas" # Title of the index page that lists all found schemas. 
\ No newline at end of file diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst new file mode 100644 index 0000000000..7229717f69 --- /dev/null +++ b/docs/source/eventlog.rst @@ -0,0 +1,61 @@ +Eventlogging and Telemetry +========================== + +The Notebook Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Notebook Server emits are defined by the `JSON schemas`_ listed below_. Events are emitted as JSON data and validated against those schemas. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +Emitting Server Events +---------------------- + +Event logging is handled by the server's ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to their destination (for example, a file). + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: python + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +`eventlog` endpoint +------------------- + +The Notebook Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. 
`'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. + +.. _below: + + +Server Event schemas +-------------------- + +.. toctree:: + :maxdepth: 2 + + events/index diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index a654be1a0c..a6d2e212fd 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -12,4 +12,5 @@ These pages are targeted at people using, configuring, and/or deploying multiple configuring-extensions migrate-from-nbserver public-server - security \ No newline at end of file + security + telemetry \ No newline at end of file diff --git a/docs/source/operators/telemetry.rst b/docs/source/operators/telemetry.rst new file mode 100644 index 0000000000..2c94e99a7c --- /dev/null +++ b/docs/source/operators/telemetry.rst @@ -0,0 +1,61 @@ +Telemetry and Eventlogging +========================== + +Jupyter Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Server emits are defined by the `JSON schemas`_ listed below_. Events are emitted as JSON data and validated against those schemas. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +Emitting Server Events +---------------------- + +Event logging is handled by the server's ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to their destination (for example, a file). + 2. 
``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: python + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +`eventlog` endpoint +------------------- + +Jupyter Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. + +.. _below: + + +Server Event schemas +-------------------- + +.. toctree:: + :maxdepth: 2 + + events/index diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index f7f0cab4ba..70852ea40f 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -897,7 +897,7 @@ FileContentsManager.root_dir : Unicode No description -NotebookNotary.algorithm : 'md5'|'sha3_384'|'sha3_512'|'sha256'|'sha1'|'blake2s'|'sha3_256'|'sha3_224'|'sha384'|'sha512'|'blake2b'|'sha224' +NotebookNotary.algorithm : 'sha1'|'sha3_224'|'blake2s'|'sha384'|'sha224'|'sha3_256'|'sha3_384'|'sha3_512'|'sha512'|'sha256'|'md5'|'blake2b' Default: ``'sha256'`` The hashing algorithm used to sign notebooks. 
diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 75467718c8..5185365c4d 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -204,6 +204,11 @@ def jinja_template_vars(self): """User-supplied values to supply to jinja templates.""" return self.settings.get('jinja_template_vars', {}) + @property + def eventlog(self): + """Event log stored in the application settings under ``'eventlog'`` (``None`` if unset).""" + return self.settings.get('eventlog') + #--------------------------------------------------------------- # URLs #--------------------------------------------------------------- diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml new file mode 100644 index 0000000000..31a5f293a9 --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -0,0 +1,83 @@ +"$id": eventlogging.jupyter.org/jupyter_server/contentsmanager-actions +version: 1 +title: Contents Manager activities +personal-data: true +description: | + Record actions on files via the ContentsManager REST API. + + The notebook ContentsManager REST API is used by all frontends to retrieve, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. 
Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: +- action +- path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + category: unrestricted + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 6. copy + Copy a file or directory from value in source_path to + value in path. + + 7. delete + Delete a file or empty directory at given path + path: + category: personally-identifiable-information + type: string + description: | + Logical path on which the operation was performed. + + This is a required field. 
+ source_path: + category: personally-identifiable-information + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 21a4e68ca9..18f28967f0 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,8 @@ import warnings import webbrowser import urllib +from ruamel.yaml import YAML +from glob import glob from types import ModuleType from base64 import encodebytes @@ -99,10 +101,19 @@ ) from ipython_genutils import py3compat from jupyter_core.paths import jupyter_runtime_dir, jupyter_path +from jupyter_telemetry.eventlog import EventLog + from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp -from .utils import url_path_join, check_pid, url_escape, urljoin, pathname2url +from .utils import ( + url_path_join, + check_pid, + url_escape, + urljoin, + pathname2url, + get_schema_files +) from jupyter_server.extension.serverextension import ( ServerExtensionApp, @@ -279,7 +290,8 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, server_root_dir=root_dir, jinja2_env=env, terminals_available=False, # Set later if terminals are available - serverapp=self + serverapp=self, + eventlog=jupyter_app.eventlog ) # allow custom overrides for the tornado web app. @@ -1758,6 +1770,11 @@ def _init_asyncio_patch(): # WindowsProactorEventLoopPolicy is not compatible with tornado 6 # fallback to the pre-3.8 default of Selector asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) + def init_eventlog(self): + self.eventlog = EventLog(parent=self) + # Register schemas for notebook services. 
+ for file_path in get_schema_files(): + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): @@ -1788,10 +1805,12 @@ def initialize(self, argv=None, find_extensions=True, new_httpserver=True): self.init_server_extensions() # Initialize all components of the ServerApp. self.init_logging() if self._dispatching: return self.init_configurables() self.init_components() + # Create the event log once, before the web app is built; the app settings read ``eventlog`` from this object. + self.init_eventlog() self.init_webapp() if new_httpserver: self.init_httpserver() diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 53aff09078..9b7802ff2a 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -5,14 +5,18 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. - +import os import json from tornado import web - -from jupyter_server.utils import url_path_join, url_escape, ensure_async from jupyter_client.jsonutil import date_default +from jupyter_server.utils import ( + url_path_join, + url_escape, + ensure_async, + eventlogging_schema_fqn +) from jupyter_server.base.handlers import ( JupyterHandler, APIHandler, path_regex, ) @@ -111,6 +115,11 @@ async def get(self, path=''): path=path, type=type, format=format, content=content, )) validate_model(model, expect_content=content) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'get', 'path': model['path'] } + ) self._finish_model(model) @web.authenticated @@ -120,10 +129,21 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') - model = cm.update(model, path) + self.log.info(model) + model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) + self.eventlog.record_event( + 
eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip('/') + } + ) self._finish_model(model) + async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -133,6 +153,15 @@ async def _copy(self, copy_from, copy_to=None): model = self.contents_manager.copy(copy_from, copy_to) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip('/') + } + ) self._finish_model(model) async def _upload(self, model, path): @@ -141,6 +170,11 @@ async def _upload(self, model, path): model = self.contents_manager.new(model, path) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'upload', 'path': model['path'] } + ) self._finish_model(model) async def _new_untitled(self, path, type='', ext=''): @@ -149,6 +183,11 @@ async def _new_untitled(self, path, type='', ext=''): model = self.contents_manager.new_untitled(path=path, type=type, ext=ext) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), 1, + # Set path to path of created object, not directory it was created in + { 'action': 'create', 'path': model['path'] } + ) self._finish_model(model) async def _save(self, model, path): @@ -158,6 +197,11 @@ async def _save(self, model, path): self.log.info(u"Saving file at %s", path) model = self.contents_manager.save(model, path) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'save', 'path': 
model['path'] } + ) self._finish_model(model) @web.authenticated @@ -227,9 +271,12 @@ async def delete(self, path=''): self.log.warning('delete %s', path) cm.delete(path) self.set_status(204) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), 1, + { 'action': 'delete', 'path': path.lstrip('/') } + ) self.finish() - class CheckpointsHandler(APIHandler): @web.authenticated diff --git a/jupyter_server/services/eventlog/__init__.py b/jupyter_server/services/eventlog/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py new file mode 100644 index 0000000000..0c9b69815f --- /dev/null +++ b/jupyter_server/services/eventlog/handlers.py @@ -0,0 +1,47 @@ +import json + +from tornado import web + +from jupyter_server.utils import url_path_join +from jupyter_server.base.handlers import APIHandler +from jupyter_telemetry.eventlog import EventLog + +class EventLoggingHandler(APIHandler): + """ + A handler that receives and stores telemetry data from the client. 
+ """ + @web.authenticated + def post(self, *args, **kwargs): + """Validate the JSON request body and record it as an event; reply 204 on success, 400 on bad input.""" + try: + # Parse the data from the request body + raw_event = json.loads(self.request.body.strip().decode()) + except Exception as e: + raise web.HTTPError(400, str(e)) + + # A JSON array or scalar parses fine but cannot carry the required keys. + if not isinstance(raw_event, dict): + raise web.HTTPError(400, 'Request body must be a JSON object') + + required_fields = {'schema', 'version', 'event'} + for rf in required_fields: + if rf not in raw_event: + raise web.HTTPError(400, '{} is a required field'.format(rf)) + + schema_name = raw_event['schema'] + version = raw_event['version'] + event = raw_event['event'] + + # Profile, may need to move to a background thread if this is problematic + try: + self.eventlog.record_event(schema_name, version, event) + except Exception as e: + # HTTPError expects a string log message, not an exception instance. + raise web.HTTPError(400, str(e)) + + self.set_status(204) + self.finish() + +default_handlers = [ + (r"/api/eventlog", EventLoggingHandler), +] \ No newline at end of file diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 42a6ae9278..ec44e13b75 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -440,3 +440,25 @@ def wrapped(): result = asyncio.ensure_future(maybe_async) return result return wrapped() + + +def eventlogging_schema_fqn(name): + """ + Return fully qualified event schema name + + Matches convention for this particular repo + """ + return 'eventlogging.jupyter.org/jupyter_server/{}'.format(name) + + +def get_schema_files(): + """Yield a sequence of event schemas for jupyter services.""" + # Hardcode path to event schemas directory. 
+ event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + #schema_files = [] + # Recursively yield the path of every .yaml file under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + file_path = os.path.join(dirname, file) + yield file_path diff --git a/setup.py b/setup.py index 2697fad271..6123e8f64f 100755 --- a/setup.py +++ b/setup.py @@ -94,7 +94,9 @@ 'Send2Trash', 'terminado>=0.8.3', 'prometheus_client', - "pywin32>=1.0 ; sys_platform == 'win32'" + "pywin32>=1.0 ; sys_platform == 'win32'", + # Install the working branch of telemetry. + 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py new file mode 100644 index 0000000000..1f7b587327 --- /dev/null +++ b/tests/test_eventlog.py @@ -0,0 +1,4 @@ + + +def test_eventlog(serverapp): + pass \ No newline at end of file