cloud-py-api · bigcat88 · Dec 31, 2023 · Dec 31, 2023 · Dec 31, 2023 · Dec 31, 2023
diff --git a/.run/Speech2TxtProvider (last).run.xml b/.run/Speech2TxtProvider (last).run.xml
@@ -0,0 +1,31 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="Speech2TxtProvider (last)" type="PythonConfigurationType" factoryName="Python">
+    <module name="nc_py_api" />
+    <option name="ENV_FILES" value="" />
+    <option name="INTERPRETER_OPTIONS" value="" />
+    <option name="PARENT_ENVS" value="true" />
+    <envs>
+      <env name="APP_HOST" value="0.0.0.0" />
+      <env name="APP_ID" value="speech2text_example" />
+      <env name="APP_PORT" value="9036" />
+      <env name="APP_SECRET" value="12345" />
+      <env name="APP_VERSION" value="1.0.0" />
+      <env name="NEXTCLOUD_URL" value="http://nextcloud.local" />
+      <env name="PYTHONUNBUFFERED" value="1" />
+    </envs>
+    <option name="SDK_HOME" value="" />
+    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples/as_app/speech2text/lib" />
+    <option name="IS_MODULE_SDK" value="true" />
+    <option name="ADD_CONTENT_ROOTS" value="true" />
+    <option name="ADD_SOURCE_ROOTS" value="true" />
+    <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
+    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/examples/as_app/speech2text/lib/main.py" />
+    <option name="PARAMETERS" value="" />
+    <option name="SHOW_COMMAND_LINE" value="false" />
+    <option name="EMULATE_TERMINAL" value="false" />
+    <option name="MODULE_MODE" value="false" />
+    <option name="REDIRECT_INPUT" value="false" />
+    <option name="INPUT_FILE" value="" />
+    <method v="2" />
+  </configuration>
+</component>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,20 +2,26 @@
 
 All notable changes to this project will be documented in this file.
 
-## [0.7.2 - 2022-12-28]
+## [0.8.0 - 2024-01-xx]
+
+### Added
+
+- API for registering Speech to Text provider (*available from Nextcloud 29*). #196
+
+## [0.7.2 - 2023-12-28]
 
 ### Fixed
 
 - files: proper url encoding of special chars in `mkdir` and `delete` methods. #191 Thanks to @tobenary
 - files: proper url encoding of special chars in all other `DAV` methods. #194
 
-## [0.7.1 - 2022-12-21]
+## [0.7.1 - 2023-12-21]
 
 ### Added
 
 - The `ocs` method is now public, making it easy to use Nextcloud OCS that has not yet been described. #187
 
-## [0.7.0 - 2022-12-17]
+## [0.7.0 - 2023-12-17]
 
 ### Added
 

diff --git a/README.md b/README.md
@@ -24,21 +24,21 @@ Python library that provides a robust and well-documented API that allows develo
  * **Sync + Async**: Provides both sync and async APIs.
 
 ### Capabilities
-| **_Capability_**      | Nextcloud 26 | Nextcloud 27 | Nextcloud 28 |
-|-----------------------|:------------:|:------------:|:------------:|
-| Calendar              |      ✅       |      ✅       |      ✅       |
-| File System & Tags    |      ✅       |      ✅       |      ✅       |
-| Nextcloud Talk        |      ✅       |      ✅       |      ✅       |
-| Notifications         |      ✅       |      ✅       |      ✅       |
-| Shares                |      ✅       |      ✅       |      ✅       |
-| Users & Groups        |      ✅       |      ✅       |      ✅       |
-| User & Weather status |      ✅       |      ✅       |      ✅       |
-| Other APIs***         |      ✅       |      ✅       |      ✅       |
-| Talk Bot API*         |     N/A      |      ✅       |      ✅       |
-| Text Processing*      |     N/A      |      ❌       |      ❌       |
-| SpeechToText*         |     N/A      |      ❌       |      ❌       |
-
-&ast;_available only for NextcloudApp_<br>
+| **_Capability_**      | Nextcloud 26 | Nextcloud 27 | Nextcloud 28 | Nextcloud 29 |
+|-----------------------|:------------:|:------------:|:------------:|:------------:|
+| Calendar              |      ✅       |      ✅       |      ✅       |      ✅       |
+| File System & Tags    |      ✅       |      ✅       |      ✅       |      ✅       |
+| Nextcloud Talk        |      ✅       |      ✅       |      ✅       |      ✅       |
+| Notifications         |      ✅       |      ✅       |      ✅       |      ✅       |
+| Shares                |      ✅       |      ✅       |      ✅       |      ✅       |
+| Users & Groups        |      ✅       |      ✅       |      ✅       |      ✅       |
+| User & Weather status |      ✅       |      ✅       |      ✅       |      ✅       |
+| Other APIs***         |      ✅       |      ✅       |      ✅       |      ✅       |
+| Talk Bot API*         |     N/A      |      ✅       |      ✅       |      ✅       |
+| TextProcessing*       |     N/A      |     N/A      |     N/A      |      ❌       |
+| SpeechToText*         |     N/A      |     N/A      |     N/A      |      ✅       |
+
+&ast;_available only for **NextcloudApp**_<br>
 &ast;&ast;&ast;_Activity, Notes_
 
 ### Differences between the Nextcloud and NextcloudApp classes

diff --git a/docs/reference/ExApp.rst b/docs/reference/ExApp.rst
@@ -56,3 +56,12 @@ UI methods should be accessed with the help of :class:`~nc_py_api.nextcloud.Next
 
 .. autoclass:: nc_py_api.ex_app.ui.resources.UiStyle
     :members:
+
+.. autoclass:: nc_py_api.ex_app.providers.providers.ProvidersApi
+    :members:
+
+.. autoclass:: nc_py_api.ex_app.providers.speech_to_text.SpeechToTextProvider
+    :members:
+
+.. autoclass:: nc_py_api.ex_app.providers.speech_to_text._SpeechToTextProviderAPI
+    :members:
diff --git a/examples/as_app/speech2text/Dockerfile b/examples/as_app/speech2text/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.11-slim-bookworm
+
+COPY requirements.txt /
+
+ADD cs[s] /app/css
+ADD im[g] /app/img
+ADD j[s] /app/js
+ADD l10[n] /app/l10n
+ADD li[b] /app/lib
+
+RUN \
+  python3 -m pip install -r requirements.txt && rm -rf ~/.cache && rm requirements.txt
+
+WORKDIR /app/lib
+ENTRYPOINT ["python3", "main.py"]
diff --git a/examples/as_app/speech2text/Makefile b/examples/as_app/speech2text/Makefile
@@ -0,0 +1,43 @@
+.DEFAULT_GOAL := help
+
+.PHONY: help
+help:
+	@echo "Welcome to Speech2TextProvider example. Please use \`make <target>\` where <target> is one of"
+	@echo " "
+	@echo "  Next commands are only for dev environment with nextcloud-docker-dev!"
+	@echo "  They should run from the host you are developing on(with activated venv) and not in the container with Nextcloud!"
+	@echo "  "
+	@echo "  build-push        build image and upload to ghcr.io"
+	@echo "  "
+	@echo "  deploy            deploy Speech2TextProvider to registered 'docker_dev' for Nextcloud Last"
+	@echo "  "
+	@echo "  run               install Speech2TextProvider for Nextcloud Last"
+	@echo "  "
+	@echo "  For development of this example use PyCharm run configurations. Development is always set for last Nextcloud."
+	@echo "  First run 'Speech2TextProvider' and then 'make register', after that you can use/debug/develop it and easily test."
+	@echo "  "
+	@echo "  register          perform registration of running Speech2TextProvider into the 'manual_install' deploy daemon."
+
+.PHONY: build-push
+build-push:
+	docker login ghcr.io
+	docker buildx build --push --platform linux/arm64/v8,linux/amd64 --tag ghcr.io/cloud-py-api/speech2text_example:latest .
+
+.PHONY: deploy
+deploy:
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:unregister speech2text_example --silent || true
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:deploy speech2text_example docker_dev \
+		--info-xml https://raw.githubusercontent.com/cloud-py-api/nc_py_api/main/examples/as_app/speech2text/appinfo/info.xml
+
+.PHONY: run
+run:
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:unregister speech2text_example --silent || true
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:register speech2text_example docker_dev --force-scopes \
+		--info-xml https://raw.githubusercontent.com/cloud-py-api/nc_py_api/main/examples/as_app/speech2text/appinfo/info.xml
+
+.PHONY: register
+register:
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:unregister speech2text_example --silent || true
+	docker exec master-nextcloud-1 sudo -u www-data php occ app_api:app:register speech2text_example manual_install --json-info \
+  "{\"appid\":\"speech2text_example\",\"name\":\"SpeechToText Provider\",\"daemon_config_name\":\"manual_install\",\"version\":\"1.0.0\",\"secret\":\"12345\",\"host\":\"host.docker.internal\",\"port\":9036,\"scopes\":{\"required\":[\"AI_PROVIDERS\"],\"optional\":[]},\"protocol\":\"http\",\"system_app\":0}" \
+  --force-scopes --wait-finish
diff --git a/examples/as_app/speech2text/appinfo/info.xml b/examples/as_app/speech2text/appinfo/info.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0"?>
+<info>
+	<id>speech2text_example</id>
+	<name>SpeechToText Provider</name>
+	<summary>Example of SpeechToText Provider</summary>
+	<description>
+	<![CDATA[Simplest Speech to Text Provider example written in python]]>
+	</description>
+	<version>1.0.0</version>
+	<licence>MIT</licence>
+	<author mail="[email protected]" homepage="https://github.com/andrey18106">Andrey Borysenko</author>
+	<author mail="[email protected]" homepage="https://github.com/bigcat88">Alexander Piskun</author>
+	<namespace>PyAppV2_Speech2TextProvider</namespace>
+	<category>tools</category>
+	<website>https://github.com/cloud-py-api/nc_py_api</website>
+	<bugs>https://github.com/cloud-py-api/nc_py_api/issues</bugs>
+	<repository type="git">https://github.com/cloud-py-api/nc_py_api</repository>
+	<dependencies>
+		<nextcloud min-version="29" max-version="30"/>
+	</dependencies>
+	<external-app>
+		<docker-install>
+			<registry>ghcr.io</registry>
+			<image>cloud-py-api/speech2text_example</image>
+			<image-tag>latest</image-tag>
+		</docker-install>
+		<scopes>
+			<required>
+				<value>AI_PROVIDERS</value>
+			</required>
+			<optional>
+			</optional>
+		</scopes>
+		<protocol>http</protocol>
+		<system>false</system>
+	</external-app>
+</info>
diff --git a/examples/as_app/speech2text/lib/main.py b/examples/as_app/speech2text/lib/main.py
@@ -0,0 +1,78 @@
+"""Use the simplest model to just test speech recognition.
+
+This example is not production-ready: a production app would probably run requests in subprocesses with a timeout or
+run multiple workers to process requests simultaneously.
+"""
+
+import os
+import tempfile
+import typing
+from contextlib import asynccontextmanager
+
+import torch
+from fastapi import Depends, FastAPI, UploadFile, responses
+from huggingface_hub import snapshot_download
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+from nc_py_api import NextcloudApp
+from nc_py_api.ex_app import nc_app, persistent_storage, run_app, set_handlers
+
+MODEL_NAME = "distil-whisper/distil-small.en"
+
+
+@asynccontextmanager
+async def lifespan(_app: FastAPI):  # installed below via FastAPI(lifespan=lifespan)
+    set_handlers(APP, enabled_handler, models_to_fetch={MODEL_NAME: {"ignore_patterns": ["*.bin", "*onnx*"]}})
+    yield  # no code follows the yield, so nothing extra runs when the context exits
+
+
+APP = FastAPI(lifespan=lifespan)
+
+
+@APP.post("/distil_whisper_small")
+async def distil_whisper_small(
+    _nc: typing.Annotated[NextcloudApp, Depends(nc_app)],
+    data: UploadFile,
+    max_execution_time: float = 0,
+):
+    """Transcribe the uploaded audio file with the locally cached model and return the recognized text."""
+    print(max_execution_time)  # only logged; no time limit is enforced here
+    # Resolve the model snapshot from the cache directory; ``local_files_only`` means nothing is downloaded here.
+    model_path = snapshot_download(MODEL_NAME, local_files_only=True, cache_dir=persistent_storage())
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_path,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        use_safetensors=True,
+    ).to("cpu")
+    # Load the processor from the same local snapshot so it is not fetched again by hub name.
+    processor = AutoProcessor.from_pretrained(model_path)
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
+        max_new_tokens=128,
+        torch_dtype=torch.float32,
+        device="cpu",
+    )
+    _, file_extension = os.path.splitext(data.filename or "")  # ``filename`` may be missing on the upload
+    with tempfile.NamedTemporaryFile(mode="w+b", suffix=file_extension) as tmp:
+        tmp.write(await data.read())
+        tmp.flush()  # the pipeline is given only the file name, so buffered bytes must reach the disk first
+        result = pipe(tmp.name)
+    return responses.Response(content=result["text"])
+
+
+# Handed to ``set_handlers``: registers the provider when ``enabled`` is True and unregisters it otherwise.
+def enabled_handler(enabled: bool, nc: NextcloudApp) -> str:
+    print(f"enabled={enabled}")
+    if enabled is not True:
+        nc.providers.speech_to_text.unregister("distil_whisper_small")
+    else:
+        nc.providers.speech_to_text.register("distil_whisper_small", "DistilWhisperSmall", "/distil_whisper_small")
+    return ""
+
+
+if __name__ == "__main__":
+    run_app("main:APP", log_level="trace")  # presumably "<module>:<attribute>" naming this file's APP — TODO confirm
diff --git a/examples/as_app/speech2text/requirements.txt b/examples/as_app/speech2text/requirements.txt
@@ -0,0 +1 @@
+nc_py_api[app]>=0.8.0
diff --git a/nc_py_api/_version.py b/nc_py_api/_version.py
@@ -1,3 +1,3 @@
 """Version of nc_py_api."""
 
-__version__ = "0.7.2"
+__version__ = "0.8.0.dev0"
diff --git a/nc_py_api/ex_app/defs.py b/nc_py_api/ex_app/defs.py
@@ -39,6 +39,8 @@ class ApiScope(enum.IntEnum):
     """Allows access to Talk API endpoints."""
     TALK_BOT = 60
     """Allows to register Talk Bots."""
+    AI_PROVIDERS = 61
+    """Allows to register AI providers."""
     ACTIVITIES = 110
     """Activity App endpoints."""
     NOTES = 120

diff --git a/nc_py_api/ex_app/providers/__init__.py b/nc_py_api/ex_app/providers/__init__.py
@@ -0,0 +1 @@
+"""APIs related to Nextcloud Providers."""
diff --git a/nc_py_api/ex_app/providers/providers.py b/nc_py_api/ex_app/providers/providers.py
@@ -0,0 +1,24 @@
+"""Nextcloud API for AI Providers."""
+
+from ..._session import AsyncNcSessionApp, NcSessionApp
+from .speech_to_text import _AsyncSpeechToTextProviderAPI, _SpeechToTextProviderAPI
+
+
+class ProvidersApi:
+    """Synchronous entry point that groups the AI Provider APIs bound to one ``NcSessionApp`` session."""
+
+    speech_to_text: _SpeechToTextProviderAPI
+    """SpeechToText Provider API."""
+
+    def __init__(self, session: NcSessionApp):
+        self.speech_to_text = _SpeechToTextProviderAPI(session)
+
+
+class AsyncProvidersApi:
+    """Asynchronous entry point that groups the AI Provider APIs bound to one ``AsyncNcSessionApp`` session."""
+
+    speech_to_text: _AsyncSpeechToTextProviderAPI
+    """SpeechToText Provider API (async variant)."""
+
+    def __init__(self, session: AsyncNcSessionApp):
+        self.speech_to_text = _AsyncSpeechToTextProviderAPI(session)