JohnSnowLabs
diff --git a/‎CHANGELOG‎
Lines changed: 30 additions & 0 deletions b/‎CHANGELOG‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 53 additions & 50 deletions b/‎README.md‎
Lines changed: 53 additions & 50 deletions
diff --git a/‎build.sbt‎
Lines changed: 1 addition & 1 deletion b/‎build.sbt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_includes/head.html‎
Lines changed: 1 addition & 1 deletion b/‎docs/_includes/head.html‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_layouts/landing.html‎
Lines changed: 5 additions & 3 deletions b/‎docs/_layouts/landing.html‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎docs/api/com/index.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/api/com/index.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/api/com/johnsnowlabs/client/CloudClient.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/api/com/johnsnowlabs/client/CloudClient.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/api/com/johnsnowlabs/client/CloudManager.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/api/com/johnsnowlabs/client/CloudManager.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/api/com/johnsnowlabs/client/CloudResources$.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/api/com/johnsnowlabs/client/CloudResources$.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/api/com/johnsnowlabs/client/CloudStorage.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/api/com/johnsnowlabs/client/CloudStorage.html‎
Lines changed: 4 additions & 4 deletions
@@ -1,3 +1,33 @@
+========
+5.2.0
+========
+----------------
+New Features & Enhancements
+----------------
+* **NEW:** Introducing the `CLIPForZeroShotClassification` for Zero-Shot Image Classification using OpenAI's CLIP models
+* **NEW:** Introducing the `DocumentTokenSplitter` which allows users to split large documents into smaller chunks to be used in RAG with LLM models
+* **NEW:** Introducing support for ONNX Runtime in T5Transformer annotator
+* **NEW:** Introducing support for ONNX Runtime in MarianTransformer annotator
+* **NEW:** Introducing support for ONNX Runtime in BertSentenceEmbeddings annotator
+* **NEW:** Introducing support for ONNX Runtime in XlmRoBertaSentenceEmbeddings annotator
+* **NEW:** Introducing support for ONNX Runtime in CamemBertForQuestionAnswering, CamemBertForTokenClassification, and CamemBertForSequenceClassification annotators
+* Add caching support for newly imported T5 models in TF format to make their performance competitive with the ONNX version
+* Improve ZIP util and add tests for both ZipArchiveUtil and OnnxWrapper
+* Refactor ONNX and add OnnxSession to broadcast
+* Update ONNX Runtime to 1.16.3
+* Add a new notebook for Structured Streaming
+
+----------------
+Bug Fixes
+----------------
+* Fix random dimension mismatch in E5Embeddings and MPNetEmbeddings due to a missing average_pool after last_hidden_state in the output
+* Fix batching exception in E5 and MPNet embeddings annotators failing when sentence is used instead of document
+* Fix chunk construction when an entity is found
+* Fix a bug in the library's version in Scala
+* Fix Whisper models not downloading due to the wrong library version
+* Fix and refactor saving best model based on given metrics during NerDL training
+
+
 ========
 5.1.4
 ========
 
@@ -6,7 +6,7 @@ name := getPackageName(is_silicon, is_gpu, is_aarch64)
 
 organization := "com.johnsnowlabs.nlp"
 
-version := "5.1.4"
+version := "5.2.0"
 
 (ThisBuild / scalaVersion) := scalaVer
 
 
@@ -16,7 +16,7 @@
 {%- assign _article_pagetitle = __return -%}
 
 {%- if page.layout == "landing" -%}
-<title>Spark NLP - State of the Art NLP</title>
+<title>Spark NLP - State of the Art NLP Library for Large Language Models (LLMs)</title>
 {%- elsif page.layout == "model" -%}
 <title>{%- include snippets/get-article-modeltitle.html article=page -%}</title>
 {%- elsif _pagetitle -%}
 
@@ -201,7 +201,7 @@ <h3 class="grey h3_title">{{ _section.title }}</h3>
                   <div class="highlight-box">
     {% highlight bash %}
     # Using PyPI
-    $ pip install spark-nlp==5.1.4
+    $ pip install spark-nlp==5.2.0
 
     # Using Anaconda/Conda
     $ conda install -c johnsnowlabs spark-nlp
@@ -336,12 +336,14 @@ <h4 class="blue h4_title">NLP Features</h4>
                     <li>Vision Transformer (Google ViT) <strong>Image Classification</strong></li>
                     <li>Microsoft Swin Transformer <strong>Image Classification</strong></li>
                     <li>Facebook ConvNext <strong>Image Classification</strong></li>
+                    <li>Image to Text <strong>Image Captioning</strong></li>
+                    <li>Zero-Shot <strong>Image Classification (OpenAI CLIP)</strong></li>
                     <li>Automatic Speech Recognition <strong>(OpenAI Whisper, Wav2Vec2 & HuBERT)</strong></li>
                     <li>Easy <strong>ONNX</strong> and <strong>TensorFlow</strong> integrations</li>
                     <li><strong>GPU</strong> Support</li>
                     <li>Full integration with <strong>Spark ML</strong> functions</li>
-                    <li><strong>16800+</strong> pre-trained <strong>models </strong> in <strong>200+ languages! </strong>
-                    <li><strong>5900+</strong> pre-trained <strong>pipelines </strong> in <strong>200+ languages! </strong>
+                    <li><strong>24000+</strong> pre-trained <strong>models </strong> in <strong>200+ languages! </strong></li>
+                    <li><strong>6000+</strong> pre-trained <strong>pipelines </strong> in <strong>200+ languages! </strong></li>
                   </ul>
                 </div>
 {% highlight python %}
 
@@ -3,9 +3,9 @@
         <head>
           <meta http-equiv="X-UA-Compatible" content="IE=edge" />
           <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-          <title>Spark NLP 5.1.4 ScalaDoc  - com</title>
-          <meta name="description" content="Spark NLP 5.1.4 ScalaDoc - com" />
-          <meta name="keywords" content="Spark NLP 5.1.4 ScalaDoc com" />
+          <title>Spark NLP 5.2.0 ScalaDoc  - com</title>
+          <meta name="description" content="Spark NLP 5.2.0 ScalaDoc - com" />
+          <meta name="keywords" content="Spark NLP 5.2.0 ScalaDoc com" />
           <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
 
 
@@ -28,7 +28,7 @@
         </head>
         <body>
       <div id="search">
-        <span id="doc-title">Spark NLP 5.1.4 ScalaDoc<span id="doc-version"></span></span>
+        <span id="doc-title">Spark NLP 5.2.0 ScalaDoc<span id="doc-version"></span></span>
         <span class="close-results"><span class="left">&lt;</span> Back</span>
         <div id="textfilter">
           <span class="input">
 
@@ -3,9 +3,9 @@
         <head>
           <meta http-equiv="X-UA-Compatible" content="IE=edge" />
           <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-          <title>Spark NLP 5.1.4 ScalaDoc  - com.johnsnowlabs.client.CloudClient</title>
-          <meta name="description" content="Spark NLP 5.1.4 ScalaDoc - com.johnsnowlabs.client.CloudClient" />
-          <meta name="keywords" content="Spark NLP 5.1.4 ScalaDoc com.johnsnowlabs.client.CloudClient" />
+          <title>Spark NLP 5.2.0 ScalaDoc  - com.johnsnowlabs.client.CloudClient</title>
+          <meta name="description" content="Spark NLP 5.2.0 ScalaDoc - com.johnsnowlabs.client.CloudClient" />
+          <meta name="keywords" content="Spark NLP 5.2.0 ScalaDoc com.johnsnowlabs.client.CloudClient" />
           <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
 
 
@@ -28,7 +28,7 @@
         </head>
         <body>
       <div id="search">
-        <span id="doc-title">Spark NLP 5.1.4 ScalaDoc<span id="doc-version"></span></span>
+        <span id="doc-title">Spark NLP 5.2.0 ScalaDoc<span id="doc-version"></span></span>
         <span class="close-results"><span class="left">&lt;</span> Back</span>
         <div id="textfilter">
           <span class="input">
 
@@ -3,9 +3,9 @@
         <head>
           <meta http-equiv="X-UA-Compatible" content="IE=edge" />
           <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-          <title>Spark NLP 5.1.4 ScalaDoc  - com.johnsnowlabs.client.CloudManager</title>
-          <meta name="description" content="Spark NLP 5.1.4 ScalaDoc - com.johnsnowlabs.client.CloudManager" />
-          <meta name="keywords" content="Spark NLP 5.1.4 ScalaDoc com.johnsnowlabs.client.CloudManager" />
+          <title>Spark NLP 5.2.0 ScalaDoc  - com.johnsnowlabs.client.CloudManager</title>
+          <meta name="description" content="Spark NLP 5.2.0 ScalaDoc - com.johnsnowlabs.client.CloudManager" />
+          <meta name="keywords" content="Spark NLP 5.2.0 ScalaDoc com.johnsnowlabs.client.CloudManager" />
           <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
 
 
@@ -28,7 +28,7 @@
         </head>
         <body>
       <div id="search">
-        <span id="doc-title">Spark NLP 5.1.4 ScalaDoc<span id="doc-version"></span></span>
+        <span id="doc-title">Spark NLP 5.2.0 ScalaDoc<span id="doc-version"></span></span>
         <span class="close-results"><span class="left">&lt;</span> Back</span>
         <div id="textfilter">
           <span class="input">
 
@@ -3,9 +3,9 @@
         <head>
           <meta http-equiv="X-UA-Compatible" content="IE=edge" />
           <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-          <title>Spark NLP 5.1.4 ScalaDoc  - com.johnsnowlabs.client.CloudResources</title>
-          <meta name="description" content="Spark NLP 5.1.4 ScalaDoc - com.johnsnowlabs.client.CloudResources" />
-          <meta name="keywords" content="Spark NLP 5.1.4 ScalaDoc com.johnsnowlabs.client.CloudResources" />
+          <title>Spark NLP 5.2.0 ScalaDoc  - com.johnsnowlabs.client.CloudResources</title>
+          <meta name="description" content="Spark NLP 5.2.0 ScalaDoc - com.johnsnowlabs.client.CloudResources" />
+          <meta name="keywords" content="Spark NLP 5.2.0 ScalaDoc com.johnsnowlabs.client.CloudResources" />
           <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
 
 
@@ -28,7 +28,7 @@
         </head>
         <body>
       <div id="search">
-        <span id="doc-title">Spark NLP 5.1.4 ScalaDoc<span id="doc-version"></span></span>
+        <span id="doc-title">Spark NLP 5.2.0 ScalaDoc<span id="doc-version"></span></span>
         <span class="close-results"><span class="left">&lt;</span> Back</span>
         <div id="textfilter">
           <span class="input">
 
@@ -3,9 +3,9 @@
         <head>
           <meta http-equiv="X-UA-Compatible" content="IE=edge" />
           <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-          <title>Spark NLP 5.1.4 ScalaDoc  - com.johnsnowlabs.client.CloudStorage</title>
-          <meta name="description" content="Spark NLP 5.1.4 ScalaDoc - com.johnsnowlabs.client.CloudStorage" />
-          <meta name="keywords" content="Spark NLP 5.1.4 ScalaDoc com.johnsnowlabs.client.CloudStorage" />
+          <title>Spark NLP 5.2.0 ScalaDoc  - com.johnsnowlabs.client.CloudStorage</title>
+          <meta name="description" content="Spark NLP 5.2.0 ScalaDoc - com.johnsnowlabs.client.CloudStorage" />
+          <meta name="keywords" content="Spark NLP 5.2.0 ScalaDoc com.johnsnowlabs.client.CloudStorage" />
           <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
 
 
@@ -28,7 +28,7 @@
         </head>
         <body>
       <div id="search">
-        <span id="doc-title">Spark NLP 5.1.4 ScalaDoc<span id="doc-version"></span></span>
+        <span id="doc-title">Spark NLP 5.2.0 ScalaDoc<span id="doc-version"></span></span>
         <span class="close-results"><span class="left">&lt;</span> Back</span>
         <div id="textfilter">
           <span class="input">