pytorch · cjyabraham · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/_ecosystem/opencompass b/_ecosystem/opencompass
@@ -0,0 +1,10 @@
+---
+layout: ecosystem_detail
+title: OpenCompass
+summary: OpenCompass is an LLM evaluation platform, supporting a wide range of models (Llama3, Mistral, InternLM2, GPT-4, LLaMa2, Qwen, GLM, Claude, etc.) over 100+ datasets.
+link: https://opencompass.org.cn/
+summary-home: OpenCompass is an LLM evaluation platform, supporting a wide range of models (Llama3, Mistral, InternLM2, GPT-4, LLaMa2, Qwen, GLM, Claude, etc.) over 100+ datasets.
+featured-home: false
+github-id: open-compass/opencompass
+date-added: 12/18/24
+---
diff --git a/_get_started/get-started-via-cloud-partners.md b/_get_started/get-started-via-cloud-partners.md
@@ -32,11 +32,15 @@ get-started-via-cloud: true
 {% include_relative installation/google-cloud.md %}
 {% endcapture %}
 
+{% capture lightning-studios %}
+{% include_relative installation/lightning-studios.md %}
+{% endcapture %}
 
 <div id="cloud">
   <div class="platform aws">{{aws | markdownify }}</div>
   <div class="platform google-cloud">{{google-cloud | markdownify }}</div>
   <div class="platform microsoft-azure">{{azure | markdownify }}</div>
+  <div class="platform lightning-studios">{{lightning-studios | markdownify }}</div>
 </div>
 
 <script page-id="get-started-via-cloud-partners" src="{{ site.baseurl }}/assets/menu-tab-selection.js"></script>

diff --git a/_get_started/installation/lightning-studios.md b/_get_started/installation/lightning-studios.md
@@ -0,0 +1,35 @@
+# Using PyTorch with Lightning Studios
+{:.no_toc}
+
+Lightning Studios let you fully experience PyTorch and its ecosystem on accelerated compute in seconds. You can pick a GPU and customize from your browser or any local IDE with zero setup.
+
+**Lightning Studios provide:**
+
+* ready-to-use environments that come with PyTorch and PyTorch Lightning pre-installed
+* accelerated computing on GPUs such as L4, L40S, and H100, and the ability to switch between them in seconds
+* optimized multi-node training, to scale up PyTorch training jobs across machines
+
+Lightning Studios enable you to share fully reproducible environments preloaded with everything you need to build AI systems, like data processing, pretraining, finetuning, inference, etc. Our library of 2K community-built, open-source templates has pre-installed dependencies, model weights, data, code and more.
+
+## Getting Started
+{: #ls-getting-started}
+
+
+* Go to [lightning.ai](http://lightning.ai/)
+* Sign up (you get 22 free GPU hours monthly)
+* Start up your first Studio
+* Or duplicate one of our templates at [lightning.ai/studios](http://lightning.ai/studios)
+
+**With Studios, you can:**
+
+* Pay-as-you-go
+* Get GPUs from $0.40 p/h
+* Use your own AWS credits
+* Access 24/7 Enterprise support
+
+## Build AI, not infrastructure
+{: #ls-build}
+
+With Lightning Studios, you can easily build AI products with full-code and low-code tools in one place, plus access GPUs, train models and deploy.
+
+AI products like Stable Diffusion and NVIDIA’s NeMo are built with Lightning. Whether you're experimenting with your first model or AI app, or deploying AI at enterprise scale, Lightning powers every stage — even pretraining LLMs on 10,000+ GPUs.
diff --git a/_includes/quick-start-module.js b/_includes/quick-start-module.js
@@ -27,6 +27,7 @@ var supportedCloudPlatforms = [
   'aws',
   'google-cloud',
   'microsoft-azure',
+  'lightning-studios',
 ];
 
 var os = $(".os > .option");

diff --git a/_includes/quick_start_cloud_options.html b/_includes/quick_start_cloud_options.html
@@ -44,4 +44,15 @@
       </ul>
     </div>
   </div>
+
+  <div class="cloud-option-row">
+    <div class="cloud-option" data-toggle="cloud-dropdown">
+      <div class="cloud-option-body lightning-studios" id="lightning-studios">
+        Lightning Studios
+      </div>
+      <ul>
+        <li><a href="https://lightning.ai/">lightning.ai</a></li>
+      </ul>
+    </div>
+  </div>
 </div>
diff --git a/_posts/2024-12-18-doctr-joins-pytorch-ecosystem.md b/_posts/2024-12-18-doctr-joins-pytorch-ecosystem.md
@@ -0,0 +1,169 @@
+---
+layout: blog_detail
+title: "docTR joins PyTorch Ecosystem: From Pixels to Data, Building a Recognition Pipeline with PyTorch and docTR"
+author: Olivier Dulcy & Sebastian Olivera, Mindee
+---
+
+![docTR logo](/assets/images/doctr-joins-pytorch-ecosystem/fg1.png){:style="width:100%;display: block;max-width:400px; margin-left:auto; margin-right:auto;"}
+
+We’re thrilled to announce that the docTR project has been integrated into the PyTorch ecosystem! This integration ensures that docTR aligns with PyTorch’s standards and practices, giving developers a reliable, community-backed solution for powerful OCR workflows.
+
+**For more information on what it means to be a PyTorch ecosystem project, see the [PyTorch Ecosystem Tools page](https://pytorch.org/ecosystem/).**
+
+
+## About docTR
+
+docTR is an Apache 2.0 project developed and distributed by [Mindee](https://www.mindee.com/) to help developers integrate OCR capabilities into applications with no prior knowledge required.
+
+To quickly and efficiently extract text information, docTR uses a two-stage approach:
+
+
+
+* First, it performs text **detection** to localize words.
+* Then, it conducts text **recognition** to identify all characters in a word.
+
+**Detection** and **recognition** are performed by state-of-the-art models written in PyTorch. To learn more about this approach, you can refer [to the docTR documentation](https://mindee.github.io/doctr/using_doctr/using_models.html).
+
+docTR enhances the user experience in PyTorch projects by providing high-performance OCR capabilities right out of the box. Its specially designed models require minimal to no fine-tuning for common use cases, allowing developers to quickly integrate advanced document analysis features.
+
+
+## Local installation
+
+docTR requires Python >= 3.10 and supports Windows, Mac and Linux. Please refer to our [README](https://github.com/mindee/doctr?tab=readme-ov-file#installation) for the dependencies required on MacBooks with the M1 chip.
+
+```
+pip3 install -U pip
+pip3 install "python-doctr[torch,viz]"
+```
+
+This will install docTR along with the latest version of PyTorch.
+
+
+```
+Note: docTR also provides Docker images for easy deployment, such as part of a Kubernetes cluster.
+```
+
+
+
+## Text recognition
+
+Now, let’s try docTR’s OCR recognition on this sample:
+
+
+![OCR sample](/assets/images/doctr-joins-pytorch-ecosystem/fg2.jpg){:style="width:100%;display: block;max-width:300px; margin-left:auto; margin-right:auto;"}
+
+
+The OCR recognition model expects an image with only one word on it and will output the predicted word with a confidence score. You can use the following snippet to test OCR capabilities from docTR:
+
+
+```python
+from doctr.io import DocumentFile
+from doctr.models import recognition_predictor
+
+doc = DocumentFile.from_images("/path/to/image")
+
+# Load the OCR model
+# This will download pre-trained models hosted by Mindee
+model = recognition_predictor(pretrained=True)
+
+result = model(doc)
+print(result)
+```
+
+Here, the most important line of code is `model = recognition_predictor(pretrained=True)`. This will load a default text recognition model, `crnn_vgg16_bn`, but you can select other models through the `arch` parameter. You can check out the [available architectures](https://mindee.github.io/doctr/using_doctr/using_models.html).
+
+When run on the sample, the recognition predictor retrieves the following data: `[('MAGAZINE', 0.9872216582298279)]`
+
+
+```
+Note: Through the DocumentFile object, docTR provides an easy way to manipulate PDFs or images.
+```
+
+
+
+## Text detection
+
+The last example was a crop of a single word. Now, what about an image with several words on it, like this one?
+
+
+![photo of magazines](/assets/images/doctr-joins-pytorch-ecosystem/fg3.jpg){:style="width:100%;display: block;max-width:300px; margin-left:auto; margin-right:auto;"}
+
+
+A text detection model is used before the text recognition to output a segmentation map representing the location of the text. Following that, the text recognition is applied on every detected patch.
+
+Below is a snippet to run only the detection part:
+
+```
+from doctr.io import DocumentFile
+from doctr.models import detection_predictor
+from matplotlib import pyplot as plt
+from doctr.utils.geometry import detach_scores
+from doctr.utils.visualization import draw_boxes
+
+doc = DocumentFile.from_images("path/to/my/file")
+model = detection_predictor(pretrained=True)
+
+result = model(doc)
+
+draw_boxes(detach_scores([result[0]["words"]])[0][0], doc[0])
+plt.axis('off')
+plt.show()
+```
+
+Running it on the full sample yields the following:
+
+
+![photo of magazines](/assets/images/doctr-joins-pytorch-ecosystem/fg4.png){:style="width:100%;display: block;max-width:300px; margin-left:auto; margin-right:auto;"}
+
+
+Similarly to the text recognition, `detection_predictor` will load a default model (`fast_base` here). You can also load another one by providing it through the `arch` parameter.
+
+
+## The full implementation
+
+Now, let’s plug both components into the same pipeline. 
+
+Conveniently, docTR provides a wrapper that does exactly that for us:
+
+```
+from doctr.io import DocumentFile
+from doctr.models import ocr_predictor
+
+doc = DocumentFile.from_images("/path/to/image")
+
+model = ocr_predictor(pretrained=True, assume_straight_pages=False)
+
+result = model(doc)
+result.show()
+```
+
+![photo of magazines](/assets/images/doctr-joins-pytorch-ecosystem/fg5.png){:style="width:100%;display: block;max-width:300px; margin-left:auto; margin-right:auto;"}
+
+The last line should display a matplotlib window which shows the detected patches. Hovering the mouse over them will display their contents.
+
+You can also do more with this output, such as reconstituting a synthetic document like so:
+
+```
+import matplotlib.pyplot as plt
+
+synthetic_pages = result.synthesize()
+plt.imshow(synthetic_pages[0])
+plt.axis('off')
+plt.show()
+```
+
+![black text on white](/assets/images/doctr-joins-pytorch-ecosystem/fg6.png){:style="width:100%;display: block;max-width:300px; margin-left:auto; margin-right:auto;"}
+
+
+The pipeline is highly customizable: you can modify the behavior of the detection or recognition model by passing arguments to `ocr_predictor`. Please refer to the [documentation](https://mindee.github.io/doctr/using_doctr/using_models.html) to learn more about it.
+
+
+## Conclusion
+
+We’re excited to welcome docTR into the PyTorch Ecosystem, where it seamlessly integrates with PyTorch pipelines to deliver state-of-the-art OCR capabilities right out of the box. 
+
+By empowering developers to quickly extract text from images or PDFs using familiar tooling, docTR simplifies complex document analysis tasks and enhances the overall PyTorch experience.
+
+We invite you to explore the [docTR GitHub repository](https://github.com/mindee/doctr), join the [docTR community on Slack](https://slack.mindee.com/), and reach out at [contact@mindee.com](mailto:contact@mindee.com) for inquiries or collaboration opportunities.
+
+Together, we can continue to push the boundaries of document understanding and develop even more powerful, accessible tools for everyone in the PyTorch community.
diff --git a/_sass/quick-start-module.scss b/_sass/quick-start-module.scss
@@ -316,6 +316,10 @@
     content: url($baseurl + "/assets/images/microsoft-azure-logo.svg");
   }
 
+  &.lightning-studios:before {
+    content: url($baseurl + "/assets/images/lightning-studios-logo.svg");
+  }
+
   &.google-cloud:before {
     content: url($baseurl + "/assets/images/google-cloud-logo.svg");
   }

diff --git a/assets/get-started-sidebar.js b/assets/get-started-sidebar.js
@@ -4,7 +4,7 @@ $([".macos", ".linux", ".windows"]).each(function(index, osClass) {
   buildSidebarMenu(osClass, "#get-started-locally-sidebar-list");
 });
 
-$([".alibaba", ".aws", ".microsoft-azure", ".google-cloud"]).each(function(index, cloudPartner) {
+$([".alibaba", ".aws", ".microsoft-azure", ".google-cloud", ".lightning-studios"]).each(function(index, cloudPartner) {
   buildSidebarMenu(cloudPartner, "#get-started-cloud-sidebar-list");
 });
 
@@ -15,7 +15,7 @@ $(["macos", "linux", "windows"]).each(function(index, osClass) {
 });
 
 // Show cloud partner side nav on click or hide side nav if already open 
-$(["alibaba", "aws", "microsoft-azure", "google-cloud"]).each(function(index, sidebarClass) {
+$(["alibaba", "aws", "microsoft-azure", "google-cloud", "lightning-studios"]).each(function(index, sidebarClass) {
   $("#" + sidebarClass).click(function() {
     showSidebar(sidebarClass, ".get-started-cloud-sidebar li");
     // alibaba filter for centering cloud module

diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg1.png b/assets/images/doctr-joins-pytorch-ecosystem/fg1.png
diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg2.jpg b/assets/images/doctr-joins-pytorch-ecosystem/fg2.jpg
diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg3.jpg b/assets/images/doctr-joins-pytorch-ecosystem/fg3.jpg
diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg4.png b/assets/images/doctr-joins-pytorch-ecosystem/fg4.png
diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg5.png b/assets/images/doctr-joins-pytorch-ecosystem/fg5.png
diff --git a/assets/images/doctr-joins-pytorch-ecosystem/fg6.png b/assets/images/doctr-joins-pytorch-ecosystem/fg6.png
diff --git a/assets/images/lightning-studios-logo.svg b/assets/images/lightning-studios-logo.svg
diff --git a/assets/quick-start-module.js b/assets/quick-start-module.js
@@ -27,6 +27,7 @@ var supportedCloudPlatforms = [
   'aws',
   'google-cloud',
   'microsoft-azure',
+  'lightning-studios',
 ];
 
 var os = $(".os > .option");