diff --git a/README.md b/README.md index 0bb61681..63457e9d 100644 --- a/README.md +++ b/README.md @@ -24,59 +24,59 @@ Familiarity with Machine Learning and Python development is recommended. For mor ## 🗄️ Table of Content: -- [QuickStart](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb): Introductory tutorial to get started quickly. +- [QuickStart](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/quickstart.ipynb): Introductory tutorial to get started quickly. ### 🚀 Real-time AI Systems -- [Fraud Online](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/real-time-ai-systems/fraud_online): Detect Fraud Transactions -- [AML](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/real-time-ai-systems/aml): Anti-money laundering predictions -- [TikTok RecSys](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/real-time-ai-systems/tiktok_recsys): TikTok-style recommendation system -- [TimeSeries](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/real-time-ai-systems/timeseries): Timeseries price prediction +- [Fraud Online](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/real-time-ai-systems/fraud_online): Detect Fraud Transactions +- [AML](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/real-time-ai-systems/aml): Anti-money laundering predictions +- [TikTok RecSys](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/real-time-ai-systems/tiktok_recsys): TikTok-style recommendation system +- [TimeSeries](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/real-time-ai-systems/timeseries): Timeseries price prediction ### ⚙️ Batch AI Systems -- [Loan Approval](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/loan_approval): Predict loan approvals -- [Fraud Batch](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/fraud_batch): 
Detect Fraud Transactions -- [Churn](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/churn): Predict customers at risk of churning -- [Credit Scores](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/credit_scores): Predict clients' repayment abilities -- [Hospital Wait Time](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/hospital_wait_time): Predict waiting time for deceased donor kidneys -- [NYC Taxi Fares](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/batch-ai-systems/nyc_taxi_fares): Predict NYC taxi fare amounts +- [Loan Approval](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/loan_approval): Predict loan approvals +- [Fraud Batch](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/fraud_batch): Detect Fraud Transactions +- [Churn](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/churn): Predict customers at risk of churning +- [Credit Scores](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/credit_scores): Predict clients' repayment abilities +- [Hospital Wait Time](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/hospital_wait_time): Predict waiting time for deceased donor kidneys +- [NYC Taxi Fares](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/batch-ai-systems/nyc_taxi_fares): Predict NYC taxi fare amounts ### 🔮 LLM AI Systems -- [Fraud Cheque Detection](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/llm-ai-systems/fraud_cheque_detection): AI assistant for detecting fraudulent scanned cheques -- [LLM PDF](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/llm-ai-systems/llm_pdfs): RAG-based AI assistant for PDF document Q&A -- [Recommender 
System](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/llm-ai-systems/recommender-system): Fashion items recommender system +- [Fraud Cheque Detection](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/llm-ai-systems/fraud_cheque_detection): AI assistant for detecting fraudulent scanned cheques +- [LLM PDF](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/llm-ai-systems/llm_pdfs): RAG-based AI assistant for PDF document Q&A +- [Recommender System](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/llm-ai-systems/recommender-system): Fashion items recommender system ### 🧬 API Examples - Vector Similarity Search: - - [Feature Group Embeddings API](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb) - - [Feature View Embeddings API](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb) -- [Datasets](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/datasets.ipynb) -- [Feature Group Change Notification CDC](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/feature_group_change_notification_cdc.ipynb) -- [Feature Monitoring](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/feature_monitoring.ipynb) -- [Git Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/git.ipynb) -- [Jobs Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/jobs.ipynb) -- [Kafka Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/kafka.ipynb) -- [OpenSearch Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/opensearch.ipynb) -- [Projects 
Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/projects.ipynb) -- [Secrets Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/secrets.ipynb) + - [Feature Group Embeddings API](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb) + - [Feature View Embeddings API](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb) +- [Datasets](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/datasets.ipynb) +- [Feature Group Change Notification CDC](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/feature_group_change_notification_cdc.ipynb) +- [Feature Monitoring](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/feature_monitoring.ipynb) +- [Git Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/git.ipynb) +- [Jobs Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/jobs.ipynb) +- [Kafka Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/kafka.ipynb) +- [OpenSearch Integration](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/opensearch.ipynb) +- [Projects Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/projects.ipynb) +- [Secrets Management](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/secrets.ipynb) ### 🔬 Integrations -- [Airflow GCP](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/airflow_gcp): Apache Airflow integration with Google Cloud Platform. 
-- [AzureSQL](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/azuresql): Create an External Feature Group using Azure SQL Database. -- [BigQuery](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/big_query): Create an External Feature Group using BigQuery Storage Connector. -- [Bytewax](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/bytewax): Real-time feature computation using Bytewax. -- [DBT with BigQuery](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/dbt_bq): Perform feature engineering in DBT on BigQuery. -- [Federated Offline Query](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/federated-offline-query): Execute federated queries across offline data sources. -- [Google Cloud Storage](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/gcs): Create an External Feature Group using GCS Storage Connector. -- [Great Expectations](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/great_expectations): Introduction to Great Expectations concepts for Hopsworks MLOps platform. -- [Java](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/java): Java-based integrations including Apache Beam and Apache Flink. -- [LangChain](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/langchain): Integration with LangChain for LLM applications. -- [MageAI](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/mage_ai): Build and operate ML systems with Mage and Hopsworks. -- [Neo4j](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/neo4j): Perform Anti-money laundering predictions using Neo4j Graph. -- [Polars](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/polars): Introductory tutorial on using Polars with Hopsworks. 
-- [PySpark Streaming](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/pyspark_streaming): Real-time feature computation using PySpark. -- [Redshift](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/redshift): Create an External Feature Group using Redshift Storage Connector. -- [Snowflake](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/snowflake): Create an External Feature Group using Snowflake Storage Connector. -- [Weights & Biases](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/integrations/wandb): Build machine learning models with Weights & Biases. +- [Airflow GCP](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/airflow_gcp): Apache Airflow integration with Google Cloud Platform. +- [AzureSQL](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/azuresql): Create an External Feature Group using Azure SQL Database. +- [BigQuery](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/big_query): Create an External Feature Group using BigQuery Storage Connector. +- [Bytewax](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/bytewax): Real-time feature computation using Bytewax. +- [DBT with BigQuery](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/dbt_bq): Perform feature engineering in DBT on BigQuery. +- [Federated Offline Query](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/federated-offline-query): Execute federated queries across offline data sources. +- [Google Cloud Storage](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/gcs): Create an External Feature Group using GCS Storage Connector. 
+- [Great Expectations](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/great_expectations): Introduction to Great Expectations concepts for Hopsworks MLOps platform. +- [Java](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/java): Java-based integrations including Apache Beam and Apache Flink. +- [LangChain](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/langchain): Integration with LangChain for LLM applications. +- [MageAI](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/mage_ai): Build and operate ML systems with Mage and Hopsworks. +- [Neo4j](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/neo4j): Perform Anti-money laundering predictions using Neo4j Graph. +- [Polars](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/polars): Introductory tutorial on using Polars with Hopsworks. +- [PySpark Streaming](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/pyspark_streaming): Real-time feature computation using PySpark. +- [Redshift](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/redshift): Create an External Feature Group using Redshift Storage Connector. +- [Snowflake](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/snowflake): Create an External Feature Group using Snowflake Storage Connector. +- [Weights & Biases](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/integrations/wandb): Build machine learning models with Weights & Biases. 
## 📝 Feedback & Comments: We welcome your input through: diff --git a/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb b/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb index 47188f4f..b255003f 100644 --- a/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb +++ b/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb @@ -324,7 +324,7 @@ "\n", "## ➡️ Next step\n", "\n", - "Now you are able to search articles using natural language. You can learn how to rank the result in [this tutorial](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb)." + "Now you are able to search articles using natural language. You can learn how to rank the result in [this tutorial](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb)." ] } ], diff --git a/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb b/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb index d5e53768..19358273 100644 --- a/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb +++ b/api_examples/vector_similarity_search/2_feature_view_embeddings_api.ipynb @@ -13,7 +13,7 @@ "id": "8988ff65", "metadata": {}, "source": [ - "In the [previous tutorial](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb), you learned how to search news articles using natural language queries. 
In this tutorial, we will focus on ranking the search results to make them more useful and relevant.\n", + "In the [previous tutorial](https://github.com/logicalclocks/hopsworks-tutorials/tree/branch-4.5/api_examples/vector_similarity_search/1_feature_group_embeddings_api.ipynb), you learned how to search news articles using natural language queries. In this tutorial, we will focus on ranking the search results to make them more useful and relevant.\n", "\n", "To achieve this, we will use the number of views as a scoring metric for news articles, as it reflects their popularity. The steps are as follows:\n", "\n", diff --git a/batch-ai-systems/churn/1_churn_feature_pipeline.ipynb b/batch-ai-systems/churn/1_churn_feature_pipeline.ipynb index e6d39c6a..bb087d18 100644 --- a/batch-ai-systems/churn/1_churn_feature_pipeline.ipynb +++ b/batch-ai-systems/churn/1_churn_feature_pipeline.ipynb @@ -7,7 +7,7 @@ "source": [ "# **Hopsworks Feature Store** - Part 01: Feature Pipeline\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/1_churn_feature_pipeline.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/churn/1_churn_feature_pipeline.ipynb)\n", "\n", "\n", "## 🗒️ This notebook is divided into the following sections:\n", @@ -367,7 +367,7 @@ "\n", "In the following notebook you will use your feature groups to create a train dataset, train a model and add a trained model to model registry.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/2_churn_training_pipeline.ipynb)" + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/churn/2_churn_training_pipeline.ipynb)" ] } ], diff --git a/batch-ai-systems/churn/2_churn_training_pipeline.ipynb b/batch-ai-systems/churn/2_churn_training_pipeline.ipynb index a922bd33..f0be6d79 100644 --- a/batch-ai-systems/churn/2_churn_training_pipeline.ipynb +++ b/batch-ai-systems/churn/2_churn_training_pipeline.ipynb @@ -6,7 +6,7 @@ "source": [ "# **Hopsworks Feature Store** - Part 02: Training Pipeline\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/2_churn_training_pipeline.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/churn/2_churn_training_pipeline.ipynb)\n", "\n", "This is the second part of the quick start series of tutorials about predicting customers that are at risk of churning with the Hopsworks Feature Store.\n", "\n", @@ -455,7 +455,7 @@ "\n", "In the following notebook you will use your model for batch inference.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/3_churn_batch_inference.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/churn/3_churn_batch_inference.ipynb)\n", "\n", "---" ] diff --git a/batch-ai-systems/churn/3_churn_batch_inference.ipynb b/batch-ai-systems/churn/3_churn_batch_inference.ipynb index b6ac1d78..643d74cd 100644 --- a/batch-ai-systems/churn/3_churn_batch_inference.ipynb +++ b/batch-ai-systems/churn/3_churn_batch_inference.ipynb @@ -7,7 
+7,7 @@ "source": [ "# **Hopsworks Feature Store** - Part 03: Batch Inference\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/3_churn_batch_inference.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/churn/3_churn_batch_inference.ipynb)" ] }, { @@ -451,7 +451,7 @@ "> `conda activate ./miniconda/envs/hopsworks`
\n", "> `python -m streamlit run churn/streamlit_app.py`
\n", "\n", - "**⚠️** If you are running on Colab, you will need to follow a different procedure. As highlighted in this [notebook](https://colab.research.google.com/github/mrm8488/shared_colab_notebooks/blob/master/Create_streamlit_app.ipynb). " + "**⚠️** If you are running on Colab, you will need to follow a different procedure. As highlighted in this [notebook](https://colab.research.google.com/github/mrm8488/shared_colab_notebooks/blob/master/Create_streamlit_app.ipynb). " ] }, { diff --git a/batch-ai-systems/nyc_taxi_fares/README.md b/batch-ai-systems/nyc_taxi_fares/README.md index 4f157005..a19fc8e1 100644 --- a/batch-ai-systems/nyc_taxi_fares/README.md +++ b/batch-ai-systems/nyc_taxi_fares/README.md @@ -30,7 +30,7 @@ Also, you obviously need to have [streamlit](https://docs.streamlit.io/library/g ## Data -You will generate random data for this tutorial. See corresponding functions in the [functions.py](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/advanced_tutorials/nyc_taxi_fares/functions.py). +You will generate random data for this tutorial. See corresponding functions in the [functions.py](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/advanced_tutorials/nyc_taxi_fares/functions.py). ## Streamlit run diff --git a/benchmarks/online-inference-pipeline/README.md b/benchmarks/online-inference-pipeline/README.md index 6eed969c..b94b5d0c 100644 --- a/benchmarks/online-inference-pipeline/README.md +++ b/benchmarks/online-inference-pipeline/README.md @@ -8,26 +8,26 @@ This repository benchmarks a deployment running inside **Hopsworks** using [Locu - Run all the provided notebooks to set up your deployment inside Hopsworks. 2. **Configure Target Host** - - Add the **host name** and **IP address** of your deployment in [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12). 
+ - Add the **host name** and **IP address** of your deployment in [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/locustfile.py#L12). - You can find this information in the Hopsworks **Deployment UI**. 3. **Add Hopsworks API Key** - - Insert your Hopsworks API key into the same [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12). + - Insert your Hopsworks API key into the same [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/locustfile.py#L12). - Generate the API key by following [this guide](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). 4. **Create the 'HOPSWORKS_API_KEY' secret** - Create a secret with the name `HOPSWORKS_API_KEY` which contains the API key by following [this guide](https://docs.hopsworks.ai/latest/user_guides/projects/secrets/create_secret/). 5. **Build the Locust Docker Image** - - Use the provided [Dockerfile](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/Dockerfile) to build a Locust image. + - Use the provided [Dockerfile](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/Dockerfile) to build a Locust image. - Push the image to your preferred container registry. 6. 
**Update Kubernetes Manifests** - Update the image URL in both: - - [`master-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/master-deployment.yaml#L28) - - [`slave-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/slave-deployment.yaml#L28) + - [`master-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/kubernetes-locust/master-deployment.yaml#L28) + - [`slave-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/kubernetes-locust/slave-deployment.yaml#L28) 7. **Deploy Locust** - - Run the [deployment script](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/deploy.sh) to deploy Locust master and worker nodes. + - Run the [deployment script](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/benchmarks/online-inference-pipeline/locust/kubernetes-locust/deploy.sh) to deploy Locust master and worker nodes. - This will deploy into a Kubernetes namespace named `locust`. - **Note:** Ensure you have `kubectl` access to the cluster. @@ -57,4 +57,4 @@ One benchmark that has been performed targets **5000 RPS** with a **P99 latency The high number of replicas for predictors is necessary to mitigate the effects of Python's [Global Interpreter Lock (GIL)](https://wiki.python.org/moin/GlobalInterpreterLock). This allows for greater parallelism and lower latency, especially at high RPS. -You can view the full benchmark report generated by Locust [here](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/locust_reports/locust_report_5k_rps_25_batch_size.pdf). 
+You can view the full benchmark report generated by Locust [here](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/locust_reports/locust_report_5k_rps_25_batch_size.pdf). diff --git a/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb b/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb index 66c44762..498eeebd 100644 --- a/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb +++ b/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb @@ -9,7 +9,7 @@ "# Short Introduction to Great Expectations Concepts on Hopsworks\n", "\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb)" ] }, { @@ -348,7 +348,7 @@ "source": [ "### Next Step : Data Validation with Great Expectation applied to the Fraud tutorial\n", "\n", - "Check it out here : [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/great_expectations/fraud_batch_data_validation.ipynb)" + "Check it out here : [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/great_expectations/fraud_batch_data_validation.ipynb)" ] }, { diff --git a/integrations/great_expectations/fraud_batch_data_validation.ipynb b/integrations/great_expectations/fraud_batch_data_validation.ipynb index dc987d80..9858efe1 100644 --- 
a/integrations/great_expectations/fraud_batch_data_validation.ipynb +++ b/integrations/great_expectations/fraud_batch_data_validation.ipynb @@ -19,7 +19,7 @@ "source": [ "# Data Validation using Hopsworks integration with Great Expectations \n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/great_expectations/fraud_batch_data_validation.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/great_expectations/fraud_batch_data_validation.ipynb)\n", "\n", "**Note**: you may get an error when installing hopsworks on Colab, and it is safe to ignore it.\n", "\n", @@ -47,7 +47,7 @@ "source": [ "\n", "Check the step 1 in the fraud batch tutorial to learn more about Feature Group : \n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/fraud_batch/1_feature_groups.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/fraud_batch/1_feature_groups.ipynb)\n", "\n", "\n", "## 🗒️ This notebook is divided in 5 sections:\n", diff --git a/integrations/langchain/news-search-langchain.ipynb b/integrations/langchain/news-search-langchain.ipynb index 0e8440a3..8f9a2890 100644 --- a/integrations/langchain/news-search-langchain.ipynb +++ b/integrations/langchain/news-search-langchain.ipynb @@ -15,7 +15,7 @@ "source": [ "In this tutorial, you will learn how to create a news search bot which can answer users' question about news using Opensearch in Hopsworks with Langchain. 
Concretely, you will create a RAG (Retrieval-Augmented Generation) application which searches news matching users' questions, and answers the question using a LLM with the retrieved news as the context.\n", "The steps include:\n", - "1. [Ingest news data to Hopsworks](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/api_examples/hsfs/knn_search/news-search-knn.ipynb)\n", + "1. [Ingest news data to Hopsworks](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/api_examples/hsfs/knn_search/news-search-knn.ipynb)\n", "2. Setup a `vectorstores` in Langchain using Opensearch in Hopsworks\n", "3. Create a LLM using model from huggingface\n", "4. Create a RAG application using `RetrievalQA` chain in Langchain" @@ -34,7 +34,7 @@ "id": "ba83a905-3944-4bf1-b4d7-43d4336f0beb", "metadata": {}, "source": [ - "You need to run this [notebook](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/api_examples/hsfs/knn_search/news-search-knn.ipynb) to ingest news data to Hopsworks." + "You need to run this [notebook](https://github.com/logicalclocks/hopsworks-tutorials/blob/branch-4.5/api_examples/hsfs/knn_search/news-search-knn.ipynb) to ingest news data to Hopsworks." 
] }, { diff --git a/integrations/polars/quickstart_polars.ipynb b/integrations/polars/quickstart_polars.ipynb index 0ce839b5..cf1ff0b1 100644 --- a/integrations/polars/quickstart_polars.ipynb +++ b/integrations/polars/quickstart_polars.ipynb @@ -13,7 +13,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/quickstart.ipynb)" ] }, { diff --git a/integrations/wandb/1_feature_groups.ipynb b/integrations/wandb/1_feature_groups.ipynb index 48d07b0d..0a4b75bc 100755 --- a/integrations/wandb/1_feature_groups.ipynb +++ b/integrations/wandb/1_feature_groups.ipynb @@ -15,7 +15,7 @@ "source": [ "# Part 01: Load, Engineer & Connect\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/wandb/1_feature_groups.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/wandb/1_feature_groups.ipynb)\n", "\n", "**Note**: you may get an error when installing hopsworks on Colab, and it is safe to ignore it.\n", "\n", @@ -433,7 +433,7 @@ "\n", "In the following notebook you will use your feature groups to create a dataset you can train a model on.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/wandb/2_feature_view_creation.ipynb)\n" + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/wandb/2_feature_view_creation.ipynb)\n" ] } ], diff --git a/integrations/wandb/2_feature_view_creation.ipynb b/integrations/wandb/2_feature_view_creation.ipynb index 97e00e5b..c37ccc71 100755 --- a/integrations/wandb/2_feature_view_creation.ipynb +++ b/integrations/wandb/2_feature_view_creation.ipynb @@ -13,7 +13,7 @@ "source": [ "# Part 02: Training Data & Feature views\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/wandb/2_feature_view_creation.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/wandb/2_feature_view_creation.ipynb)\n", "\n", "**Note**: you may get an error when installing hopsworks on Colab, and it is safe to ignore it.\n", "\n", @@ -195,7 +195,7 @@ "\n", "In the following notebook, you will train a model on the dataset you created in this notebook.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/wandb/3_model_training.ipynb)\n" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/wandb/3_model_training.ipynb)\n" ] } ], diff --git a/integrations/wandb/3_model_training.ipynb b/integrations/wandb/3_model_training.ipynb index f37d2481..6803c822 100644 --- a/integrations/wandb/3_model_training.ipynb +++ b/integrations/wandb/3_model_training.ipynb @@ -15,7 +15,7 @@ "source": [ "# Part 03: Model training with Weights & Biases & UI 
Exploration\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/integrations/wandb/3_model_training.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/branch-4.5/integrations/wandb/3_model_training.ipynb)\n", "\n", "**Note**: you may get an error when installing hopsworks on Colab, and it is safe to ignore it.\n", "\n", diff --git a/quickstart.ipynb b/quickstart.ipynb index f8113b6b..06048aae 100644 --- a/quickstart.ipynb +++ b/quickstart.ipynb @@ -1102,7 +1102,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "tutorial", "language": "python", "name": "python3" }, @@ -1116,7 +1116,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.11.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/real-time-ai-systems/fraud_online/1_fraud_online_feature_pipeline.ipynb b/real-time-ai-systems/fraud_online/1_fraud_online_feature_pipeline.ipynb index c82af3e0..ef906efb 100644 --- a/real-time-ai-systems/fraud_online/1_fraud_online_feature_pipeline.ipynb +++ b/real-time-ai-systems/fraud_online/1_fraud_online_feature_pipeline.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "49806257", "metadata": {}, "outputs": [], @@ -89,10 +89,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "27f2b52e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cc_numgender
04796807885357879F
14529266636192966F
24922690008243953F
\n", + "
" + ], + "text/plain": [ + " cc_num gender\n", + "0 4796807885357879 F\n", + "1 4529266636192966 F\n", + "2 4922690008243953 F" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the profiles data from a CSV file\n", "profiles_df = pd.read_csv(\n", @@ -112,10 +169,109 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "713a9568", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tiddatetimecc_numcategoryamountlatitudelongitudecitycountryfraud_label
011df919988c134d97bbff2678eb68e222022-01-01 00:00:244473593503484549Health/Beauty62.9542.30865-83.48216CantonUS0
1dd0b2d6d4266ccd3bf05bc2ea91cf1802022-01-01 00:00:564272465718946864Grocery85.4533.52253-117.70755Laguna NiguelUS0
2e627f5d9a9739833bd52d2da51761fc32022-01-01 00:02:324104216579248948Domestic Transport21.6337.60876-77.37331MechanicsvilleUS0
\n", + "
" + ], + "text/plain": [ + " tid datetime cc_num \\\n", + "0 11df919988c134d97bbff2678eb68e22 2022-01-01 00:00:24 4473593503484549 \n", + "1 dd0b2d6d4266ccd3bf05bc2ea91cf180 2022-01-01 00:00:56 4272465718946864 \n", + "2 e627f5d9a9739833bd52d2da51761fc3 2022-01-01 00:02:32 4104216579248948 \n", + "\n", + " category amount latitude longitude city country \\\n", + "0 Health/Beauty 62.95 42.30865 -83.48216 Canton US \n", + "1 Grocery 85.45 33.52253 -117.70755 Laguna Niguel US \n", + "2 Domestic Transport 21.63 37.60876 -77.37331 Mechanicsville US \n", + "\n", + " fraud_label \n", + "0 0 \n", + "1 0 \n", + "2 0 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the transactions data from a CSV file\n", "trans_df = pd.read_csv(\n", @@ -129,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "4ad0edf3", "metadata": {}, "outputs": [], @@ -146,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "8efc0deb", "metadata": {}, "outputs": [], @@ -188,10 +344,105 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "6f7d5009", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tiddatetimecc_numamountcountryfraud_labelloc_delta_t_plus_1loc_delta_t_minus_1time_delta_t_minus_1
04c51b54665c7ddb466ea5936f4f3a4282022-01-01 08:11:01446736074068208977.77US00.00.0001480.333333
14c30185aea2e28e7d9797004710e13c62022-01-01 10:03:424700702588013561781.27US00.00.0000700.416667
21a109febabc5c36409f2caf729e110d32022-01-01 10:08:59420509487725610536.25US00.00.0001080.333333
\n", + "
" + ], + "text/plain": [ + " tid datetime cc_num \\\n", + "0 4c51b54665c7ddb466ea5936f4f3a428 2022-01-01 08:11:01 4467360740682089 \n", + "1 4c30185aea2e28e7d9797004710e13c6 2022-01-01 10:03:42 4700702588013561 \n", + "2 1a109febabc5c36409f2caf729e110d3 2022-01-01 10:08:59 4205094877256105 \n", + "\n", + " amount country fraud_label loc_delta_t_plus_1 loc_delta_t_minus_1 \\\n", + "0 77.77 US 0 0.0 0.000148 \n", + "1 781.27 US 0 0.0 0.000070 \n", + "2 36.25 US 0 0.0 0.000108 \n", + "\n", + " time_delta_t_minus_1 \n", + "0 0.333333 \n", + "1 0.416667 \n", + "2 0.333333 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Use the prepare_transactions_fraud function to process the trans_df DataFrame\n", "trans_df = transactions_fraud.prepare_transactions_fraud(trans_df)\n", @@ -210,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "5b97f5f2", "metadata": {}, "outputs": [], @@ -230,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "bbc8e914", "metadata": {}, "outputs": [], @@ -272,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "d2331129", "metadata": {}, "outputs": [], @@ -289,10 +540,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "51383029", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{\"expectation_type\": \"expect_column_values_to_be_null\", \"kwargs\": {\"column\": \"cc_num\", \"mostly\": 0.0}, \"meta\": {}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Check binary gender column to be in set ['M', 'F']\n", "expectation_suite_profiles.add_expectation(\n", @@ -348,10 +610,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "35f1e17e", "metadata": {}, - "outputs": [], + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "2025-08-08 20:24:16,171 INFO: Initializing external client\n", + "2025-08-08 20:24:16,172 INFO: Base URL: https://10.87.45.79:28181\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-08-08 20:24:17,307 INFO: Python Engine initialized.\n", + "\n", + "Logged in to project, explore it here https://10.87.45.79:28181/p/119\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", @@ -370,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "3e926dc7", "metadata": {}, "outputs": [], @@ -399,10 +687,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "9a366430", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature Group created successfully, explore it at \n", + "https://10.87.45.79:28181/p/119/fs/67/fg/1037\n", + "2025-08-08 20:24:20,011 INFO: \t5 expectation(s) included in expectation_suite.\n", + "Validation succeeded.\n", + "Validation Report saved successfully, explore a summary at https://10.87.45.79:28181/p/119/fs/67/fg/1037\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Uploading Dataframe: 100.00% |█████████████| Rows 365112/365112 | Elapsed Time: 02:28 | Remaining Time: 00:00\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: transactions_fraud_online_fg_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://10.87.45.79:28181/p/119/jobs/named/transactions_fraud_online_fg_1_offline_fg_materialization/executions\n", + "✅ Done!\n" + ] + } + ], "source": [ "# Insert data into feature group\n", "trans_fg.insert(trans_df)\n", @@ -411,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, 
"id": "3d7de1db", "metadata": {}, "outputs": [], @@ -442,10 +759,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "e8027f2d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature Group created successfully, explore it at \n", + "https://10.87.45.79:28181/p/119/fs/67/fg/2061\n", + "2025-08-08 20:27:04,696 INFO: \t2 expectation(s) included in expectation_suite.\n", + "Validation succeeded.\n", + "Validation Report saved successfully, explore a summary at https://10.87.45.79:28181/p/119/fs/67/fg/2061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Uploading Dataframe: 100.00% |█████████████████| Rows 1000/1000 | Elapsed Time: 00:00 | Remaining Time: 00:00\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: profile_fraud_online_fg_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://10.87.45.79:28181/p/119/jobs/named/profile_fraud_online_fg_1_offline_fg_materialization/executions\n", + "✅ Done!\n" + ] + } + ], "source": [ "# Get or create the 'profile_fraud_online_fg' feature group\n", "profile_fg = fs.get_or_create_feature_group(\n", @@ -463,7 +809,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "ef348581", "metadata": {}, "outputs": [], @@ -492,7 +838,6 @@ "cell_type": "markdown", "id": "36294255", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -556,7 +901,7 @@ "hash": "e1ddeae6eefc765c17da80d38ea59b893ab18c0c0904077a035ef84cfe367f83" }, "kernelspec": { - "display_name": "Python", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -570,7 +915,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git 
a/real-time-ai-systems/fraud_online/2_fraud_online_training_pipeline.ipynb b/real-time-ai-systems/fraud_online/2_fraud_online_training_pipeline.ipynb index 805ca851..bf7c082b 100644 --- a/real-time-ai-systems/fraud_online/2_fraud_online_training_pipeline.ipynb +++ b/real-time-ai-systems/fraud_online/2_fraud_online_training_pipeline.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -73,9 +73,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-08-08 20:27:34,164 INFO: Initializing external client\n", + "2025-08-08 20:27:34,165 INFO: Base URL: https://10.87.45.79:28181\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-08-08 20:27:34,758 INFO: Python Engine initialized.\n", + "\n", + "Logged in to project, explore it here https://10.87.45.79:28181/p/119\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", @@ -102,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -122,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -182,9 +208,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature view created successfully, explore it at \n", + "https://10.87.45.79:28181/p/119/fs/67/fv/transactions_fraud_online_fv/version/1\n" + ] + } + ], "source": [ "# Get or create 
the 'transactions_fraud_online_fv' feature view\n", "feature_view = fs.get_or_create_feature_view(\n", @@ -206,9 +241,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (35.19s) \n", + "2025-08-08 20:29:55,880 WARNING: VersionWarning: Incremented version to `1`.\n", + "\n" + ] + } + ], "source": [ "# Training/Test splits, datasets creation. Using timerange arguments.\n", "train_start = \"2022/01/01\"\n", @@ -235,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -248,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -261,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -271,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -284,9 +329,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "fraud_label\n", + "0 0.996458\n", + "1 0.003542\n", + "Name: proportion, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Display the normalized value counts of the training labels (y_train)\n", "y_train.value_counts(normalize=True)" @@ -317,12 +376,975 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              feature_weights=None, gamma=None, grow_policy=None,\n",
+       "              importance_type=None, interaction_constraints=None,\n",
+       "              learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
+       "              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
+       "              max_leaves=None, min_child_weight=None, missing=nan,\n",
+       "              monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
+       "              n_jobs=None, num_parallel_tree=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " feature_weights=None, gamma=None, grow_policy=None,\n", + " importance_type=None, interaction_constraints=None,\n", + " learning_rate=None, max_bin=None, max_cat_threshold=None,\n", + " max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None, missing=nan,\n", + " monotone_constraints=None, multi_strategy=None, n_estimators=None,\n", + " n_jobs=None, num_parallel_tree=None, ...)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Initialize an XGBoost classifier\n", "model = xgb.XGBClassifier()\n", @@ -333,7 +1355,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -346,27 +1368,127 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 0, 0, 0])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "y_pred_test" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
amountloc_delta_t_plus_1loc_delta_t_minus_1time_delta_t_minus_1label_encoder_country_label_encoder_gender_
693641.150.5914180.2678640.0420491110
4120910.940.2724490.3121190.0123961110
30369660.350.6165260.4925780.1680211111
\n", + "
" + ], + "text/plain": [ + " amount loc_delta_t_plus_1 loc_delta_t_minus_1 time_delta_t_minus_1 \\\n", + "6936 41.15 0.591418 0.267864 0.042049 \n", + "41209 10.94 0.272449 0.312119 0.012396 \n", + "303696 60.35 0.616526 0.492578 0.168021 \n", + "\n", + " label_encoder_country_ label_encoder_gender_ \n", + "6936 111 0 \n", + "41209 111 0 \n", + "303696 111 1 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "X_test.head(3)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'f1_score': 1.0}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Compute f1 score\n", "metrics = {\n", @@ -377,9 +1499,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[40229 0]\n", + " [ 0 0]]\n" + ] + } + ], "source": [ "# Calculate the confusion matrix for the test set predictions\n", "results = confusion_matrix(\n", @@ -410,7 +1541,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -424,9 +1555,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['fraud_online_model/xgboost_fraud_online_model.pkl']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Save the trained XGBoost model\n", "joblib.dump(model, os.path.join(model_dir, \"xgboost_fraud_online_model.pkl\"))" @@ -434,7 +1576,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -469,9 +1611,97 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "da9394e111414814bf717fb19dcc5f74", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6 [00:00