diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..1be1596
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,31 @@
+{
+    "name": "azure-openai-entity-extraction",
+ "image": "mcr.microsoft.com/devcontainers/python:3.12-bullseye",
+ "features": {
+ "ghcr.io/azure/azure-dev/azd:latest": {}
+ },
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ "charliermarsh.ruff",
+ "ms-azuretools.azure-dev",
+ "ms-azuretools.vscode-bicep"
+ ],
+            "settings": {
+                "python.defaultInterpreterPath": "/usr/local/bin/python",
+                "[python]": {
+                    "editor.formatOnSave": true,
+                    "editor.codeActionsOnSave": { "source.fixAll": "explicit" },
+                    "editor.defaultFormatter": "charliermarsh.ruff"
+                }
+            }
+ }
+ },
+ "postCreateCommand": "",
+ "remoteUser": "vscode",
+ "hostRequirements": {
+ "memory": "8gb"
+ }
+}
diff --git a/.env.sample b/.env.sample
new file mode 100644
index 0000000..8f97853
--- /dev/null
+++ b/.env.sample
@@ -0,0 +1,2 @@
+AZURE_OPENAI_GPT_DEPLOYMENT=
+AZURE_OPENAI_SERVICE=
diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..f9ba8cf
--- /dev/null
+++ b/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,9 @@
+# Microsoft Open Source Code of Conduct
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+
+Resources:
+
+- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
+- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
+- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..850f279
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,76 @@
+# Contributing to azure-openai-entity-extraction
+
+This project welcomes contributions and suggestions. Most contributions require you to agree to a
+Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
+the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
+
+When you submit a pull request, a CLA bot will automatically determine whether you need to provide
+a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
+provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
+contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
+ - [Code of Conduct](#code-of-conduct)
+ - [Issues and Bugs](#found-an-issue)
+ - [Feature Requests](#want-a-feature)
+ - [Submission Guidelines](#submission-guidelines)
+
+## Code of Conduct
+Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+
+## Found an Issue?
+If you find a bug in the source code or a mistake in the documentation, you can help us by
+[submitting an issue](#submitting-an-issue) to the GitHub Repository. Even better, you can
+[submit a Pull Request](#submitting-a-pull-request-pr) with a fix.
+
+## Want a Feature?
+You can *request* a new feature by [submitting an issue](#submitting-an-issue) to the GitHub
+Repository. If you would like to *implement* a new feature, please submit an issue with
+a proposal for your work first, to be sure that we can use it.
+
+* **Small Features** can be crafted and directly [submitted as a Pull Request](#submitting-a-pull-request-pr).
+
+## Submission Guidelines
+
+### Submitting an Issue
+Before you submit an issue, search the archive, maybe your question was already answered.
+
+If your issue appears to be a bug, and hasn't been reported, open a new issue.
+Help us to maximize the effort we can spend fixing issues and adding new
+features, by not reporting duplicate issues. Providing the following information will increase the
+chances of your issue being dealt with quickly:
+
+* **Overview of the Issue** - if an error is being thrown a non-minified stack trace helps
+* **Version** - what version is affected (e.g. 0.1.2)
+* **Motivation for or Use Case** - explain what are you trying to do and why the current behavior is a bug for you
+* **Browsers and Operating System** - is this a problem with all browsers?
+* **Reproduce the Error** - provide a live example or an unambiguous set of steps
+* **Related Issues** - has a similar issue been reported before?
+* **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
+ causing the problem (line of code or commit)
+
+You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/Azure-samples/azure-openai-entity-extraction/issues/new.
+
+### Submitting a Pull Request (PR)
+Before you submit your Pull Request (PR) consider the following guidelines:
+
+* Search the repository (https://github.com/Azure-samples/azure-openai-entity-extraction/pulls) for an open or closed PR
+ that relates to your submission. You don't want to duplicate effort.
+
+* Make your changes in a new git fork:
+
+* Commit your changes using a descriptive commit message
+* Push your fork to GitHub:
+* In GitHub, create a pull request
+* If we suggest changes then:
+ * Make the required updates.
+ * Rebase your fork and force push to your GitHub repository (this will update your Pull Request):
+
+ ```shell
+    git rebase main -i
+ git push -f
+ ```
+
+That's it! Thank you for your contribution!
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..15c7f60
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,33 @@
+
+> Please provide us with the following information:
+> ---------------------------------------------------------------
+
+### This issue is for a: (mark with an `x`)
+```
+- [ ] bug report -> please search issues before submitting
+- [ ] feature request
+- [ ] documentation issue or request
+- [ ] regression (a behavior that used to work and stopped in a new release)
+```
+
+### Minimal steps to reproduce
+>
+
+### Any log messages given by the failure
+>
+
+### Expected/desired behavior
+>
+
+### OS and Version?
+> Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?)
+
+### Versions
+>
+
+### Mention any other details that might be useful
+
+> ---------------------------------------------------------------
+> Thanks! We'll be in touch soon.
diff --git a/.github/SECURITY.md b/.github/SECURITY.md
new file mode 100644
index 0000000..0b01b69
--- /dev/null
+++ b/.github/SECURITY.md
@@ -0,0 +1,37 @@
+# Security
+
+Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
+
+If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
+
+## Reporting Security Issues
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
+
+If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/msrc/pgp-key-msrc).
+
+You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
+
+Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
+
+- Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
+- Full paths of source file(s) related to the manifestation of the issue
+- The location of the affected source code (tag/branch/commit or direct URL)
+- Any special configuration required to reproduce the issue
+- Step-by-step instructions to reproduce the issue
+- Proof-of-concept or exploit code (if possible)
+- Impact of the issue, including how an attacker might exploit the issue
+
+This information will help us triage your report more quickly.
+
+If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
+
+## Preferred Languages
+
+We prefer all communications to be in English.
+
+## Policy
+
+Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/msrc/cvd).
diff --git a/.github/workflows/azure-dev.yaml b/.github/workflows/azure-dev.yaml
new file mode 100644
index 0000000..1fcf437
--- /dev/null
+++ b/.github/workflows/azure-dev.yaml
@@ -0,0 +1,56 @@
+name: Provision with azd
+
+on:
+ workflow_dispatch:
+ push:
+ # Run when commits are pushed to mainline branch (main or master)
+ # Set this to the mainline branch you are using
+ branches:
+ - main
+
+# GitHub Actions workflow to deploy to Azure using azd
+# To configure required secrets for connecting to Azure, simply run `azd pipeline config`
+
+# Set up permissions for deploying with secretless Azure federated credentials
+# https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
+permissions:
+ id-token: write
+ contents: read
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ env:
+ # azd required
+ AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
+ AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
+ AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
+ AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }}
+ AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install azd
+ uses: Azure/setup-azd@v1.0.0
+
+ - name: Install Nodejs
+ uses: actions/setup-node@v4
+ with:
+ node-version: 18
+
+ - name: Log in with Azure (Federated Credentials)
+ if: ${{ env.AZURE_CLIENT_ID != '' }}
+ run: |
+ azd auth login `
+ --client-id "$Env:AZURE_CLIENT_ID" `
+ --federated-credential-provider "github" `
+ --tenant-id "$Env:AZURE_TENANT_ID"
+ shell: pwsh
+
+ - name: Provision Infrastructure
+ run: azd provision --no-prompt
+ env:
+ AZD_INITIAL_ENVIRONMENT_CONFIG: ${{ secrets.AZD_INITIAL_ENVIRONMENT_CONFIG }}
+ AZURE_SERVER_APP_SECRET: ${{ secrets.AZURE_SERVER_APP_SECRET }}
+ AZURE_CLIENT_APP_SECRET: ${{ secrets.AZURE_CLIENT_APP_SECRET }}
diff --git a/.github/workflows/template-validation.yaml b/.github/workflows/template-validation.yaml
new file mode 100644
index 0000000..0d72b33
--- /dev/null
+++ b/.github/workflows/template-validation.yaml
@@ -0,0 +1,28 @@
+# This is for internal use to make sure our samples follow best practices. You can delete this in your fork.
+name: Template validation sample workflow
+on:
+ workflow_dispatch:
+
+permissions:
+ contents: read
+ id-token: write
+ pull-requests: write
+
+jobs:
+ template_validation_job:
+ runs-on: ubuntu-latest
+ name: template validation
+ steps:
+ - uses: actions/checkout@v4
+
+      - uses: microsoft/template-validation-action@v0.2.2
+        id: validation
+        env:
+          AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
+          AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
+          AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
+          AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }}
+          AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: print result
+ run: cat ${{ steps.validation.outputs.resultFile }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4546243
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,149 @@
+# Azure az webapp deployment details
+.azure
+*_env
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# NPM
+npm-debug.log*
+node_modules
+static/
+
+.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..46e7b88
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,16 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.5.0
+ hooks:
+ - id: check-yaml
+ - id: end-of-file-fixer
+ exclude: ^tests/snapshots
+ - id: trailing-whitespace
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.1.0
+ hooks:
+ # Run the linter.
+ - id: ruff
+ args: [ --fix ]
+ # Run the formatter.
+ - id: ruff-format
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..493936e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Azure Samples
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8001252
--- /dev/null
+++ b/README.md
@@ -0,0 +1,156 @@
+
+# Entity extraction with Azure OpenAI structured outputs (Python)
+
+[![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&skip_quickstart=true&machine=basicLinux32gb&repo=784926917&devcontainer_path=.devcontainer%2Fdevcontainer.json&geo=WestUs2)
+[![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/Azure-Samples/azure-openai-entity-extraction)
+
+This repository includes both the infrastructure and Python files needed so that you can create an Azure OpenAI gpt-4o model deployment and then perform entity extraction using the [structured outputs mode](https://learn.microsoft.com/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure) and the Python openai SDK. Example scripts are provided for extracting details from images, PDFs, webpages, and GitHub issues.
+
+* [Features](#features)
+* [Getting started](#getting-started)
+ * [GitHub Codespaces](#github-codespaces)
+ * [VS Code Dev Containers](#vs-code-dev-containers)
+ * [Local environment](#local-environment)
+* [Deployment](#deployment)
+* [Running the Python example](#running-the-python-example)
+* [Guidance](#guidance)
+ * [Costs](#costs)
+ * [Security guidelines](#security-guidelines)
+* [Resources](#resources)
+
+## Features
+
+* Provisions an Azure OpenAI account with keyless authentication enabled
+* Grants the "Cognitive Services OpenAI User" RBAC role to your user account
+* Deploys a gpt-4o model, version 2024-08-06 (the [only version supported for structured outputs](https://learn.microsoft.com/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure#supported-models))
+* Example scripts use the [openai Python package](https://pypi.org/project/openai/) and [Pydantic models](https://docs.pydantic.dev/) to make requests for structured outputs
+
+### Architecture diagram
+
+![Architecture diagram: Microsoft Entra managed identity connecting to Azure AI services](./diagram.png)
+
+## Getting started
+
+You have a few options for getting started with this template.
+The quickest way to get started is GitHub Codespaces, since it will setup all the tools for you, but you can also [set it up locally](#local-environment).
+
+### GitHub Codespaces
+
+You can run this template virtually by using GitHub Codespaces. The button will open a web-based VS Code instance in your browser:
+
+1. Open the template (this may take several minutes):
+
+ [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/Azure-Samples/azure-openai-entity-extraction)
+
+2. Open a terminal window
+3. Continue with the [deployment steps](#deployment)
+
+### VS Code Dev Containers
+
+A related option is VS Code Dev Containers, which will open the project in your local VS Code using the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers):
+
+1. Start Docker Desktop (install it if not already installed)
+2. Open the project:
+
+ [![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/azure-openai-entity-extraction)
+
+3. In the VS Code window that opens, once the project files show up (this may take several minutes), open a terminal window.
+4. Continue with the [deployment steps](#deployment)
+
+### Local environment
+
+1. Make sure the following tools are installed:
+
+ * [Azure Developer CLI (azd)](https://aka.ms/install-azd)
+ * [Python 3.9+](https://www.python.org/downloads/)
+
+2. Make a new directory called `azure-openai-entity-extraction` and clone this template into it using the `azd` CLI:
+
+ ```shell
+ azd init -t azure-openai-entity-extraction
+ ```
+
+ You can also use git to clone the repository if you prefer.
+
+3. Continue with the [deployment steps](#deployment)
+
+## Deployment
+
+1. Login to Azure:
+
+ ```shell
+ azd auth login
+ ```
+
+ For GitHub Codespaces users, if the previous command fails, try:
+
+ ```shell
+ azd auth login --use-device-code
+ ```
+
+2. Provision the OpenAI account:
+
+ ```shell
+ azd provision
+ ```
+
+ It will prompt you to provide an `azd` environment name (like "entityext"), select a subscription from your Azure account, and select a [location where the OpenAI model is available](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability) (like "canadaeast"). Then it will provision the resources in your account and deploy the latest code.
+
+ ⚠️ If you get an error or timeout with deployment, changing the location can help, as there may be availability constraints for the OpenAI resource. To change the location run:
+
+ ```shell
+ azd env set AZURE_LOCATION "yournewlocationname"
+ ```
+
+3. When `azd` has finished, you should have an OpenAI account you can use locally when logged into your Azure account, and a `.env` file should now exist with your Azure OpenAI configuration.
+
+4. Then you can proceed to [run the Python examples](#running-the-python-example).
+
+## Running the Python example
+
+1. If you're not already running in a Codespace or Dev Container, create a Python virtual environment.
+
+2. Install the requirements:
+
+ ```shell
+ python -m pip install -r requirements.txt
+ ```
+
+3. Run an example by running either `python example_file.py` or selecting the `Run` button on the opened file. Available examples:
+
+ * `extract_github_issue.py`: Fetches a public issue using the GitHub API, and then extracts details.
+ * `extract_github_repo.py`: Fetches a public README using the GitHub API, and then extracts details.
+ * `extract_image_graph.py`: Parses a local image of a graph and extracts details like title, axis, legend.
+ * `extract_image_table.py`: Parses a local image with tables and extracts nested tabular data.
+ * `extract_pdf_receipt.py`: Parses a local PDF using `pymupdf`, which converts it to Markdown, and extracts order details.
+ * `extract_webpage.py`: Parses a blog post using `BeautifulSoup`, and extracts title, description, and tags.
+
+## Guidance
+
+### Costs
+
+This template creates only the Azure OpenAI resource, which is free to provision. However, you will be charged for the usage of the Azure OpenAI chat completions API. The pricing is based on the number of tokens used, with around 1-3 tokens used per word. You can find the pricing details for the OpenAI API on the [Azure Cognitive Services pricing page](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
+
+### Security guidelines
+
+This template uses [keyless authentication](https://learn.microsoft.com/azure/developer/ai/keyless-connections) for authenticating to the Azure OpenAI resource. This is a secure way to authenticate to Azure resources without needing to store credentials in your code. Your Azure user account is assigned the "Cognitive Services OpenAI User" role, which allows you to access the OpenAI resource. You can find more information about the permissions of this role in the [Azure OpenAI documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control).
+
+For further security, you could also deploy the Azure OpenAI inside a private virtual network (VNet) and use a private endpoint to access it. This would prevent the OpenAI resource from being accessed from the public internet.
+
+## Resources
+
+* [How to use structured outputs](https://learn.microsoft.com/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure#supported-models)
diff --git a/azure.yaml b/azure.yaml
new file mode 100644
index 0000000..29c2044
--- /dev/null
+++ b/azure.yaml
@@ -0,0 +1,17 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
+
+name: azure-openai-entity-extraction
+metadata:
+ template: azure-openai-entity-extraction@0.0.4
+hooks:
+ postprovision:
+ windows:
+ shell: pwsh
+ run: ./infra/write_dot_env.ps1
+ interactive: false
+ continueOnError: false
+ posix:
+ shell: sh
+ run: ./infra/write_dot_env.sh
+ interactive: false
+ continueOnError: false
diff --git a/example_graph_treecover.png b/example_graph_treecover.png
new file mode 100644
index 0000000..ab2347b
Binary files /dev/null and b/example_graph_treecover.png differ
diff --git a/example_receipt.pdf b/example_receipt.pdf
new file mode 100644
index 0000000..87818cb
Binary files /dev/null and b/example_receipt.pdf differ
diff --git a/example_table_plants.png b/example_table_plants.png
new file mode 100644
index 0000000..768a089
Binary files /dev/null and b/example_table_plants.png differ
diff --git a/extract_github_issue.py b/extract_github_issue.py
new file mode 100644
index 0000000..32d8df0
--- /dev/null
+++ b/extract_github_issue.py
@@ -0,0 +1,70 @@
+import logging
+import os
+from enum import Enum
+
+import azure.identity
+import openai
+import requests
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from rich import print
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+ logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+ exit(1)
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+ api_version="2024-08-01-preview",
+ azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+ azure_ad_token_provider=token_provider,
+)
+model_name = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT")
+
+
+# Define models for Structured Outputs
+class Technology(str, Enum):
+ JAVASCRIPT = "JavaScript"
+ PYTHON = "Python"
+ DOTNET = ".NET"
+ AISTUDIO = "AI Studio"
+ AISEARCH = "AI Search"
+ POSTGRESQL = "PostgreSQL"
+ COSMOSDB = "CosmosDB"
+ AZURESQL = "Azure SQL"
+
+
+class HackSubmission(BaseModel):
+ name: str
+ description: str = Field(..., description="A 1-2 sentence description of the project")
+ technologies: list[Technology]
+ repository_url: str
+ video_url: str
+ team_members: list[str]
+
+
+# Fetch an issue from a public GitHub repository
+url = "https://api.github.com/repos/microsoft/RAG_Hack/issues/159"
+response = requests.get(url)
+if response.status_code != 200:
+ logging.error(f"Failed to fetch issue: {response.status_code}")
+ exit(1)
+issue_body = response.json()["body"]
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=model_name,
+ messages=[
+ {"role": "system", "content": "Extract the info from the GitHub issue markdown about this hack submission."},
+ {"role": "user", "content": issue_body},
+ ],
+ response_format=HackSubmission,
+)
+
+output = completion.choices[0].message.parsed
+hack_submission = HackSubmission.model_validate(output)
+print(hack_submission)
diff --git a/extract_github_repo.py b/extract_github_repo.py
new file mode 100644
index 0000000..1144595
--- /dev/null
+++ b/extract_github_repo.py
@@ -0,0 +1,85 @@
+import base64
+import logging
+import os
+from enum import Enum
+
+import azure.identity
+import openai
+import requests
+import rich
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+ logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+ exit(1)
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+ api_version="2024-08-01-preview",
+ azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+ azure_ad_token_provider=token_provider,
+)
+
+
+# Define models for Structured Outputs
+class Language(str, Enum):
+ JAVASCRIPT = "JavaScript"
+ PYTHON = "Python"
+ DOTNET = ".NET"
+
+
+class AzureService(str, Enum):
+ AISTUDIO = "AI Studio"
+ AISEARCH = "AI Search"
+ POSTGRESQL = "PostgreSQL"
+ COSMOSDB = "CosmosDB"
+ AZURESQL = "Azure SQL"
+
+
+class Framework(str, Enum):
+ LANGCHAIN = "Langchain"
+ SEMANTICKERNEL = "Semantic Kernel"
+ LLAMAINDEX = "Llamaindex"
+ AUTOGEN = "Autogen"
+ SPRINGBOOT = "Spring Boot"
+ PROMPTY = "Prompty"
+
+
+class RepoOverview(BaseModel):
+ name: str
+ description: str = Field(..., description="A 1-2 sentence description of the project")
+ languages: list[Language]
+ azure_services: list[AzureService]
+ frameworks: list[Framework]
+
+
+# Fetch a README from a public GitHub repository
+url = "https://api.github.com/repos/shank250/CareerCanvas-msft-raghack/contents/README.md"
+response = requests.get(url)
+if response.status_code != 200:
+ logging.error(f"Failed to fetch issue: {response.status_code}")
+ exit(1)
+content = response.json()
+readme_content = base64.b64decode(content["content"]).decode("utf-8")
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"),
+ messages=[
+ {
+ "role": "system",
+ "content": "Extract the information from the GitHub issue markdown about this hack submission.",
+ },
+ {"role": "user", "content": readme_content},
+ ],
+ response_format=RepoOverview,
+)
+
+output = completion.choices[0].message.parsed
+repo_overview = RepoOverview.model_validate(output)
+rich.print(repo_overview)
diff --git a/extract_image_graph.py b/extract_image_graph.py
new file mode 100644
index 0000000..4d1fcc3
--- /dev/null
+++ b/extract_image_graph.py
@@ -0,0 +1,64 @@
+import base64
+import logging
+import os
+
+import azure.identity
+import openai
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from rich import print
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+# Fail fast with a pointer to the README when the azd-provisioned env vars are missing
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+    logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+    exit(1)
+# Keyless (Entra ID) auth: bearer tokens for the Cognitive Services scope are fetched on demand
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+    api_version="2024-08-01-preview",
+    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+    azure_ad_token_provider=token_provider,
+)
+# Deployment name used as the `model` argument in requests below
+model_name = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT")
+
+
+# Define models for Structured Outputs
+class Graph(BaseModel):
+    """Metadata the model should extract from a chart image."""
+
+    title: str
+    description: str = Field(..., description="1 sentence description of the graph")
+    x_axis: str
+    y_axis: str
+    legend: list[str]
+
+
+# Prepare local image as base64 URI
+def open_image_as_base64(filename):
+ with open(filename, "rb") as image_file:
+ image_data = image_file.read()
+ image_base64 = base64.b64encode(image_data).decode("utf-8")
+ return f"data:image/png;base64,{image_base64}"
+
+
+image_url = open_image_as_base64("example_graph_treecover.png")
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"),
+ messages=[
+ {"role": "system", "content": "Extract the information from the graph"},
+ {
+ "role": "user",
+ "content": [
+ {"image_url": {"url": image_url}, "type": "image_url"},
+ ],
+ },
+ ],
+ response_format=Graph,
+)
+
+output = completion.choices[0].message.parsed
+graph = Graph.model_validate(output)
+print(graph)
diff --git a/extract_image_table.py b/extract_image_table.py
new file mode 100644
index 0000000..6e88281
--- /dev/null
+++ b/extract_image_table.py
@@ -0,0 +1,71 @@
+import base64
+import logging
+import os
+
+import azure.identity
+import openai
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from rich import print
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+# Fail fast with a pointer to the README when the azd-provisioned env vars are missing
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+    logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+    exit(1)
+# Keyless (Entra ID) auth: bearer tokens for the Cognitive Services scope are fetched on demand
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+    api_version="2024-08-01-preview",
+    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+    azure_ad_token_provider=token_provider,
+)
+
+
+# Define models for Structured Outputs
+class Plant(BaseModel):
+    """One row of the plant price table."""
+
+    species: str
+    common_name: str
+    quantity: int
+    size: str
+    price: float
+    county: str
+    notes: str
+
+
+class PlantInventory(BaseModel):
+    """The table's rows grouped by plant category."""
+
+    annuals: list[Plant]
+    bulbs: list[Plant]
+    grasses: list[Plant]
+
+
+# Prepare local image as base64 URI
+def open_image_as_base64(filename):
+ with open(filename, "rb") as image_file:
+ image_data = image_file.read()
+ image_base64 = base64.b64encode(image_data).decode("utf-8")
+ return f"data:image/png;base64,{image_base64}"
+
+
+image_url = open_image_as_base64("example_table_plants.png")
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"),
+ messages=[
+ {"role": "system", "content": "Extract the information from the table"},
+ {
+ "role": "user",
+ "content": [
+ {"image_url": {"url": image_url}, "type": "image_url"},
+ ],
+ },
+ ],
+ response_format=PlantInventory,
+)
+
+output = completion.choices[0].message.parsed
+plant_inventory = PlantInventory.model_validate(output)
+print(plant_inventory)
diff --git a/extract_pdf_receipt.py b/extract_pdf_receipt.py
new file mode 100644
index 0000000..a2b6d42
--- /dev/null
+++ b/extract_pdf_receipt.py
@@ -0,0 +1,58 @@
+import logging
+import os
+
+import azure.identity
+import openai
+import pymupdf4llm
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from rich import print
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+# Fail fast with a pointer to the README when the azd-provisioned env vars are missing
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+    logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+    exit(1)
+# Keyless (Entra ID) auth: bearer tokens for the Cognitive Services scope are fetched on demand
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+    api_version="2024-08-01-preview",
+    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+    azure_ad_token_provider=token_provider,
+)
+# Deployment name used as the `model` argument in requests below
+model_name = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT")
+
+
+# Define models for Structured Outputs
+class Item(BaseModel):
+    """A single line item on the receipt."""
+
+    product: str
+    price: float
+    quantity: int
+
+
+class Receipt(BaseModel):
+    """Fields the model should extract from the receipt PDF."""
+
+    total: float
+    shipping: float
+    payment_method: str
+    items: list[Item]
+    order_number: int
+
+
+# Prepare PDF as markdown text
+md_text = pymupdf4llm.to_markdown("example_receipt.pdf")
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"),
+ messages=[
+ {"role": "system", "content": "Extract the information from the blog post"},
+ {"role": "user", "content": md_text},
+ ],
+ response_format=Receipt,
+)
+
+output = completion.choices[0].message.parsed
+receipt = Receipt.model_validate(output)
+print(receipt)
diff --git a/extract_webpage.py b/extract_webpage.py
new file mode 100644
index 0000000..00ca9f0
--- /dev/null
+++ b/extract_webpage.py
@@ -0,0 +1,59 @@
+import logging
+import os
+
+import azure.identity
+import openai
+import requests
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from rich import print
+
+logging.basicConfig(level=logging.WARNING)
+load_dotenv()
+
+# Configure Azure OpenAI
+# Fail fast with a pointer to the README when the azd-provisioned env vars are missing
+if not os.getenv("AZURE_OPENAI_SERVICE") or not os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"):
+    logging.warning("AZURE_OPENAI_SERVICE and AZURE_OPENAI_GPT_DEPLOYMENT environment variables are empty. See README.")
+    exit(1)
+# Keyless (Entra ID) auth: bearer tokens for the Cognitive Services scope are fetched on demand
+credential = azure.identity.DefaultAzureCredential()
+token_provider = azure.identity.get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
+client = openai.AzureOpenAI(
+    api_version="2024-08-01-preview",
+    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
+    azure_ad_token_provider=token_provider,
+)
+# Deployment name used as the `model` argument in requests below
+model_name = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT")
+
+
+# Define models for Structured Outputs
+class BlogPost(BaseModel):
+    """Fields the model should extract from the fetched blog post."""
+
+    title: str
+    summary: str = Field(..., description="A 1-2 sentence summary of the blog post")
+    tags: list[str] = Field(..., description="A list of tags for the blog post, like 'python' or 'openai'")
+
+
+# Fetch blog post and extract title/content
+url = "https://blog.pamelafox.org/2024/09/integrating-vision-into-rag-applications.html"
+response = requests.get(url)
+if response.status_code != 200:
+ print(f"Failed to fetch the page: {response.status_code}")
+ exit(1)
+soup = BeautifulSoup(response.content, "html.parser")
+post_title = soup.find("h3", class_="post-title")
+post_contents = soup.find("div", class_="post-body").get_text(strip=True)
+
+
+# Send request to GPT model to extract using Structured Outputs
+completion = client.beta.chat.completions.parse(
+ model=os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT"),
+ messages=[
+ {"role": "system", "content": "Extract the information from the blog post"},
+ {"role": "user", "content": f"{post_title}\n{post_contents}"},
+ ],
+ response_format=BlogPost,
+)
+
+output = completion.choices[0].message.parsed
+blog_post = BlogPost.model_validate(output)
+print(blog_post)
diff --git a/infra/main.bicep b/infra/main.bicep
new file mode 100644
index 0000000..4d43acc
--- /dev/null
+++ b/infra/main.bicep
@@ -0,0 +1,109 @@
+// Provisions a resource group plus an Azure OpenAI account with one GPT deployment,
+// and grants the deploying principal keyless (Entra ID) access to it.
+targetScope = 'subscription'
+
+@minLength(1)
+@maxLength(64)
+// NOTE(review): "the the" typo in the description text below — fix when touching this file
+@description('Name of the the environment which is used to generate a short unique hash used in all resources.')
+param environmentName string
+
+@minLength(1)
+@description('Location for the OpenAI resource')
+// https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=python-secure%2Cglobal-standard%2Cstandard-chat-completions#models-by-deployment-type
+@allowed([
+  'eastus'
+  'eastus2'
+  'northcentralus'
+  'southcentralus'
+  'spaincentral'
+  'swedencentral'
+  'westus'
+  'westus3'
+])
+@metadata({
+  azd: {
+    type: 'location'
+  }
+})
+param location string
+
+@description('Name of the GPT model to deploy')
+param gptModelName string = 'gpt-4o'
+
+@description('Version of the GPT model to deploy')
+// See version availability in this table:
+// https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=python-secure%2Cglobal-standard%2Cstandard-chat-completions#models-by-deployment-type
+param gptModelVersion string = '2024-08-06'
+
+@description('Name of the model deployment (can be different from the model name)')
+param gptDeploymentName string = 'gpt-4o'
+
+@description('Capacity of the GPT deployment')
+// You can increase this, but capacity is limited per model/region, so you will get errors if you go over
+// https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
+param gptDeploymentCapacity int = 30
+
+@description('Id of the user or app to assign application roles')
+param principalId string = ''
+
+@description('Non-empty if the deployment is running on GitHub Actions')
+param runningOnGitHub string = ''
+
+// GitHub Actions deploys as a service principal; local azd deploys as a user
+var principalType = empty(runningOnGitHub) ? 'User' : 'ServicePrincipal'
+
+// Deterministic short suffix so resource names are unique per subscription/env/location
+var resourceToken = toLower(uniqueString(subscription().id, environmentName, location))
+var prefix = '${environmentName}${resourceToken}'
+var tags = { 'azd-env-name': environmentName }
+
+// Organize resources in a resource group
+resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
+  name: '${prefix}-rg'
+  location: location
+  tags: tags
+}
+
+var openAiServiceName = '${prefix}-openai'
+module openAi 'br/public:avm/res/cognitive-services/account:0.7.1' = {
+  name: 'openai'
+  scope: resourceGroup
+  params: {
+    name: openAiServiceName
+    location: location
+    tags: tags
+    kind: 'OpenAI'
+    sku: 'S0'
+    // A custom subdomain is required for Entra ID (keyless) authentication
+    customSubDomainName: openAiServiceName
+    networkAcls: {
+      defaultAction: 'Allow'
+      bypass: 'AzureServices'
+    }
+    deployments: [
+      {
+        name: gptDeploymentName
+        model: {
+          format: 'OpenAI'
+          name: gptModelName
+          version: gptModelVersion
+        }
+        sku: {
+          name: 'GlobalStandard'
+          capacity: gptDeploymentCapacity
+        }
+      }
+    ]
+    roleAssignments: [
+      {
+        principalId: principalId
+        // Lets the principal call the deployment without API keys
+        roleDefinitionIdOrName: 'Cognitive Services OpenAI User'
+        principalType: principalType
+      }
+    ]
+  }
+}
+
+output AZURE_LOCATION string = location
+output AZURE_TENANT_ID string = tenant().tenantId
+output AZURE_RESOURCE_GROUP string = resourceGroup.name
+
+// Specific to Azure OpenAI
+output AZURE_OPENAI_SERVICE string = openAi.outputs.name
+output AZURE_OPENAI_GPT_MODEL string = gptModelName
+output AZURE_OPENAI_GPT_DEPLOYMENT string = gptDeploymentName
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
new file mode 100644
index 0000000..e64bb88
--- /dev/null
+++ b/infra/main.parameters.json
@@ -0,0 +1,18 @@
+{
+ "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
+ "contentVersion": "1.0.0.0",
+ "parameters": {
+ "environmentName": {
+ "value": "${AZURE_ENV_NAME}"
+ },
+ "location": {
+ "value": "${AZURE_LOCATION}"
+ },
+ "principalId": {
+ "value": "${AZURE_PRINCIPAL_ID}"
+ },
+ "runningOnGitHub": {
+ "value": "${GITHUB_ACTIONS}"
+ }
+ }
+}
diff --git a/infra/write_dot_env.ps1 b/infra/write_dot_env.ps1
new file mode 100644
index 0000000..0e8bb46
--- /dev/null
+++ b/infra/write_dot_env.ps1
@@ -0,0 +1,9 @@
+# Write the current azd environment values to the .env file, replacing its contents.
+$azureOpenAiDeployment = azd env get-value AZURE_OPENAI_GPT_DEPLOYMENT
+$azureOpenAiService = azd env get-value AZURE_OPENAI_SERVICE
+
+# Bug fix: `Set-Content -Value ""` wrote a leading blank line into .env (unlike the
+# bash twin, which truncates to empty). Write both entries in a single call instead.
+Set-Content -Path .env -Value @(
+    "AZURE_OPENAI_GPT_DEPLOYMENT=$azureOpenAiDeployment",
+    "AZURE_OPENAI_SERVICE=$azureOpenAiService"
+)
diff --git a/infra/write_dot_env.sh b/infra/write_dot_env.sh
new file mode 100755
index 0000000..f6edd5d
--- /dev/null
+++ b/infra/write_dot_env.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Write the current azd environment values to .env, replacing any existing contents
+{
+    echo "AZURE_OPENAI_GPT_DEPLOYMENT=$(azd env get-value AZURE_OPENAI_GPT_DEPLOYMENT)"
+    echo "AZURE_OPENAI_SERVICE=$(azd env get-value AZURE_OPENAI_SERVICE)"
+} > .env
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d33ce4e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[tool.ruff]
+line-length = 120
+target-version = "py312"
+# E: pycodestyle errors, F: pyflakes, I: isort, UP: pyupgrade
+lint.select = ["E", "F", "I", "UP"]
+# NOTE(review): D203 is a pydocstyle rule but no "D" rules are selected above — likely template leftover; confirm
+lint.ignore = ["D203"]
+# NOTE(review): there is no fastapi_app package in this repo — looks copied from another template; verify
+lint.isort.known-first-party = ["fastapi_app"]
diff --git a/readme_diagram.png b/readme_diagram.png
new file mode 100644
index 0000000..16f05ba
Binary files /dev/null and b/readme_diagram.png differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2ac9350
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+azure-identity
+openai
+python-dotenv
+pydantic
+requests
+rich
+beautifulsoup4
+pymupdf4llm
+azure-ai-inference