diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..4d75d5f2720 --- /dev/null +++ b/.gitignore @@ -0,0 +1,233 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ +!/build/ + +# Visual Studio 2015 cache/options directory +.vs/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +*.ncrunch* + + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to 
check in your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config + +# Windows Azure Build Output +csx/ +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +*.[Cc]ache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ +bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# User-specific configuration that should not be placed under source control +*.azurePubXml + +*-local.config +*.ConsoleHost.config +settings.config + +#Java + +*.class + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.ear + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* + +# Azure diagnostics +*.wadcfgx + +# docs +log/ +obj/ +_site/ +.optemp/ +_themes*/ + +.openpublishing.buildcore.ps1 \ No newline at end of file diff --git a/.local-web-server.json b/.local-web-server.json new file mode 100644 index 00000000000..10bb09416bf --- /dev/null +++ b/.local-web-server.json @@ -0,0 +1,15 @@ +{ + "port": 8000, + "directory": "_site", + 
"forbid": [], + "rewrite": [ + { + "from": "/azure/architecture/*", + "to": "/$1" + }, + { + "from": "/azure/architecture", + "to": "/azure/architecture/" + } + ] +} \ No newline at end of file diff --git a/.openpublishing.build.ps1 b/.openpublishing.build.ps1 new file mode 100644 index 00000000000..aadef762022 --- /dev/null +++ b/.openpublishing.build.ps1 @@ -0,0 +1,17 @@ +param( + [string]$buildCorePowershellUrl = "https://opbuildstorageprod.blob.core.windows.net/opps1container/.openpublishing.buildcore.ps1", + [string]$parameters +) +# Main +$errorActionPreference = 'Stop' + +# Step-1: Download buildcore script to local +echo "download build core script to local with source url: $buildCorePowershellUrl" +$repositoryRoot = Split-Path -Parent $MyInvocation.MyCommand.Definition +$buildCorePowershellDestination = "$repositoryRoot\.openpublishing.buildcore.ps1" +Invoke-WebRequest $buildCorePowershellUrl -OutFile "$buildCorePowershellDestination" + +# Step-2: Run build core +echo "run build core script with parameters: $parameters" +& "$buildCorePowershellDestination" "$parameters" +exit $LASTEXITCODE diff --git a/.openpublishing.publish.config.json b/.openpublishing.publish.config.json new file mode 100644 index 00000000000..f2b2ea29166 --- /dev/null +++ b/.openpublishing.publish.config.json @@ -0,0 +1,47 @@ +{ + "need_generate_pdf_url_template": false, + "need_generate_intellisense": false, + "branch_target_mapping": { + "docs": ["Publish", "PDF"] + }, + "docsets_to_publish": [ + { + "docset_name": "azure-architecture-center", + "build_source_folder": "docs", + "build_output_subfolder": "azure-architecture-center", + "locale": "en-us", + "version": 0, + "open_to_public_contributors": false, + "type_mapping": { + "Conceptual": "Content", + "ManagedReference": "Content", + "RestApi": "Content" + }, + "build_entry_point": "docs", + "template_folder": "_themes" + } + ], + "Targets": { + "Pdf": { + "template_folder": "_themes.pdf" + } + }, + "notification_subscribers": 
[], + "branches_to_filter": [], + "skip_source_output_uploading": false, + "need_preview_pull_request": false, + "enable_incremental_build": true, + "dependent_repositories": [ + { + "path_to_root": "_themes", + "url": "https://github.com/Microsoft/templates.docs.msft", + "branch": "master", + "branch_mapping": {} + }, + { + "path_to_root": "_themes.pdf", + "url": "https://github.com/Microsoft/templates.docs.msft.pdf", + "branch": "master" + } + ] +} \ No newline at end of file diff --git a/.openpublishing.redirection.json b/.openpublishing.redirection.json new file mode 100644 index 00000000000..224f9e9dddb --- /dev/null +++ b/.openpublishing.redirection.json @@ -0,0 +1,12 @@ +{ + "redirections": [ + { + "source_path": "docs/guidance.md", + "redirect_url": "/azure/architecture" + }, + { + "source_path": "docs/guidance-architecture.md", + "redirect_url": "/azure/architecture" + } + ] +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000000..45cff81d8af --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,21 @@ +{ + // Use IntelliSense to learn about possible Node.js debug attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Launch Program", + "program": "${workspaceRoot}\\build\\build.js", + "cwd": "${workspaceRoot}" + }, + { + "type": "node", + "request": "attach", + "name": "Attach to Process", + "port": 5858 + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000000..3421b95e9a2 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +// Place your settings in this file to overwrite default and user settings. 
+{ + "files.exclude": { + "**/.optemp": true, + "**/_themes": true, + "log": true + }, +} \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000000..e056e7c3ec5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. 
This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. 
Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. 
Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. 
Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. 
Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. 
If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. 
The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. 
Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. \ No newline at end of file diff --git a/LICENSE-CODE b/LICENSE-CODE new file mode 100644 index 00000000000..b17b032a430 --- /dev/null +++ b/LICENSE-CODE @@ -0,0 +1,17 @@ +The MIT License (MIT) +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000000..32ff2ad4e12 --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# patterns & practices + +Some of the content is generated using a build script. +* `index.md` is completely generated. You should not edit it directly. Instead, edit `index.yml` and `index.liquid.md`, then run the build script. +* The metadata for series is generated and copied into the relevant docs. The metadata is derived from the corresponding toc.md. You should not directly edit any of the yml that starts with `pnp.series.`. + +In order to run the build script, first install the supporting packages: + +```bash +npm install +``` + +To generate the files, you can run the `start` task: +```bash +npm start +``` +Or run the script directly: +```bash +node ./build/build.js +``` + +For convenience, you can build, commit, and push with a single command: +```bash +npm run build +``` + +## Legal Notices +Microsoft and any contributors grant you a license to the Microsoft documentation and other content +in this repository under the [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/legalcode), +see the [LICENSE](LICENSE) file, and grant you a license to any code in the repository under the [MIT License](https://opensource.org/licenses/MIT), see the +[LICENSE-CODE](LICENSE-CODE) file. + +Microsoft, Windows, Microsoft Azure and/or other Microsoft products and services referenced in the documentation +may be either trademarks or registered trademarks of Microsoft in the United States and/or other countries. 
+The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks. +Microsoft's general trademark guidelines can be found at http://go.microsoft.com/fwlink/?LinkID=254653. + +Privacy information can be found at https://privacy.microsoft.com/en-us/ + +Microsoft and any contributors reserve all others rights, whether under their respective copyrights, patents, +or trademarks, whether by implication, estoppel or otherwise. diff --git a/build/blueprints/build.js b/build/blueprints/build.js new file mode 100644 index 00000000000..6198a765344 --- /dev/null +++ b/build/blueprints/build.js @@ -0,0 +1,55 @@ +var path = require('path'); +var fs = require('fs'); +var series = require('./series'); + +var yamlFront = require('yaml-front-matter'); +var Liquid = require('shopify-liquid'); + +var engine = Liquid({ + root: path.resolve(__dirname, 'templates'), + extname: '.liquid' +}); + +module.exports = function () { // renders every series' index.md, then the docs/blueprints index page + var basePath = path.resolve(__dirname, `../../docs/blueprints/`); + var catalog = { series: [] }; + + // render series overviews + var seriesTemplate = engine.parse("{%- include 'series-overview' -%}"); + [ + 'virtual-machines-linux', + 'virtual-machines-windows', + 'app-service', + 'identity', + 'hybrid-networking', + 'dmz' + ].forEach(slug => { + var seriesPath = path.resolve(basePath, `${slug}`); + var model = series(slug, seriesPath); + + var ymlPath = path.resolve(seriesPath, `series.yml`); + var content = fs.readFileSync(ymlPath, 'utf8'); + var yml = yamlFront.loadFront(content); + + Object.assign(model, yml); + + model.next = model.articles[0].url; + model.path = slug; + catalog.series.push(model); + + var outputhPath = path.resolve(seriesPath, `index.md`); + engine.render(seriesTemplate, { series: model }) + .then(markdown => fs.writeFileSync(outputhPath, markdown)); // sync write: fs.writeFile() without a callback throws on Node.js >= 10, so nothing was written + }); + + // render main index + ['index'].forEach(x => { + var templatePath = path.resolve(basePath, `${x}.liquid.md`); + var outputhPath = 
path.resolve(basePath, `${x}.md`); + var template = engine.parse(fs.readFileSync(templatePath, 'utf8')); + + console.log(x); + engine.render(template, catalog) + .then(markdown => fs.writeFileSync(outputhPath, markdown)); // sync write: fs.writeFile() without a callback throws on Node.js >= 10 + }); +} \ No newline at end of file diff --git a/build/blueprints/createSeriesModel.js b/build/blueprints/createSeriesModel.js new file mode 100644 index 00000000000..d3c6940f80c --- /dev/null +++ b/build/blueprints/createSeriesModel.js @@ -0,0 +1,53 @@ +const toc = require('./toc'); + +function adorn(model) { // sets prev/next urls on each article; the first article's prev is './index' + var articles = model.articles; + + articles.forEach((article, i) => { + var prevIndex = i - 1; + var nextIndex = i + 1; + if (prevIndex >= 0) { + article.prev = articles[prevIndex].url; + } else { + article.prev = './index'; + } + if (nextIndex < articles.length) { + article.next = articles[nextIndex].url; + } + }); +} + +function resolveUrl(url) { // './foo.md' -> 'foo' + return url.replace(/^\.\//, '').replace(/\.md$/, ''); // anchored: an unanchored './' also matched inside '../' ('../x.md' became '.x') +} + +function getArticleByFile(file) { + return this.articles.find(x => (x.filePath.indexOf(file) >= 0)); +} + +module.exports = (slug, path) => { + + var contents = toc(path); // first entry supplies the series title; the remaining entries become the articles + + var model = { + root: slug, + title: contents[0].title, + description: null, + articles: [], + getArticleByFile: getArticleByFile + }; + + contents.shift(); + + model.articles = contents.map(x => { + return { + title: x.title, + filePath: x.filePath, + url: resolveUrl(x.filePath) + }; + }); + + adorn(model); + + return model; +} \ No newline at end of file diff --git a/build/blueprints/series.js b/build/blueprints/series.js new file mode 100644 index 00000000000..3574ad6e004 --- /dev/null +++ b/build/blueprints/series.js @@ -0,0 +1,50 @@ +var createSeriesModel = require('./createSeriesModel'); +var lec = require('line-ending-corrector').LineEndingCorrector.correctSync; +var yamlFront = require('yaml-front-matter'); +var path = require('path'); +var fs = require('fs'); +var os = require('os'); + +module.exports = function (seriesSlug, seriesPath) { + console.log(`series: 
${seriesSlug}`); + + var tocPath = path.resolve(seriesPath, `toc.md`); + var toc = fs.readFileSync(tocPath, 'utf8'); // NOTE: value is unused here; createSeriesModel reads toc.md itself through ./toc + + var series = createSeriesModel(seriesSlug, seriesPath); + + var files = fs.readdirSync(seriesPath); + files.forEach(file => { + if (file.substr(-3) != ".md") return; + if (['index.md', 'toc.md'].indexOf(file) > -1) return; + + var filePath = path.resolve(seriesPath, file); + var content = fs.readFileSync(filePath, 'utf8'); + + var yml = yamlFront.loadFront(content); + var original = yml.__content; + delete yml.__content; + + var article = series.getArticleByFile(file); + if (!article) return; + + var cardTitle = article.title; + + Object.assign(article, yml); + + + if (article.prev) { + yml['pnp.series.prev'] = article.prev; + } + if (article.next) { + yml['pnp.series.next'] = article.next; + } + yml['pnp.series.title'] = series.title; + yml['cardTitle'] = cardTitle; + + var updated = lec('---' + os.EOL + yamlFront.dump(yml) + '---' + original, { eolc: 'CRLF' }); + fs.writeFileSync(filePath, updated[1]); // rewrites the article in place with the regenerated front matter + }); + + return series; +} \ No newline at end of file diff --git a/build/blueprints/templates/series-overview.liquid b/build/blueprints/templates/series-overview.liquid new file mode 100644 index 00000000000..4d5bdf23ec3 --- /dev/null +++ b/build/blueprints/templates/series-overview.liquid @@ -0,0 +1,17 @@ +--- +title: {{ series.title }} | Architectural Blueprints +description: {{ series.summary }} +layout: LandingPage +pnp.series.title: {{ series.title }} +pnp.series.next: {{ series.next }} +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +{{ series.description }} + +{% include 'series' %} + +{{ series.considerations }} \ No newline at end of file diff --git a/build/blueprints/templates/series.liquid b/build/blueprints/templates/series.liquid new file mode 100644 index 00000000000..fea81cf7121 --- /dev/null +++ b/build/blueprints/templates/series.liquid @@ -0,0 +1,21 @@ + \ No newline at end of file diff --git 
a/build/blueprints/toc.js b/build/blueprints/toc.js new file mode 100644 index 00000000000..30a4df2a9db --- /dev/null +++ b/build/blueprints/toc.js @@ -0,0 +1,25 @@ +const path = require('path'); +const fs = require('fs'); + +module.exports = folder => { // returns [{ title, filePath }] for each heading link in <folder>/toc.md after the first + const articleRE = /^#+ \[([a-zA-Z-\s]*)\]\(([\.\/a-zA-Z-]*)\)/igm; + + const tocPath = path.resolve(folder, `toc.md`); + const content = fs.readFileSync(tocPath, 'utf8'); + + var output = []; + + // extract the main articles from the TOC + var result = articleRE.exec(content); + // first hit _should_ be the series overview + // so we'll ignore it and move on... + + while ((result = articleRE.exec(content)) !== null) { + output.push({ + title: result[1].trim(), + filePath: result[2] + }); + } + + return output; +} \ No newline at end of file diff --git a/build/build.js b/build/build.js new file mode 100644 index 00000000000..ffb5dbf5268 --- /dev/null +++ b/build/build.js @@ -0,0 +1,5 @@ +const blueprints = require('./blueprints/build'); +const main = require('./main/build'); + +main(); +blueprints(); \ No newline at end of file diff --git a/build/main/build.js b/build/main/build.js new file mode 100644 index 00000000000..70fabde412e --- /dev/null +++ b/build/main/build.js @@ -0,0 +1,32 @@ +var path = require('path'); +var fs = require('fs'); + +var yamlFront = require('yaml-front-matter'); +var Liquid = require('shopify-liquid'); + +var series = require('./series'); + +var engine = Liquid({ + root: path.resolve(__dirname, 'templates'), + extname: '.liquid' +}); + +module.exports = function () { + var dataPath = path.resolve(__dirname, '../../docs/index.yml'); + var content = fs.readFileSync(dataPath, 'utf8'); + var yml = yamlFront.loadFront(content); + + ['index'].forEach(x => { + console.log(`templated page: ${x}`); + var templatePath = path.resolve(__dirname, `../../docs/${x}.liquid.md`); + var outputhPath = path.resolve(__dirname, `../../docs/${x}.md`); + var template = 
engine.parse(fs.readFileSync(templatePath, 'utf8')); + + return engine.render(template, yml) + .then(function (markdown) { + fs.writeFile(outputhPath, markdown); + }); + }); + + ['multitenant-identity', 'elasticsearch'].forEach(slug => series(slug)); +} diff --git a/build/main/createSeriesModel.js b/build/main/createSeriesModel.js new file mode 100644 index 00000000000..eb3ba809348 --- /dev/null +++ b/build/main/createSeriesModel.js @@ -0,0 +1,86 @@ +var commonmark = require('commonmark'); + +function getHeadingText(node) { + var walker = node.walker(); + while ((event = walker.next())) { + node = event.node; + if (event.entering && node.type === 'text') { + return node._literal; + } + } + return null; +} + +function getHeadingLink(node) { + var walker = node.walker(); + while ((event = walker.next())) { + node = event.node; + if (event.entering && node._destination) { + return node._destination; + } + } + return null; +} + +function adorn(model) { + var articles = model.articles; + + articles.forEach((article, i) => { + var prevIndex = i - 1; + var nextIndex = i + 1; + if (prevIndex >= 0) { + article.prev = articles[prevIndex].url; + } + if (nextIndex < articles.length) { + article.next = articles[nextIndex].url; + } + }); +} + +function resolveUrl(url) { + return url.replace('.md', '').replace('./', ''); +} + +function getArticleByFile(file) { + var result = this.articles.find(x => (x.filePath.indexOf(file) >= 0)); + if (result) return result; + throw Error(`${file} was not found in articles for ${this.root}.`); +} + +module.exports = function (toc, slug) { + + var model = { + root: slug, + title: null, + articles: [], + getArticleByFile: getArticleByFile + }; + + var reader = new commonmark.Parser(); + var parsed = reader.parse(toc); + + var walker = parsed.walker(); + var event, node; + + while ((event = walker.next())) { + node = event.node; + if (event.entering && node.type === 'heading') { + if (node.level === 1) { + model.title = getHeadingText(node); + } + 
if (node.level === 2) { + var title = getHeadingText(node); + var filePath = getHeadingLink(node); + + var article = { + title: title, + filePath: filePath, + url: resolveUrl(filePath) + }; + model.articles.push(article); + } + } + } + adorn(model); + return model; +} \ No newline at end of file diff --git a/build/main/listFiles.js b/build/main/listFiles.js new file mode 100644 index 00000000000..0f8ada6facd --- /dev/null +++ b/build/main/listFiles.js @@ -0,0 +1,17 @@ +const Glob = require('glob').Glob; + +module.exports = ctx => { + + return new Promise((resolve, reject) => { + + var globbing = new Glob(ctx.pattern, { cwd: ctx.cwd }, (err, files) => { + + if (err) { + reject(err); + return; + } + + resolve({ files: files, ctx: ctx }); + }); + }); +}; \ No newline at end of file diff --git a/build/main/series.js b/build/main/series.js new file mode 100644 index 00000000000..6ad75d1a92f --- /dev/null +++ b/build/main/series.js @@ -0,0 +1,43 @@ +var createSeriesModel = require('./createSeriesModel'); +var lec = require('line-ending-corrector').LineEndingCorrector.correctSync; +var yamlFront = require('yaml-front-matter'); +var path = require('path'); +var fs = require('fs'); +var os = require('os'); + +module.exports = function (seriesSlug) { + console.log(`series: ${seriesSlug}`); + + var seriesPath = path.resolve(__dirname, `../../docs/${seriesSlug}`); + var tocPath = path.resolve(seriesPath, `toc.md`); + + var toc = fs.readFileSync(tocPath, 'utf8'); + + var series = createSeriesModel(toc, seriesSlug); + + var files = fs.readdirSync(seriesPath); + files.forEach(file => { + if (file.substr(-3) != ".md") return; + if (file === "toc.md") return; + + var filePath = path.resolve(seriesPath, file); + var content = fs.readFileSync(filePath, 'utf8'); + + var yml = yamlFront.loadFront(content); + var original = yml.__content; + delete yml.__content; + + var article = series.getArticleByFile(file); + + if (article.prev) { + yml['pnp.series.prev'] = article.prev; + } + if 
(article.next) { + yml['pnp.series.next'] = article.next; + } + yml['pnp.series.title'] = series.title; + + var updated = lec('---' + os.EOL + yamlFront.dump(yml) + '---' + original, { eolc: 'CRLF' }); + fs.writeFileSync(filePath, updated[1]); + }); +} \ No newline at end of file diff --git a/build/main/templates/featured-card.liquid b/build/main/templates/featured-card.liquid new file mode 100644 index 00000000000..089ebaf938d --- /dev/null +++ b/build/main/templates/featured-card.liquid @@ -0,0 +1,17 @@ + +
+
+
+
+
+ +
+
+
+

{{ item.title }}

+

{{ item.description }}

+
+
+
+
+
\ No newline at end of file diff --git a/build/main/templates/two-column-card.liquid b/build/main/templates/two-column-card.liquid new file mode 100644 index 00000000000..cf461d10557 --- /dev/null +++ b/build/main/templates/two-column-card.liquid @@ -0,0 +1,17 @@ + +
+
+
+
+
+ {{ item.title }} +
+
+
+

{{ item.title }}

+

{{ item.description }}

+
+
+
+
+
\ No newline at end of file diff --git a/build/patterns/build.js b/build/patterns/build.js new file mode 100644 index 00000000000..ac8b23af2fa --- /dev/null +++ b/build/patterns/build.js @@ -0,0 +1,39 @@ +const path = require('path'); +const fs = require('fs'); + +const yamlFront = require('yaml-front-matter'); +const Liquid = require('shopify-liquid'); + +const patterns = require('./patterns'); +const categories = require('./categories'); + +const engine = Liquid({ + root: path.resolve(__dirname, 'templates'), + extname: '.liquid' +}); + +var dataPath = path.resolve(__dirname, '../../docs/patterns/_categories.yml'); +var content = fs.readFileSync(dataPath, 'utf8'); +var yml = yamlFront.loadFront(content); +yml.patterns = patterns(); + +// render landing page +['index'].forEach(x => { + console.log(`templated page: ${x}`); + var templatePath = path.resolve(__dirname, `../../docs/patterns/${x}.liquid.md`); + var outputhPath = path.resolve(__dirname, `../../docs/patterns/${x}.md`); + var template = engine.parse(fs.readFileSync(templatePath, 'utf8')); + + engine.render(template, yml) + .then(markdown => { + fs.writeFile(outputhPath, markdown); + }); +}); + +// render category pages +var template = engine.parse("{%- include 'category-index' -%}"); +categories(yml).forEach(c => { + var outputhPath = path.resolve(__dirname, `../../docs/patterns/category/${c.url}.md`); + engine.render(template, c) + .then(markdown => { fs.writeFile(outputhPath, markdown); }); +}); \ No newline at end of file diff --git a/build/patterns/categories.js b/build/patterns/categories.js new file mode 100644 index 00000000000..897ac442487 --- /dev/null +++ b/build/patterns/categories.js @@ -0,0 +1,11 @@ +module.exports = function (data) { + var categories = data.categories; + var patterns = data.patterns; + + // associate individual patterns with categories using their metadata + categories.forEach(c => { + c.patterns = patterns.filter(p => p.categories.includes(c.url)); + }); + + return 
categories; +}; \ No newline at end of file diff --git a/build/patterns/patterns.js b/build/patterns/patterns.js new file mode 100644 index 00000000000..17e9cb385ae --- /dev/null +++ b/build/patterns/patterns.js @@ -0,0 +1,28 @@ +var yamlFront = require('yaml-front-matter'); +var path = require('path'); +var fs = require('fs'); + +module.exports = function () { + + var model = []; + var patternsPath = path.resolve(__dirname, `../../docs/patterns`); + + var files = fs.readdirSync(patternsPath); + files.forEach(file => { + if (file.substr(-3) != ".md") return; + if (['index.md','index.liquid.md','toc.md'].indexOf(file) > -1) return; + + var filePath = path.resolve(patternsPath, file); + var content = fs.readFileSync(filePath, 'utf8'); + var yml = yamlFront.loadFront(content); + + model.push({ + title: yml['title'], + description: yml['description'], + file: file, + categories: yml['pnp.pattern.categories'] + }); + }); + + return model; +}; \ No newline at end of file diff --git a/build/patterns/templates/category-card.liquid b/build/patterns/templates/category-card.liquid new file mode 100644 index 00000000000..1b45ee1571c --- /dev/null +++ b/build/patterns/templates/category-card.liquid @@ -0,0 +1,10 @@ + + + {{ category.title }} + +
+

{{ category.title }}

+

{{ category.description }}

+
+ +
\ No newline at end of file diff --git a/build/patterns/templates/category-index.liquid b/build/patterns/templates/category-index.liquid new file mode 100644 index 00000000000..4a730942631 --- /dev/null +++ b/build/patterns/templates/category-index.liquid @@ -0,0 +1,21 @@ +--- +title: {{ title }} patterns +description: {{ description }} +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +--- + +# {{ title }} patterns + +[!INCLUDE [header](../../_includes/header.md)] + +{{ description }} + +| Pattern | Summary | +| ------- | ------- | +{%- for pattern in patterns %} +| [{{ pattern.title }}](../{{ pattern.file }}) | {{ pattern.description }} | +{%- endfor %} \ No newline at end of file diff --git a/docs/_bread/toc.yml b/docs/_bread/toc.yml new file mode 100644 index 00000000000..9367c108f06 --- /dev/null +++ b/docs/_bread/toc.yml @@ -0,0 +1,32 @@ +- name: Azure + tocHref: /azure/ + topicHref: /azure + items: + - name: Architecture + tocHref: /azure/architecture/ + topicHref: /azure/architecture + items: + - name: Best Practices + tocHref: /azure/architecture/best-practices + topicHref: /azure/architecture/best-practices + - name: Azure for AWS Professionals + tocHref: /azure/architecture/aws-professional + topicHref: /azure/architecture/aws-professional + - name: Design for Resiliency + tocHref: /azure/architecture/resiliency + topicHref: /azure/architecture/resiliency + - name: Design Review Checklists + tocHref: /azure/architecture/checklist + topicHref: /azure/architecture/checklist + - name: Elasticsearch + tocHref: /azure/architecture/elasticsearch + topicHref: /azure/architecture/elasticsearch + - name: Manage Identity in Multitenant Applications + tocHref: /azure/architecture/multitenant-identity + topicHref: /azure/architecture/multitenant-identity + - name: Blueprints + tocHref: /azure/architecture/blueprints + topicHref: /azure/architecture/blueprints + - name: Cloud Design Patterns + tocHref: 
/azure/architecture/patterns + topicHref: /azure/architecture/patterns \ No newline at end of file diff --git a/docs/_css/hubCards.css b/docs/_css/hubCards.css new file mode 100644 index 00000000000..44107a5fcd8 --- /dev/null +++ b/docs/_css/hubCards.css @@ -0,0 +1,487 @@ +.card { + overflow: hidden; + position: relative; + border: 1px solid #dbdbdb; + padding: 6px 12px 6px 12px; + box-shadow: 0px 2px 5px #e8e8e8; +} + +html.theme_night .card { + border: 1px solid #666; + box-shadow: 0px 2px 5px #444; +} + +.card:hover, .card:active { + border: 1px solid #5493d1; +} + +.card a { + text-decoration: none; +} + +.card h3 { + font-size: 1rem; + margin: 0; + line-height: 1.125; + color: $text; +} + +html.theme_night .card h3{ + color:#ccc; +} + +a .card:hover h3, a .card:active h3, html.theme_night .card:hover h3, html.theme_night .card:active h3 { + color: $blue; +} + +a .card:hover h3:hover, a .card:active h3:active, .card h3 a:hover, .card h3 a:active { + text-decoration:underline; +} + +.card p { + font-size: 0.9375rem; + line-height: 1.33; + color: $text-subtle; + margin: 6px 0 0 0; + padding: 0; +} + +html.theme_night .card p{ + color:#ccc; +} + +.cardPadding { + padding: 0 12px 22px 12px; +} + +.cardSize { + width: 100%; + overflow: hidden; +} + +.cardScaleImage { + background-repeat:no-repeat; + background-size: contain; + background-position: center center; + height:100%; + width:100%; +} + +.cardText a, +.cardText a:visited { + color: $text-subtle; + text-decoration: none; +} + +.cardText h3 a, +.cardText h3 a:visited { + color: $text; + text-decoration: none; +} + +html.theme_night .cardText h3 a, +html.theme_night .cardText h3 a:visited { + color: #ccc; + text-decoration: none; +} + +a .card:hover .cardText, +a .card:active .cardText, +.cardText a:hover, +.cardText a:active, +html.theme_night .cardText h3 a:hover, +html.theme_night .cardText h3 a:active { + color: $blue; +} + +/* styleA */ +.cardsA .cardSize { + min-width: 260px; +} + +.cardsA .card { + 
min-height: 176px; +} + +.cardsA .cardImageOuter { + width:100%; +} +.cardsA .cardImage { + width:68px; + height:68px; + margin:0 auto 8px auto; +} + +/* styleB */ +.cardsB .cardSize { + min-width: 260px; +} + +.cardsB .card { + height: 176px; +} + +.cardsB .cardImageOuter { + float:left; +} + +.cardsB .cardImage { + width:68px; + height:68px; + margin:0 auto 8px auto; +} + +.cardsB .cardText { + padding-left: 80px; + padding-top:12px; +} + +/* styleC */ +.cardsC .cardSize { + min-width: 260px; +} + +.cardsC .card { + padding: 0 0 6px 0; + min-height: 250px; +} + +.cardsC .cardImageOuter { + width: 100%; + height: 140px; + overflow: hidden; +} + +.cardsC .cardImage img { + width: 100%; + max-width: 400px; +} + +.cardsC .cardText { + padding: 6px 12px 0 12px; +} + +/* styleD */ +.cardsD .cardSize { + min-width: 260px; +} + +.cardsD .card { + padding: 0 0 6px 0; + box-shadow: none; + border: 0; +} + +html.theme_night .cardsD .card { + box-shadow: none; + border: 0; +} + +.cardsD .cardImageOuter { + width: 100%; + height: 180px; +} + +.cardsD .cardText { + padding: 12px; +} + +/* style E */ +.cardsE .cardSize { + min-width: 260px; +} + +.cardsE .card { + height: 180px; + padding: 0 0 0 12px; + box-shadow: none; + border: 0; + border-left: 2px solid #dbdbdb; +} + +html.theme_night .cardsE .card { + box-shadow: none; + border: 0; + border-left: 2px solid #333; +} + +html[dir='rtl'] .cardsE .card { + border: 0; + border-right: 2px solid #dbdbdb; + padding: 0 12px 0 0; +} + +html[dir='rtl'].theme_night .cardsE .card { + border-right: 2px solid #333; +} + +.cardsE .cardImageOuter { + width:100%; +} + +.cardsE .cardImage { + width:68px; + height:68px; + margin:0 0 8px 0; +} + +/* style F */ +.cardsF .cardSize { + min-width: 260px; +} + +.cardsF .card { + padding: 0 0 6px 0; + box-shadow: none; + border: 0; +} + +html.theme_night .cardsF .card { + box-shadow: none; + border: 0; +} + +.cardsF .cardImageOuter { + float:left; +} + +.cardsF .cardImage { + width:80px; + 
height:80px; + overflow:hidden; + margin:0 auto; +} + +.cardsF .cardText { + padding-left: 98px; +} + +/* style F Title */ +.cardsFTitle .cardSize { + min-width: 260px; +} + +.cardsFTitle .card { + padding: 0 0 6px 0; + box-shadow: none; + border: 0; +} + +html.theme_night .cardsFTitle .card { + box-shadow: none; + border: 0; +} + +.cardsFTitle .cardImageOuter { + float:left; +} + +.cardsFTitle .cardImage { + width:50px; + height:50px; + overflow:hidden; + margin:0 auto; +} + +.cardsFTitle .cardText { + padding-left: 62px; +} + +.cardsFTitle .cardText h3 { + font-family: segoe-ui_normal, 'Segoe UI', Segoe, 'Segoe WP', 'Helvetica Neue', Helvetica, sans-serif; + font-size: 1rem; + line-height: 1.25; +} + +.cardsFTitle .cardText p{ + display:none; +} + +/* style G */ +.cardsG .cardSize { + min-width: 260px; +} + +.cardsG .card { + padding: 0 0 6px 0; + box-shadow: none; + border: 0; +} + +html.theme_night .cardsG .card { + box-shadow: none; + border: 0; +} + +.cardsG .cardImageOuter { + float:left; +} + +.cardsG .cardImage { + width:150px; + height:105px; + overflow:hidden; + margin:0 auto 8px auto; +} + +.cardsG .cardImage img { + width: 100%; +} + +.cardsG .cardText { + padding-left: 162px; +} + +@media only screen and (min-width: $desktop) { + .cardsG > li { + -webkit-flex: 0 1 50%; + -ms-flex: 0 1 50%; + flex: 0 1 50%; + } +} + +/* landing pages */ +.cardsW .card { + box-shadow: none; + border: 0; + padding:0; +} + +html.theme_night .cardsW .card { + box-shadow: none; + border: 0; +} + +.cardsW .card h3 { + font-size:1.188rem; +} + +.cardsW .card p { + font-size:1rem; + margin: 10px 0 0; +} + +/* site home */ +.cardsX .cardSize { + position: relative; +} + +.cardsX .card { + min-height: 92px; + padding:0; +} + +.noTouch .cardsX .card:hover, .noTouch .cardsX .card:active { + border-color: transparent; +} + +.cardsX .cardImageOuter { + margin:0; +} + +.cardsX .cardImage { + width:300px; + height:92px; + margin:auto; +} + +.cardsX .cardText { + display:none; +} + 
+.cardsX .card h3 { + display:none; +} + +@media only screen and (min-width: $tablet) { + .cardsX li { + margin-bottom: 32px; + } +} + +/* featured */ +.cardsY .card { + padding: 6px; +} + +.cardsY .cardImageOuter { + float:left; + margin:6px 0 6px 4px; +} + +.cardsY .cardImage { + width:50px; + height:50px; +} + +.cardsY .cardText { + padding-left: 64px; + margin-top:12px; +} + +.cardsY .card h3 { + font-size: 1rem; + font-family: segoe-ui_normal, 'Segoe UI', Segoe, 'Segoe WP', 'Helvetica Neue', Helvetica, sans-serif; + line-height:1.28; +} + +.cardsY .card p { + font-size: 0.875rem; + margin:0; +} + +/* no image */ +.cardsZ .cardImageOuter { + display:none; +} + +.cardsZ .card { + min-height: 116px; + padding-top: 24px; +} + +.directory > li { + float:left; + width: 50%; +} + +.directory > li > .group { + float:left; + min-width: 240px; + width: 50%; +} + +.directory h3 { + margin-top:4px; +} + +.directory > li ul { + list-style-type:none; + margin-bottom:32px; +} + +.directory > li ul img { + float:left; + height:24px; + width:24px; +} + +@media only screen and (min-width: $desktop) { + .panelItem > .directory > li { + flex: 0 1 50%; + } +} + +.directory > li ul a { + color: $text-subtle; +} + +.group li p { + margin-left:36px; + color: $text-subtle; +} + +.group li:hover p, .group li:active p { + color: $blue; + text-decoration:underline +} \ No newline at end of file diff --git a/docs/_css/pnp.css b/docs/_css/pnp.css new file mode 100644 index 00000000000..151f1784802 --- /dev/null +++ b/docs/_css/pnp.css @@ -0,0 +1,147 @@ +h1 { + background-color: #007bb8; + color: white; + padding: 16px; + padding-top: 90px; + margin-bottom: 4px; +} +h1 > span.series { + font-size: 20px; + display: block; +} +.metadata { + display: none; +} +.content p:empty { + display: none; +} + +nav.series { + display: flex; + width: calc(100% - 12px); + margin: 0 auto; + margin-top: -12px; +} +nav.series a { + flex-basis: 100%; +} +nav.series .next { + text-align: right; +} +nav.series 
.prev { + text-align: left; +} + +nav.categories { + display: flex; + margin: 0 auto; + margin-bottom: -12px; +} +nav.categories a { + margin-right: 8px; +} +nav.categories a img { + width: 48px; +} + +.pnp .frontmatter { + margin-bottom: 12px; +} +.pnp p { + display: inline-block !important; +} +.panel { + display: flex; + flex-wrap: wrap; +} +.panel li { + list-style-type: none; +} +.pnp #featured>li { + flex: 0 1 33%; +} +.pnp #featured>li img { + max-width: 260px; + /* This is a hack I added because I couldn't get the
  • to shrink down to 33% in Edge */ +} +@media all and (max-width: 800px) { + .pnp #featured>li { + flex: 0 1 100%; + } +} +.panel.secondary li { + flex: 0 1 50%; +} +.panel { + margin-bottom: 0; +} +.pnp hr { + margin: 16px 0 32px 0; +} + +#categories>li { + flex: 0 1 100%; + margin-left: 0 !important; + margin-bottom: 18px !important; + list-style-type: none; +} +#categories>li a{ + display: flex; + flex-wrap: nowrap; + align-items: flex-start; +} +#categories>li a img { + margin-right: 12px; + max-width: 64px; +} +@media all and (max-width: 800px) { + #categories>li a img { + max-width: 32px; + } +} +#categories>li a .cardText h3 { + color: rgb(34,34,34); + margin-top: 0; +} +#categories>li a .cardText p { + font-size: 1rem; + margin: 10px 0 0; + font-size: .9375rem; + line-height: 1.33; + color: #6e6e6e; + padding: 0; +} +#categories>li a .cardText h3 { + font-size: 1.188rem; + margin: 0; + line-height: 1.125; + color: #222; +} + +.panel li { + list-style-type: none; + flex: 0 1 33%; +} +.panel.x2 li { + flex-basis: 50%; +} +.panel.x3 li { + flex-basis: 33%; +} +.panel.x4 li { + flex-basis: 25%; +} +.panel .cardsD .cardSize { + min-width: 240px !important; +} + +section.series h2 { + margin-top: 0; +} +section.series .panel{ + margin-bottom: -24px; +} +section.series .links { + text-align: right; + margin-bottom: 24px; +} \ No newline at end of file diff --git a/docs/_images/aws-professional.svg b/docs/_images/aws-professional.svg new file mode 100644 index 00000000000..ebc47ee7f18 --- /dev/null +++ b/docs/_images/aws-professional.svg @@ -0,0 +1,57 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/_images/azure-arch-1.svg b/docs/_images/azure-arch-1.svg new file mode 100644 index 00000000000..f97e63382b4 --- /dev/null +++ b/docs/_images/azure-arch-1.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + diff --git a/docs/_images/azure-arch-2.svg b/docs/_images/azure-arch-2.svg new file mode 100644 index 00000000000..5f628d42f04 --- /dev/null +++ b/docs/_images/azure-arch-2.svg @@ -0,0 +1,202 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_images/azure-arch-3.svg b/docs/_images/azure-arch-3.svg new file mode 100644 index 00000000000..a300476e886 --- /dev/null +++ b/docs/_images/azure-arch-3.svg @@ -0,0 +1,724 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_images/azure-arch-4.svg b/docs/_images/azure-arch-4.svg new file mode 100644 index 00000000000..18f60f6e3e2 --- /dev/null +++ b/docs/_images/azure-arch-4.svg @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_images/azure-arch-5.svg b/docs/_images/azure-arch-5.svg new file mode 100644 index 00000000000..522f891f5e3 --- /dev/null +++ b/docs/_images/azure-arch-5.svg @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_images/azurecat.svg b/docs/_images/azurecat.svg new file mode 100644 index 00000000000..f2a13c0ea73 --- /dev/null +++ b/docs/_images/azurecat.svg @@ -0,0 +1,79 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/_images/checklist.svg b/docs/_images/checklist.svg new file mode 100644 index 00000000000..a9c3c4225c1 --- /dev/null +++ 
b/docs/_images/checklist.svg @@ -0,0 +1,196 @@ + + + +image/svg+xmlPage-1Task list or BacklogDocument (stackable)Sheet.62Sheet.63Sheet.64Sheet.65Sheet.66Sheet.67Sheet.68_x3C_Slice_x3E__179_Sheet.70Sheet.71Sheet.72Sheet.73Checkmark / success.11017Sheet.75Sheet.76Checkmark / success.11024Sheet.78Sheet.79 \ No newline at end of file diff --git a/docs/_images/elasticsearch.svg b/docs/_images/elasticsearch.svg new file mode 100644 index 00000000000..f8bf2d76375 --- /dev/null +++ b/docs/_images/elasticsearch.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_images/github.png b/docs/_images/github.png new file mode 100644 index 00000000000..8b25551a979 Binary files /dev/null and b/docs/_images/github.png differ diff --git a/docs/_images/github.svg b/docs/_images/github.svg new file mode 100644 index 00000000000..05ef379a0d8 --- /dev/null +++ b/docs/_images/github.svg @@ -0,0 +1,63 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/docs/_images/multitenant-identity.svg b/docs/_images/multitenant-identity.svg new file mode 100644 index 00000000000..a9e8e4db582 --- /dev/null +++ b/docs/_images/multitenant-identity.svg @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + diff --git a/docs/_images/pnp-logo.svg b/docs/_images/pnp-logo.svg new file mode 100644 index 00000000000..900524937c3 --- /dev/null +++ b/docs/_images/pnp-logo.svg @@ -0,0 +1,346 @@ + + + + + patterns & practices + + + + + + image/svg+xml + + patterns & practices + http://aka.ms/practices + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_images/resiliency.svg b/docs/_images/resiliency.svg new file mode 100644 index 00000000000..bd112be8a3b --- /dev/null +++ b/docs/_images/resiliency.svg @@ -0,0 +1,70 @@ + + + + + Design for Resiliency + + + + + + image/svg+xml + + Design for Resiliency + + + patterns & practices + + + + + + + + + diff --git 
a/docs/_images/sqlcat.svg b/docs/_images/sqlcat.svg new file mode 100644 index 00000000000..1812bf6dfbf --- /dev/null +++ b/docs/_images/sqlcat.svg @@ -0,0 +1,71 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/_includes/header.md b/docs/_includes/header.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/docs/aws-professional/images/azure-aws-account-compare.png b/docs/aws-professional/images/azure-aws-account-compare.png new file mode 100644 index 00000000000..48bda918b46 Binary files /dev/null and b/docs/aws-professional/images/azure-aws-account-compare.png differ diff --git a/docs/aws-professional/images/three-tier-example.png b/docs/aws-professional/images/three-tier-example.png new file mode 100644 index 00000000000..46a5739e51a Binary files /dev/null and b/docs/aws-professional/images/three-tier-example.png differ diff --git a/docs/aws-professional/images/zone-fault-domains.png b/docs/aws-professional/images/zone-fault-domains.png new file mode 100644 index 00000000000..d0a828798ed Binary files /dev/null and b/docs/aws-professional/images/zone-fault-domains.png differ diff --git a/docs/aws-professional/index.md b/docs/aws-professional/index.md new file mode 100644 index 00000000000..87d4b766b30 --- /dev/null +++ b/docs/aws-professional/index.md @@ -0,0 +1,678 @@ +--- +title: Introduction to Azure for AWS experts | Microsoft Docs +description: Understand the basics of Microsoft Azure accounts, platform, and services. Also learn key similarities and differences between the AWS and Azure platforms. 
+services: '' +documentationcenter: '' +keywords: AWS experts, Azure comparison, AWS comparison, difference between azure and aws, azure and aws +author: lbrader +manager: christb + +ms.assetid: +ms.service: multiple +ms.workload: na +ms.tgt_pltfrm: na +ms.devlang: na +ms.topic: article +ms.date: 11/21/2016 +ms.author: lbrader + +--- + +# Introduction to Microsoft Azure accounts, platform, and services for AWS experts + +This article helps Amazon Web Services (AWS) experts understand the basics of Microsoft Azure accounts, platform, and services. It also covers key similarities and differences between the AWS and Azure platforms. + +You'll learn: + +* How accounts and resources are organized in Azure. +* How available solutions are structured in Azure. +* How the major Azure services differ from AWS services. + + Azure and AWS built their capabilities independently over time so that each has important implementation and design differences. + +## Overview + +Like AWS, Microsoft Azure is built around a core set of compute, storage, +database, and networking services. In many cases, both platforms offer a basic +equivalence between the products and services they offer. Both AWS and Azure +allow you to build highly available solutions based on Windows or Linux hosts. So, if you're used to development using Linux and OSS technology, both platforms +can do the job. + +While the capabilities of both platforms are similar, the resources that provide +those capabilities are often organized differently. Exact one-to-one +relationships between the services required to build a solution are not always +clear. There are also cases where a particular service might be offered on one +platform, but not the other. See [charts of comparable Azure and AWS services](services.md). + +## Accounts and subscriptions + +Azure services can be purchased using several pricing options, depending on your +organization's size and needs. 
See the [pricing +overview](https://azure.microsoft.com/pricing/) page for details. + +[Azure +subscriptions](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-infrastructure-subscription-accounts-guidelines/) +are a grouping of resources with an assigned owner responsible for billing and +permissions management. Unlike AWS, where any resources created under the AWS +account are tied to that account, subscriptions exist independently of their +owner accounts, and can be reassigned to new owners as needed. + +![Comparison of structure and ownership of AWS accounts and Azure subscriptions](./images/azure-aws-account-compare.png "Comparison of structure and ownership of AWS accounts and Azure subscriptions") +
    *Comparison of structure and ownership of AWS accounts and Azure subscriptions* +

    + +Subscriptions are assigned three types of administrator accounts: + +- **Account Administrator** - The subscription owner and the + account billed for the resources used in the subscription. The account + administrator can only be changed by transferring ownership of the + subscription. + +- **Service Administrator** - This account has rights to create and manage + resources in the subscription, but is not responsible for billing. By + default, the account administrator and service administrator are assigned to + the same account. The account administrator can assign a separate user to + the service administrator account for managing the technical and operational + aspects of a subscription. There is only one service administrator per + subscription. + +- **Co-administrator** - There can be multiple co-administrator accounts + assigned to a subscription. Co-administrators cannot change the service + administrator, but otherwise have full control over subscription resources + and users. + +Below the subscription level, user roles and individual permissions can also be assigned to specific resources, similar to how permissions are granted to IAM users and groups in AWS. In Azure, all user accounts are associated with either a Microsoft Account or an Organizational Account (an account managed through Azure Active Directory). + +Like AWS accounts, subscriptions have default service quotas and limits. For a +full list of these limits, see [Azure subscription and service limits, quotas, +and +constraints](https://azure.microsoft.com/documentation/articles/azure-subscription-service-limits/). +These limits can be increased up to the maximum by [filing a support request in +the management +portal](https://blogs.msdn.microsoft.com/girishp/2015/09/20/increasing-core-quota-limits-in-azure/). 
+ +### See also + +- [How to add or change Azure administrator + roles](https://azure.microsoft.com/documentation/articles/billing-add-change-azure-subscription-administrator/) + +- [How to download your Azure billing invoice and daily usage + data](https://azure.microsoft.com/documentation/articles/billing-download-azure-invoice-daily-usage-date/) + +## Resource management + +The term "resource" in Azure is used in the same way as in AWS, meaning any +compute instance, storage object, networking device, or other entity you can +create or configure within the platform. + +Azure resources are deployed and managed using one of two models: Azure +Resource Manager, or the older Azure [classic deployment model](/azure/azure-resource-manager/resource-manager-deployment-model). +Any new resources are created using the Resource Manager model. + +### Resource groups + +Both Azure and AWS have entities called "resource groups" that organize resources such as VMs, storage, and virtual networking devices. However, [Azure resource groups](https://azure.microsoft.com/documentation/articles/virtual-machines-windows-infrastructure-resource-groups-guidelines/) are not directly comparable to AWS resource groups. + +While AWS allows a resource to be tagged into multiple resource groups, an Azure +resource is always associated with one resource group. A resource created in one +resource group can be moved to another group, but can only be in one resource +group at a time. Resource groups are the fundamental grouping used by Azure +Resource Manager. + +Resources can also be organized using +[tags](https://azure.microsoft.com/documentation/articles/resource-group-using-tags/). +Tags are key-value pairs that allow you to group resources across your +subscription irrespective of resource group membership. + +### Management interfaces + +Azure offers several ways to manage your resources: + +- [Web + interface](https://azure.microsoft.com/documentation/articles/resource-group-portal/). 
+ Like the AWS Dashboard, the Azure portal provides a full web-based + management interface for Azure resources. + +- [REST + API](https://azure.microsoft.com/documentation/articles/resource-manager-rest-api/). + The Azure Resource Manager REST API provides programmatic access to most of + the features available in the Azure portal. + +- [Command + Line](https://azure.microsoft.com/documentation/articles/xplat-cli-azure-resource-manager/). + The Azure CLI tool provides a command-line interface capable of creating and + managing Azure resources. Azure CLI is available for [Windows, Linux, and + Mac OS](https://github.com/azure/azure-xplat-cli). + +- [PowerShell](https://azure.microsoft.com/documentation/articles/powershell-azure-resource-manager/). + The Azure modules for PowerShell allow you to execute automated management + tasks using a script. PowerShell is available for [Windows, Linux, and Mac + OS](https://github.com/PowerShell/PowerShell). + +- [Templates](https://azure.microsoft.com/documentation/articles/resource-group-authoring-templates/). + Azure Resource Manager templates provide similar JSON template-based + resource management capabilities to the AWS CloudFormation service. + +In each of these interfaces, the resource group is central to how Azure +resources get created, deployed, or modified. This is similar to the role a +"stack" plays in grouping AWS resources during CloudFormation deployments. + +The syntax and structure of these interfaces are different from their AWS +equivalents, but they provide comparable capabilities. In addition, many third +party management tools used on AWS, like [Hashicorp's +Terraform](https://www.terraform.io/docs/providers/azurerm/) and [Netflix +Spinnaker](http://www.spinnaker.io/), are also available on Azure. 
+ +### See also + +- [Azure resource group + guidelines](https://azure.microsoft.com/documentation/articles/virtual-machines-windows-infrastructure-resource-groups-guidelines/) + +## Regions and zones (high availability) + +In AWS, availability centers around the concept of Availability Zones. In Azure, +fault domains and availability sets are all involved in building highly +available solutions. Paired regions provide additional disaster recovery +capabilities. + +### Availability Zones, Azure fault domains, and availability sets + +In AWS, a region is divided into two or more Availability Zones. An Availability +Zone corresponds with a physically isolated datacenter in the geographic region. +If you deploy your application servers to separate Availability Zones, a +hardware or connectivity outage affecting one zone does not impact any servers +hosted in other zones. + +In Azure, a [fault +domain](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-manage-availability/) +defines a group of VMs that shares a physical power source and network switch. +You use [availability +sets](https://azure.microsoft.com/documentation/articles/virtual-machines-windows-manage-availability/) +to spread VMs across multiple fault domains. When instances are assigned to the +same availability set, Azure distributes them evenly across several fault +domains. If a power failure or network outage occurs in one fault domain, at +least some of the set's VMs are in another fault domain and unaffected by the +outage. + +![AWS Availability Zones comparison to Azure fault domains and availability sets](./images/zone-fault-domains.png "AWS Availability Zones compared with Azure fault domains and availability sets") +
    *AWS Availability Zones compared with Azure fault domains and availability sets* +

    + +Availability sets should be organized by the instance's role in your application +to ensure one instance in each role is operational. For example, in a standard +three-tier web application, you would want to create a separate availability set +for front-end, application, and data instances. + +![Azure availability sets for each application role](./images/three-tier-example.png "Availability sets for each application role") +
    *Azure availability sets for each application role* +

    + +When VM instances are added to availability sets, they are also assigned an +[update +domain](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-manage-availability/). +An update domain is a group of VMs that are set for planned maintenance events +at the same time. Distributing VMs across multiple update domains ensures that +planned update and patching events affect only a subset of these VMs at any +given time. + +### Paired regions + +In Azure, you use [paired +regions](https://azure.microsoft.com/documentation/articles/best-practices-availability-paired-regions/) +to support redundancy across two predefined geographic regions, ensuring that +even if an outage affects an entire Azure region, your solution is still +available. + +Unlike AWS Availability Zones, which are physically separate datacenters but may +be in relatively nearby geographic areas, paired regions are usually separated +by at least 300 miles. This is intended to ensure larger scale disasters only impact one of the regions in the pair. Neighboring pairs can be set to sync +database and storage service data, and are configured so that platform updates +are rolled out to only one region in the pair at a time. + +Azure [geo-redundant +storage](https://azure.microsoft.com/documentation/articles/storage-redundancy/#geo-redundant-storage) +is automatically backed up to the appropriate paired region. For all other +resources, creating a fully redundant solution using paired regions means +creating a full copy of your solution in both regions. 
+ +### See also + +- [Regions and availability for virtual machines in + Azure](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-regions-and-availability/) + +- [Disaster recovery and high availability for applications built on Microsoft + Azure](https://azure.microsoft.com/documentation/articles/resiliency-disaster-recovery-high-availability-azure-applications/) + +- [Planned maintenance for Linux virtual machines in + Azure](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-planned-maintenance/) + +## Services + +Consult the [complete AWS and Azure service comparison matrix](https://aka.ms/azure4aws-services) for a full listing of how all services map between platforms. + +Not all Azure products and +services are available in all regions. Consult the [Products by +Region](https://azure.microsoft.com/regions/services/) page for details. You can find the uptime guarantees and downtime credit policies for each Azure +product or service on the [Service Level +Agreements](https://azure.microsoft.com/support/legal/sla/) page. + +The following sections provide a brief explanation of how commonly used features and services differ between the AWS and Azure platforms. + +### Compute services + +#### EC2 Instances and Azure virtual machines + +Although AWS instance types and Azure virtual machine sizes break down in a +similar way, there are differences in the RAM, CPU, and storage capabilities. + +- [Amazon EC2 Instance Types](https://aws.amazon.com/ec2/instance-types/) + +- [Sizes for virtual machines in Azure + (Windows)](https://azure.microsoft.com/documentation/articles/virtual-machines-windows-sizes/) + +- [Sizes for virtual machines in Azure + (Linux)](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-sizes/) + +Unlike AWS' hourly billing, Azure on-demand VMs are billed by the minute. + +Azure has no equivalent to EC2 Spot Instances, Reserved Instances, or Dedicated +Hosts. 
+ +#### EBS and Azure Storage for VM disks + +Durable data storage for Azure VMs is provided by [data +disks](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-about-disks-vhds/) +residing in blob storage. This is similar to how EC2 instances store disk +volumes on Elastic Block Store (EBS). [Azure temporary +storage](https://blogs.msdn.microsoft.com/mast/2013/12/06/understanding-the-temporary-drive-on-windows-azure-virtual-machines/) +also provides VMs the same low-latency temporary read-write storage as EC2 +Instance Storage (also called ephemeral storage). + +Higher performance disk IO is supported using [Azure premium +storage](https://docs.microsoft.com/azure/storage/storage-premium-storage). +This is similar to the Provisioned IOPS storage options provided by AWS. + +#### Lambda, Azure Functions, Azure Web-Jobs, and Azure Logic Apps + +[Azure Functions](https://azure.microsoft.com/services/functions/) is the +primary equivalent of AWS Lambda in providing serverless, on-demand code. +However, Lambda functionality also overlaps with other Azure services: + +- [WebJobs](https://azure.microsoft.com/documentation/articles/web-sites-create-web-jobs/) - allow you to create scheduled or continuously running background tasks. + +- [Logic Apps](https://azure.microsoft.com/services/logic-apps/) - provides communications, integration, and business rule management services. + +#### Autoscaling, Azure VM scaling, and Azure App Service Autoscale + +Autoscaling in Azure is handled by two services: + +- [VM scale + sets](https://azure.microsoft.com/documentation/articles/virtual-machine-scale-sets-overview/) - allow you to deploy and manage an identical set of VMs. The number of + instances can autoscale based on performance needs. + +- [App Service + Autoscale](https://azure.microsoft.com/documentation/articles/web-sites-scale/) - provides the capability to autoscale Azure App Service solutions. 
+ + +#### Container Service +The [Azure Container Service](https://docs.microsoft.com/azure/container-service/container-service-intro) supports Docker containers managed through Docker Swarm, Kubernetes, or DC/OS. + +#### Other compute services + + +Azure offers several compute services that do not have direct equivalents in +AWS: + +- [Azure + Batch](https://azure.microsoft.com/documentation/articles/batch-technical-overview/) - allows you to manage compute-intensive work across a scalable collection + of virtual machines. + +- [Service + Fabric](https://azure.microsoft.com/documentation/articles/service-fabric-overview/) - platform for developing and hosting scalable + [microservice](https://azure.microsoft.com/documentation/articles/service-fabric-overview-microservices/) + solutions. + +#### See also + +- [Create a Linux VM on Azure using the + Portal](https://azure.microsoft.com/documentation/articles/virtual-machines-linux-quick-create-portal/) + +- [Azure Reference Architecture: Running a Linux VM on + Azure](https://azure.microsoft.com/documentation/articles/guidance-compute-single-vm-linux/) + +- [Get started with Node.js web apps in Azure App + Service](https://azure.microsoft.com/documentation/articles/app-service-web-nodejs-get-started/) + +- [Azure Reference Architecture: Basic web + application](https://azure.microsoft.com/documentation/articles/guidance-web-apps-basic/) + +- [Create your first Azure + Function](https://azure.microsoft.com/documentation/articles/functions-create-first-azure-function/) + +### Storage + +#### S3/EBS/EFS and Azure Storage + +In the AWS platform, cloud storage is primarily broken down into three services: + +- **Simple Storage Service (S3)** - basic object storage. Makes data available + through an Internet accessible API. + +- **Elastic Block Storage (EBS)** - block level storage, intended for access + by a single VM. 
+ +- **Elastic File System (EFS)** - file storage meant for use as shared storage + for up to thousands of EC2 instances. + +In Azure Storage, subscription-bound [storage +accounts](https://azure.microsoft.com/documentation/articles/storage-create-storage-account/) +allow you to create and manage the following storage services: + +- [Blob + storage](https://azure.microsoft.com/documentation/articles/storage-create-storage-account/) - stores any type of text or binary data, such as a document, media file, or + application installer. You can set Blob storage for private access or share + contents publicly to the Internet. Blob storage serves the same purpose as + both AWS S3 and EBS. + +- [Table + storage](https://azure.microsoft.com/documentation/articles/storage-nodejs-how-to-use-table-storage/) - stores structured datasets. Table storage is a NoSQL key-attribute data + store that allows for rapid development and fast access to large quantities + of data. Similar to AWS' SimpleDB and DynamoDB services. + +- [Queue + storage](https://azure.microsoft.com/documentation/articles/storage-nodejs-how-to-use-queues/) - provides messaging for workflow processing and for communication between + components of cloud services. + +- [File + storage](https://azure.microsoft.com/documentation/articles/storage-java-how-to-use-file-storage/) - offers shared storage for legacy applications using the standard server + message block (SMB) protocol. File storage is used in a similar manner to + EFS in the AWS platform. + +#### Glacier and Azure Storage + +Azure Storage does not offer a direct equivalent to AWS' long-term archival +Glacier storage. For data that is infrequently accessed and long-lived Azure +offers the [Azure cool blob storage +tier](https://azure.microsoft.com/documentation/articles/storage-blob-storage-tiers/). +Cool storage provides cheaper, lower performance storage than standard blob +storage and is comparable to AWS' S3 - Infrequent Access. 
+ +#### See also + +- [Microsoft Azure Storage Performance and Scalability + Checklist](https://azure.microsoft.com/documentation/articles/storage-performance-checklist/) + +- [Azure Storage security + guide](https://azure.microsoft.com/documentation/articles/storage-security-guide/) + +- [Patterns & Practices: Content Delivery Network (CDN) + guidance](https://azure.microsoft.com/documentation/articles/best-practices-cdn/) + +### Networking + +#### Elastic Load Balancing, Azure Load Balancer, and Azure Application Gateway + +The Azure equivalents of the two Elastic Load Balancing services are: + +- [Load + Balancer](https://azure.microsoft.com/documentation/articles/load-balancer-overview/) - provides the same capabilities as the AWS Classic Load Balancer, allowing + you to distribute traffic for multiple VMs at the network level. It also + provides failover capability. + +- [Application + Gateway](https://azure.microsoft.com/documentation/articles/application-gateway-introduction/) - offers application-level rule-based routing comparable to the AWS + Application Load Balancer. + +#### Route 53, Azure DNS, and Azure Traffic Manager + +In AWS, Route 53 provides both DNS name management and DNS-level traffic routing +and failover services. In Azure this is handled through two services: + +- [Azure DNS](https://azure.microsoft.com/documentation/services/dns/) - provides domain and DNS management. + +- [Traffic + Manager](https://azure.microsoft.com/documentation/articles/traffic-manager-overview/) - provides DNS level traffic routing, load balancing, and failover + capabilities. + +#### Direct Connect and Azure ExpressRoute + +Azure provides similar site-to-site dedicated connections through its +[ExpressRoute](https://azure.microsoft.com/documentation/services/expressroute/) +service. ExpressRoute allows you to connect your local network directly to Azure +resources using a dedicated private network connection. 
Azure also offers more +conventional [site-to-site VPN +connections](https://azure.microsoft.com/documentation/articles/vpn-gateway-howto-site-to-site-resource-manager-portal/) +at a lower cost. + +#### See also + +- [Create a virtual network using the Azure + portal](https://azure.microsoft.com/documentation/articles/virtual-networks-create-vnet-arm-pportal/) + +- [Plan and design Azure Virtual + Networks](https://azure.microsoft.com/documentation/articles/virtual-network-vnet-plan-design-arm/) + +- [Azure Network Security Best + Practices](https://azure.microsoft.com/documentation/articles/azure-security-network-security-best-practices/) + +### Database services + +#### RDS and Azure SQL Database service + +AWS and Azure have different approaches on relational database offerings in the +cloud. AWS' Relational Database Service (RDS) supports creating instances using +several different database engines, such as Oracle and MySQL. + +[SQL +Database](https://azure.microsoft.com/documentation/articles/sql-database-technical-overview/) +is Azure's cloud database offering. It provides highly scalable relational data +storage, through a managed service. SQL Database uses its own engine, and does +not support the creation of other database types. Other database engines such as +[SQL +Server](https://azure.microsoft.com/services/virtual-machines/sql-server/), +[Oracle](https://azure.microsoft.com/campaigns/oracle/), or +[MySQL](https://azure.microsoft.com/documentation/articles/virtual-machines-windows-classic-mysql-2008r2/) +can be deployed using Azure VM Instances. + +Costs for AWS RDS are determined by the amount of hardware resources that your +instance uses, like CPU, RAM, storage, and network bandwidth. In the SQL +Database service, cost depends on your database size, concurrent connections, +and throughput levels. 
+ +#### See also + +- [Azure SQL Database + Tutorials](https://azure.microsoft.com/documentation/articles/sql-database-explore-tutorials/) + +- [Configure geo-replication for Azure SQL Database with the Azure + portal](https://azure.microsoft.com/documentation/articles/sql-database-geo-replication-portal/) + +- [Introduction to DocumentDB: A NoSQL JSON + Database](https://azure.microsoft.com/documentation/articles/documentdb-introduction/) + +- [How to use Azure Table storage from + Node.js](https://azure.microsoft.com/documentation/articles/storage-nodejs-how-to-use-table-storage/) + +### Security and identity + +#### Directory service and Azure Active Directory + +Azure splits up directory services into the following offerings: + +- [Azure Active + Directory](https://azure.microsoft.com/documentation/articles/active-directory-whatis/) - cloud based directory and identity management service. + +- [Azure Active Directory + B2B](https://azure.microsoft.com/documentation/articles/active-directory-b2b-collaboration-overview/) - enables access to your corporate applications from partner-managed + identities. + +- [Azure Active Directory + B2C](https://azure.microsoft.com/documentation/articles/active-directory-b2c-overview/) - service offering support for single sign-on and user management for + consumer facing applications. + +- [Azure Active Directory Domain + Services](https://azure.microsoft.com/documentation/articles/active-directory-ds-overview/) - hosted domain controller service, allowing Active Directory compatible + domain join and user management functionality. 
+ +#### Web application firewall + +In addition to the [Application Gateway Web Application +Firewall](https://azure.microsoft.com/documentation/articles/application-gateway-webapplicationfirewall-overview/), +you can also [use web application +firewalls](https://azure.microsoft.com/documentation/articles/application-gateway-webapplicationfirewall-overview/) +from third-party vendors like [Barracuda +Networks](https://azure.microsoft.com/marketplace/partners/barracudanetworks/waf/). + +#### See also + +- [Getting started with Microsoft Azure + security](https://azure.microsoft.com/documentation/articles/azure-security-getting-started/) + +- [Azure Identity Management and access control security best + practices](https://azure.microsoft.com/documentation/articles/azure-security-identity-management-best-practices/) + +### Application and messaging services + +#### Simple Email Service + +AWS provides the Simple Email Service (SES) for sending notification, +transactional, or marketing emails. In Azure, third-party solutions like +[Sendgrid](https://sendgrid.com/partners/azure/) provide email services. + +#### Simple Queueing Service + +AWS Simple Queueing Service (SQS) provides a messaging system for connecting +applications, services, and devices within the AWS platform. Azure has two +services that provide similar functionality: + +- [Queue + storage](https://azure.microsoft.com/documentation/articles/storage-nodejs-how-to-use-queues/) - a cloud messaging service that allows communication between application + components within the Azure platform. + +- [Service + Bus](https://azure.microsoft.com/en-us/services/service-bus/) - a more robust messaging system for connecting applications, services, and + devices. Using the related [Service Bus + relay](https://docs.microsoft.com/en-us/azure/service-bus-relay/relay-what-is-it), + Service Bus can also connect to remotely hosted applications and services. 
+ +#### Device Farm + +The AWS Device Farm provides cross-device testing services. In Azure, [Xamarin +Test Cloud](https://www.xamarin.com/test-cloud) provides similar cross-device +front-end testing for mobile devices. + +In addition to front-end testing, the [Azure DevTest +Labs](https://azure.microsoft.com/services/devtest-lab/) provides back end +testing resources for Linux and Windows environments. + +#### See also + +- [How to use Queue storage from + Node.js](https://azure.microsoft.com/documentation/articles/storage-nodejs-how-to-use-queues/) + +- [How to use Service Bus + queues](https://azure.microsoft.com/documentation/articles/service-bus-nodejs-how-to-use-queues/) + +### Analytics and big data + +[The Cortana Intelligence +Suite](https://azure.microsoft.com/suites/cortana-intelligence-suite/) is +Azure's package of products and services designed to capture, organize, analyze, +and visualize large amounts of data. The Cortana suite consists of the following +services: + +- [HDInsight](https://azure.microsoft.com/documentation/services/hdinsight/) - managed Apache distribution that includes Hadoop, Spark, Storm, or HBase. + +- [Data + Factory](https://azure.microsoft.com/documentation/services/data-factory/) - provides data orchestration and data pipeline functionality. + +- [SQL Data + Warehouse](https://azure.microsoft.com/documentation/services/sql-data-warehouse/) - large-scale relational data storage. + +- [Data Lake + Store](https://azure.microsoft.com/documentation/services/data-lake-store/) - large-scale storage optimized for big data analytics workloads. + +- [Machine + Learning](https://azure.microsoft.com/documentation/services/machine-learning/) - used to build and apply predictive analytics on data. + +- [Stream + Analytics](https://azure.microsoft.com/documentation/services/stream-analytics/) - real-time data analysis. 
+ +- [Data Lake + Analytics](https://azure.microsoft.com/documentation/articles/data-lake-analytics-overview/) - large-scale analytics service optimized to work with Data Lake Store. + +- [Power BI](https://powerbi.microsoft.com/) - used to power data + visualization. + +#### See also + +- [Cortana Intelligence Gallery](https://gallery.cortanaintelligence.com/) + +- [Understanding Microsoft big data + solutions](https://msdn.microsoft.com/library/dn749804.aspx) + +- [Azure Data Lake & Azure HDInsight + Blog](https://blogs.msdn.microsoft.com/azuredatalake/) + +### Internet of Things + +#### See also + +- [Get started with Azure IoT + Hub](https://azure.microsoft.com/documentation/articles/iot-hub-csharp-csharp-getstarted/) + +- [Comparison of IoT Hub and Event + Hubs](https://azure.microsoft.com/documentation/articles/iot-hub-compare-event-hubs/) + +### Mobile services + +#### Notifications + +Notification Hubs do not support sending SMS or email messages, so third-party +services are needed for those delivery types. 
+ +#### See also + +- [Create an Android + app](https://azure.microsoft.com/documentation/articles/app-service-mobile-android-get-started/) + +- [Authentication and Authorization in Azure Mobile + Apps](https://azure.microsoft.com/documentation/articles/app-service-mobile-auth/) + +- [Sending push notifications with Azure Notification + Hubs](https://azure.microsoft.com/documentation/articles/notification-hubs-android-push-notification-google-fcm-get-started/) + +### Management and monitoring + +#### See also +- [Monitoring and diagnostics + guidance](https://azure.microsoft.com/documentation/articles/best-practices-monitoring/) + +- [Best practices for creating Azure Resource Manager + templates](https://azure.microsoft.com/documentation/articles/resource-manager-template-best-practices/) + +- [Azure Resource Manager Quickstart + templates](https://azure.microsoft.com/documentation/templates/) + + +## Next steps + +- [Complete AWS and Azure service comparison + matrix](https://aka.ms/azure4aws-services) + +- [Interactive Azure Platform Big + Picture](http://azureplatform.azurewebsites.net/) + +- [Get started with Azure](https://azure.microsoft.com/get-started/) + +- [Azure solution + architectures](https://azure.microsoft.com/solutions/architecture/) + +- [Azure Reference + Architectures](https://azure.microsoft.com/documentation/articles/guidance-architecture/) + +- [Patterns & Practices: Azure + Guidance](https://azure.microsoft.com/documentation/articles/guidance/) + +- [Free Online Course: Microsoft Azure for AWS + Experts](http://aka.ms/azureforaws) diff --git a/docs/aws-professional/services.md b/docs/aws-professional/services.md new file mode 100644 index 00000000000..b4741e6f3fb --- /dev/null +++ b/docs/aws-professional/services.md @@ -0,0 +1,160 @@ +--- +title: Azure and AWS services compared - multicloud +description: See how Microsoft Azure cloud services compare to Amazon Web Services (AWS) for multicloud solutions or migration to Azure. 
Learn the IT capabilities of each. +services: '' +documentationcenter: '' +keywords: cloud services comparison, cloud services compared, multicloud, compare azure aws, compare azure and aws, compare aws and azure, IT capabilities +author: lbrader +manager: christb + +pnp.series.title: Azure for AWS Professionals + +ms.assetid: 02488dea-711a-4618-8c51-667286008989 +ms.service: multiple +ms.workload: na +ms.tgt_pltfrm: na +ms.devlang: na +ms.topic: article +ms.date: 12/29/2016 +ms.author: lbrader + +--- +# Services comparison +[!INCLUDE [header](../_includes/header.md)] + +This article helps you understand how Microsoft Azure services compare to Amazon Web Services (AWS). Whether you are planning a multicloud solution with Azure and AWS, or migrating to Azure, you can compare the IT capabilities of Azure and AWS services in all categories. + +In the tables following, there are multiple Azure services listed for some AWS services. The Azure services are similar to one another, but depth and breadth of capabilities vary. + +## Azure and AWS for multicloud solutions + +As the leading public cloud platforms, Microsoft Azure and Amazon Web Services (AWS) each offer businesses a broad and deep set of capabilities with global coverage. Yet many organizations choose to use both platforms together for greater choice and flexibility, as well as to spread their risk and dependencies with a multicloud approach. Consulting companies and software vendors might also build on and use both Azure and AWS, as these platforms represent most of the cloud market demand. + +For an overview of Azure for AWS users, see [Introduction to Azure for AWS experts](index.md). + + +## Compute services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Virtual servers|EC2|[Virtual Machines](https://azure.microsoft.com/services/virtual-machines/)|Virtual servers allow users to deploy, manage, and maintain OS and server software. 
Instance types provide combinations of CPU/RAM. Users pay for what they use with the flexibility to change sizes.| +|Container management|EC2 Container Service|[Container Service](http://azure.microsoft.com/services/container-service/)|A container management service that supports Docker containers and allows users to run applications on managed instance clusters. It eliminates the need to operate cluster management software or design fault-tolerant cluster architectures.| +|Web application|Elastic Beanstalk|- [Web Apps](https://azure.microsoft.com/services/app-service/web/)
    - [Cloud Services](https://azure.microsoft.com/services/cloud-services/)|A fully managed web infrastructure that provides the underlying web server instances and surrounding security, management, resilience, and shared storage capabilities.| +|Auto scale|Auto Scaling|- [VM Scale Sets](https://azure.microsoft.com/services/virtual-machine-scale-sets/)
    - [App Service AutoScaling](https://docs.microsoft.com/azure/app-service/app-service-environment-auto-scale)|Lets you automatically change the number of instances providing a particular compute workload. You set defined metrics and thresholds that determine if the platform adds or removes instances.| +|Virtual server disk infrastructure|Elastic Block Store (EBS)|- [Page Blobs](https://docs.microsoft.com/azure/storage/storage-introduction#blob-storage)
    - [Premium Storage](https://docs.microsoft.com/azure/storage/storage-premium-storage)|Provides persistent, durable storage volumes for use with virtual machines, and offers the option to select different underlying physical storage types and performance characteristics.| +|Backend process logic|Lambda|- [Functions](https://azure.microsoft.com/services/functions/)
    - [Web Jobs](https://docs.microsoft.com/azure/app-service-web/web-sites-create-web-jobs)
    - [Logic Apps](https://azure.microsoft.com/services/logic-apps/) |Used to integrate systems and run backend processes in response to events or schedules without provisioning or managing servers.| +|Job-based applications|** **|[Batch](https://azure.microsoft.com/services/batch/)|Orchestration of the tasks and interactions between compute resources that are needed when you require processing across hundreds or thousands of compute nodes.| +|Microservice-based applications|** **|[Service Fabric](https://azure.microsoft.com/services/service-fabric/)|A compute service that orchestrates and manages the execution, lifetime, and resilience of complex, inter-related code components that can be either stateless or stateful.| +|API-based application runtime|** **|[API Apps](https://azure.microsoft.com/services/app-service/api/)|Build, manage, and host APIs enabling a variety of languages and SDKs with built-in authentication and analytics.| +|Disaster recovery|** **|[Site recovery](https://azure.microsoft.com/services/site-recovery/)|Automates protection and replication of virtual machines. 
Offers health monitoring, recovery plans, and recovery plan testing.| +|Predefined templates|AWS Quick Start|[Azure Quickstart Templates](https://azure.microsoft.com/resources/templates/)|Community-led templates for creating and deploying virtual machine-based solutions.| +|Marketplace|AWS Marketplace|[Azure Marketplace](https://azure.microsoft.com/marketplace/)|Easy-to-deploy and automatically configured third-party applications, including single virtual machine or multiple virtual machine solutions.| + +## Storage and content delivery services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Object storage|S3|[Blob Storage](https://azure.microsoft.com/services/storage/blobs/)|Object storage service, for use cases including cloud applications, content distribution, backup, archiving, disaster recovery, and big data analytics.| +|Shared file storage|Elastic File System (Preview)|[File Storage](https://azure.microsoft.com/services/storage/files/)|Provides a simple interface to create and configure file systems quickly, and share common files. It’s shared file storage without the need for a supporting virtual machine, and can be used with traditional protocols that access files over a network.| +|Archiving and backup|N/A (software)
    Glacier and S3 (storage)|- [Backup (software)](https://azure.microsoft.com/services/backup/)
    - [Blob Storage (storage)](https://azure.microsoft.com/services/storage/blobs/)|Backup and archival solutions allow files and folders to be backed up and recovered from the cloud, and provide off-site protection against data loss. There are two components of backup: the software service that orchestrates backup/retrieval, and the underlying backup storage infrastructure.| +|Hybrid storage|Storage Gateway|[StorSimple](https://azure.microsoft.com/services/storsimple/)|Integrates on-premises IT environments with cloud storage. Automates data management and storage, plus supports disaster recovery.| +|Data transport|Import/Export Snowball|[Import/Export](https://azure.microsoft.com/pricing/details/storage-import-export/)|A data transport solution that uses secure disks and appliances to transfer large amounts of data. Also offers data protection during transit.| +|Content delivery|CloudFront|[Content Delivery Network](https://azure.microsoft.com/services/cdn/)|A global content delivery network that delivers audio, video, applications, images, and other files.| + +## Databases + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Relational database|RDS|[SQL Database](https://azure.microsoft.com/services/sql-database/)|Relational database-as-a-service (DBaaS) where the database resilience, scale, and maintenance are primarily handled by the platform.| +|NoSQL database|DynamoDB|[DocumentDB](https://azure.microsoft.com/services/documentdb/)|A globally distributed NoSQL database service that supports elastically scaling throughput and storage across multiple regions, supports multiple well-defined consistency models, and is capable of automatically indexing data to serve SQL and MongoDB APIs.| +|Data warehouse|Redshift|[SQL Data Warehouse](https://azure.microsoft.com/services/sql-data-warehouse/) |A fully managed data warehouse that analyzes data using business intelligence tools.
It can transact SQL queries across relational and non-relational data.| +|Table storage|DynamoDB
    SimpleDB|[Table Storage](https://azure.microsoft.com/services/storage/tables/)|A non-relational data store for semi-structured data. Developers store and query data items via web services requests.| +|Caching|ElastiCache|[Azure Redis Cache](https://azure.microsoft.com/services/cache/)|An in-memory based, distributed caching service that provides a high-performance store typically used to offload non-transactional work from a database.| +|Database migration|Database Migration Service (Preview)|[SQL Database Migration Wizard](https://sqlazuremw.codeplex.com/)|Typically is focused on the migration of database schema and data from one database format to a specific database technology in the cloud.| + +## Networking services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Networking|Virtual Private Cloud|[Virtual Network](https://azure.microsoft.com/services/virtual-network/)|Provides an isolated, private environment in the cloud. Users have control over their virtual networking environment, including selection of their own IP address range, creation of subnets, and configuration of route tables and network gateways.| +|Domain name system (DNS)|Route 53|- [DNS](https://azure.microsoft.com/services/dns/)
    - [Traffic Manager](https://azure.microsoft.com/services/traffic-manager/)|A service that hosts domain names, plus routes users to Internet applications, connects user requests to datacenters, manages traffic to apps, and improves app availability with automatic failover.| +|Dedicated network|Direct Connect|[ExpressRoute](https://azure.microsoft.com/services/expressroute/)|Establishes a dedicated, private network connection from a location to the cloud provider (not over the Internet).| +|Load balancing|Elastic Load Balancing|- [Load Balancer](https://azure.microsoft.com/services/load-balancer/)
    - [Application Gateway](https://azure.microsoft.com/services/application-gateway/)|Automatically distributes incoming application traffic to add scale, handle failover, and route to a collection of resources.| + +## Developer tools + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +| Development tools |AWS Toolkit for Microsoft Visual Studio
    AWS Toolkit for Eclipse | [Visual Studio](https://www.visualstudio.com/vs/azure-tools/) |Development tools to help build, manage, and deploy cloud applications. | +| Dev-Test | Development and Test | [Development and Test](https://azure.microsoft.com/solutions/dev-test/)| Creates consistent development and test environments through a scalable, on-demand infrastructure. | + +## Management tools + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Deployment orchestration|OpsWorks
    CloudFormation|- [Resource Manager](https://docs.microsoft.com/azure/azure-resource-manager/resource-group-overview)
    - [Automation](https://azure.microsoft.com/services/automation/)
    - [VM extensions](https://docs.microsoft.com/azure/virtual-machines/virtual-machines-windows-extensions-features)|Configures and operates applications of all shapes and sizes, and provides templates to create and manage a collection of resources.| +|Management and monitoring|CloudWatch
    CloudTrail|- [Log Analytics](https://azure.microsoft.com/services/log-analytics/)
    - [Azure portal](https://azure.microsoft.com/features/azure-portal/)
    - [Application Insights](https://azure.microsoft.com/services/application-insights/)|Management and monitoring services for cloud resources and applications to collect, track, store, analyze, and deliver metrics and log files.| +|Optimization|Trusted Advisor|[Advisor (preview)](https://azure.microsoft.com/services/advisor)|Provides analysis of cloud resource configuration and security so subscribers can ensure they’re using best practices and optimum configurations.| +|Job scheduling|** **|[Scheduler](https://azure.microsoft.com/services/scheduler/)|Runs jobs on simple or complex recurring schedules—now, later, or recurring.| +|Catalog service|Service Catalog|** **|Creates and manages catalogs of approved IT services so users can quickly find and deploy them.| +|Administration|Config|[Azure portal (audit logs)](https://docs.microsoft.com/azure/azure-resource-manager/resource-group-audit)|Provides resource inventory, configuration history, and configuration change notifications for security and governance.| +|Programmatic access|Command Line Interface|- [Azure Command Line Interface (CLI)](https://docs.microsoft.com/azure/xplat-cli-install)
    - [Azure PowerShell](https://docs.microsoft.com/azure/powershell-install-configure)|Built on top of the native REST API across all cloud services, various programming language-specific wrappers provide easier ways to create solutions.| + +## Security and identity services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Authentication and authorization|Identity and Access Management
    Multi-Factor Authentication|- [Azure AD/Role-based access control](https://docs.microsoft.com/azure/active-directory/role-based-access-control-what-is)
    - [Multi-Factor Authentication](https://azure.microsoft.com/services/multi-factor-authentication/)|Lets users securely control access to services and resources while offering data security and protection. Create and manage users and groups, and use permissions to allow and deny access to resources.| +|Encryption|Key Management Service
    CloudHSM|[Key Vault](https://azure.microsoft.com/services/key-vault/)|Creates, controls, and protects the encryption keys used to encrypt data. HSM provides hardware-based key storage.| +|Firewall|Web Application Firewall|[Web Application Firewall (preview)](https://docs.microsoft.com/azure/application-gateway/application-gateway-webapplicationfirewall-overview)|A firewall that protects web applications from common web exploits. Users can define customizable web security rules.| +|Security|Inspector (Preview)|[Security Center](https://azure.microsoft.com/services/security-center/) |An automated security assessment service that improves the security and compliance of applications. Automatically assess applications for vulnerabilities or deviations from best practices.| +|Directory|Directory Service|- [Azure Active Directory](https://azure.microsoft.com/services/active-directory/)
    - [Azure Active Directory B2C](https://azure.microsoft.com/services/active-directory-b2c/)
    - [Azure Active Directory Domain Services](https://azure.microsoft.com/services/active-directory-ds/)|Typically provides user/group properties that can be queried and used in applications. Also can provide capabilities to integrate to on-premises Active Directory services for single sign-on scenarios and SaaS management.| + +## Analytics services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Big data processing|Elastic MapReduce (EMR)|[HDInsight](https://azure.microsoft.com/services/hdinsight/)|Supports technologies that break up large data processing tasks into multiple jobs, and then combine the results together to enable massive parallelism.| +|Data orchestration|Data Pipeline|[Data Factory](https://azure.microsoft.com/services/data-factory/)|Processes and moves data between different compute and storage services, as well as on-premises data sources at specified intervals. Users can create, schedule, orchestrate, and manage data pipelines.| +|Analytics|Kinesis Analytics (Preview)|- [Stream Analytics](https://azure.microsoft.com/services/stream-analytics/)
    - [Data Lake Analytics](https://azure.microsoft.com/services/data-lake-analytics/)
    - [Data Lake Store](https://azure.microsoft.com/services/data-lake-store/) |Storage and analysis platforms that create insights from large quantities of data, or data that originates from many sources.| +|Visualization|QuickSight (Preview)|[PowerBI](https://powerbi.microsoft.com/)|Business intelligence tools that build visualizations, perform ad-hoc analysis, and develop business insights from data.| +|Machine learning|Machine Learning|[Machine Learning](https://azure.microsoft.com/services/machine-learning/)|Produces an end-to-end workflow to create, process, refine, and publish predictive models that can be used to understand what might happen from complex data sets.| +|Search|Elasticsearch Service|[Search](https://azure.microsoft.com/services/search/)|Delivers full-text search and related search analytics and capabilities.| +|Data discovery|** **|[Data Catalog](https://azure.microsoft.com/services/data-catalog/) |Provides the ability to better register, enrich, discover, understand, and consume data sources.| + +## Mobile services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Pro app development|Mobile Hub (Beta)
    Cognito|[Mobile Apps](https://azure.microsoft.com/services/app-service/mobile/)|Backend mobile services for rapid development of mobile solutions that also provide identity management, data synchronization, storage, and notifications across devices.| +|High-level app development|** **|[PowerApps](https://powerapps.microsoft.com/)|Model-driven application development for business applications with SaaS integration.| +|Analytics|Mobile Analytics|[Mobile Engagement](https://azure.microsoft.com/services/mobile-engagement/)|Provides real-time analytics from mobile app data, highlights app users’ behavior, measures app usage, and tracks key trends.| +|Notification|Simple Notification Service|[Notification Hubs](https://azure.microsoft.com/services/notification-hubs/)|A push notification service that delivers messages instantly to applications or users. Messages can be sent to individual devices or can be broadcast.| + +## Application services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Email|Simple Email Service | |Lets users send transactional email, marketing messages, or any other type of content to customers.| +|Messaging|Simple Queue Service|- [Queue Storage](https://azure.microsoft.com/services/storage/queues/)
    - [Service Bus queues](https://docs.microsoft.com/azure/service-bus-messaging/service-bus-dotnet-get-started-with-queues)
    - [Service Bus topics](https://docs.microsoft.com/azure/service-bus-messaging/service-bus-dotnet-how-to-use-topics-subscriptions)
    - [Service Bus relay](https://docs.microsoft.com/azure/service-bus-relay/relay-what-is-it)|Stores large numbers of messages that can be accessed from anywhere through authenticated calls using HTTP or HTTPS. A queue can contain millions of messages, up to the total capacity limit of a storage account, and may also support more complex topologies such as publish/subscribe.| +|Workflow|Simple Workflow Service|[Logic Apps](https://azure.microsoft.com/services/logic-apps/)|A state tracker and task coordinator service that allows developers to build, run, and scale background activities using a visual processes flow creation.| +|App testing|Device Farm (Front End)|- [Xamarin Test Cloud (Front End)](https://www.xamarin.com/test-cloud)
    - [Azure DevTest Labs (Back End)](https://azure.microsoft.com/services/devtest-lab/)|A range of services geared toward the orchestration of dev/test backend server and service application infrastructure, as well as front-end client device and software testing and simulation.| +|API management|API Gateway|[API Management](https://azure.microsoft.com/services/api-management/)|Allows developers to create, publish, maintain, monitor, and secure APIs. Handles processing concurrent API calls, including traffic management, authorization, access control, monitoring, and API version management.| +|Application streaming|AppStream|[RemoteApp](https://www.remoteapp.windowsazure.com/Default.aspx)|Streams and delivers existing applications from the cloud to reach more users on more devices—without any code modifications.| +|Search|CloudSearch|[Search](https://azure.microsoft.com/services/search/)|Sets up, manages, and scales a search solution for websites and applications.| +|Media transcoding|Elastic Transcoder|[Encoding](https://azure.microsoft.com/services/media-services/encoding/)|A media transcoding service in the cloud that transcodes media files from their source format into versions that play back on devices such as smartphones, tablets, and PCs.| +|Streaming|** **|[Live and on-demand streaming](https://azure.microsoft.com/services/media-services/live-on-demand/)|Delivers content to virtually any device. Offers scalable streaming.| +|Others|** **|- [Media Player](https://azure.microsoft.com/services/media-services/media-player/)
    - [Media Indexer](https://azure.microsoft.com/services/media-services/media-indexer/)
    - [Content Protection](https://azure.microsoft.com/services/media-services/content-protection/)|Additional services related to the playing, protection, and analysis of the content within the media service.| + +## Enterprise applications + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Productivity software|WorkSpaces
    WorkMail
    WorkDocs|[Office 365](https://products.office.com/)|Provides communication, collaboration, and document management services in the cloud.| + +## Internet of things (IoT) services + +|Subcategory|AWS Service|Azure Service|Description| +|--- |--- |--- |--- | +|Streaming data|Kinesis Firehose
    Kinesis Streams|[Event Hubs](https://azure.microsoft.com/services/event-hubs/)|Services that allow the mass ingestion of small data inputs, typically from devices and sensors, to process and route the data.| +|Internet of Things|IoT (Preview)|[IoT Hub](https://azure.microsoft.com/services/iot-hub/)|Lets connected devices interact with cloud applications and other devices to capture and analyze real-time data.| diff --git a/docs/aws-professional/toc.md b/docs/aws-professional/toc.md new file mode 100644 index 00000000000..884ff7a9b20 --- /dev/null +++ b/docs/aws-professional/toc.md @@ -0,0 +1,3 @@ +# Azure for AWS Professionals +## [Overview](./index.md) +## [Services comparison](./services.md) \ No newline at end of file diff --git a/docs/best-practices/api-design.md b/docs/best-practices/api-design.md new file mode 100644 index 00000000000..c1e72a10690 --- /dev/null +++ b/docs/best-practices/api-design.md @@ -0,0 +1,566 @@ +--- +title: API design guidance +description: Guidance on how to create a well-designed API. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 19514a32-923a-488c-85f5-b5beec2576de +ms.service: best-practice +ms.devlang: rest-api +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# API design +[!INCLUDE [header](../_includes/header.md)] + +Many modern web-based solutions make use of web services, hosted by web servers, to provide functionality for remote client applications. The operations that a web service exposes constitute a web API. A well-designed web API should aim to support: + +* **Platform independence**. Client applications should be able to utilize the API that the web service provides without requiring knowledge of how the data or operations that the API exposes are physically implemented.
This requires that the API abides by common standards that enable a client application and web service to agree on which data formats to use, and the structure of the data that is exchanged between client applications and the web service. +* **Service evolution**. The web service should be able to evolve and add (or remove) functionality independently from client applications. Existing client applications should be able to continue to operate unmodified as the features provided by the web service change. All functionality should also be discoverable, so that client applications can fully utilize it. + +The purpose of this guidance is to describe the issues that you should consider when designing a web API. + +## Introduction to Representational State Transfer (REST) +In his dissertation in 2000, Roy Fielding proposed an alternative architectural approach to structuring the operations exposed by web services; REST. REST is an architectural style for building distributed systems based on hypermedia. A primary advantage of the REST model is that it is based on open standards and does not bind the implementation of the model or the client applications that access it to any specific implementation. For example, a REST web service could be implemented by using the Microsoft ASP.NET Web API, and client applications could be developed by using any language and toolset that can generate HTTP requests and parse HTTP responses. + +> [!NOTE] +> REST is actually independent of any underlying protocol and is not necessarily tied to HTTP. However, most common implementations of systems that are based on REST utilize HTTP as the application protocol for sending and receiving requests. This document focuses on mapping REST principles to systems designed to operate using HTTP. +> +> + +The REST model uses a navigational scheme to represent objects and services over a network (referred to as *resources*). 
Many systems that implement REST typically use the HTTP protocol to transmit requests to access these resources. In these systems, a client application submits a request in the form of a URI that identifies a resource, and an HTTP method (the most common being GET, POST, PUT, or DELETE) that indicates the operation to be performed on that resource. The body of the HTTP request contains the data required to perform the operation. The important point to understand is that REST defines a stateless request model. HTTP requests should be independent and may occur in any order, so attempting to retain transient state information between requests is not feasible. The only place where information is stored is in the resources themselves, and each request should be an atomic operation. Effectively, a REST model implements a finite state machine where a request transitions a resource from one well-defined non-transient state to another. + +> [!NOTE] +> The stateless nature of individual requests in the REST model enables a system constructed by following these principles to be highly scalable. There is no need to retain any affinity between a client application making a series of requests and the specific web servers handling those requests. +> +> + +Another crucial point in implementing an effective REST model is to understand the relationships between the various resources to which the model provides access. These resources are typically organized as collections and relationships. For example, suppose that a quick analysis of an ecommerce system shows that there are two collections in which client applications are likely to be interested: orders and customers. Each order and customer should have its own unique key for identification purposes. The URI to access the collection of orders could be something as simple as */orders*, and similarly the URI for retrieving all customers could be */customers*. 
Issuing an HTTP GET request to the */orders* URI should return a list representing all orders in the collection encoded as an HTTP response: + +```HTTP +GET http://adventure-works.com/orders HTTP/1.1 +... +``` + +The response shown below encodes the orders as a JSON list structure: + +```HTTP +HTTP/1.1 200 OK +... +Date: Fri, 22 Aug 2014 08:49:02 GMT +Content-Length: ... +[{"orderId":1,"orderValue":99.90,"productId":1,"quantity":1},{"orderId":2,"orderValue":10.00,"productId":4,"quantity":2},{"orderId":3,"orderValue":16.60,"productId":2,"quantity":4},{"orderId":4,"orderValue":25.90,"productId":3,"quantity":1},{"orderId":5,"orderValue":99.90,"productId":1,"quantity":1}] +``` +To fetch an individual order requires specifying the identifier for the order from the *orders* resource, such as */orders/2*: + +```HTTP +GET http://adventure-works.com/orders/2 HTTP/1.1 +... +``` + +```HTTP +HTTP/1.1 200 OK +... +Date: Fri, 22 Aug 2014 08:49:02 GMT +Content-Length: ... +{"orderId":2,"orderValue":10.00,"productId":4,"quantity":2} +``` + +> [!NOTE] +> For simplicity, these examples show the information in responses being returned as JSON text data. However, there is no reason why resources should not contain any other type of data supported by HTTP, such as binary or encrypted information; the content-type in the HTTP response should specify the type. Also, a REST model may be able to return the same data in different formats, such as XML or JSON. In this case, the web service should be able to perform content negotiation with the client making the request. The request can include an *Accept* header which specifies the preferred format that the client would like to receive and the web service should attempt to honor this format if at all possible. +> +> + +Notice that the response from a REST request makes use of the standard HTTP status codes. 
For example, a request that returns valid data should include the HTTP response code 200 (OK), while a request that fails to find or delete a specified resource should return a response that includes the HTTP status code 404 (Not Found). + +## Design and structure of a RESTful web API +The keys to designing a successful web API are simplicity and consistency. A Web API that exhibits these two factors makes it easier to build client applications that need to consume the API. + +A RESTful web API is focused on exposing a set of connected resources, and providing the core operations that enable an application to manipulate these resources and easily navigate between them. For this reason, the URIs that constitute a typical RESTful web API should be oriented towards the data that it exposes, and use the facilities provided by HTTP to operate on this data. This approach requires a different mindset from that typically employed when designing a set of classes in an object-oriented API which tends to be more motivated by the behavior of objects and classes. Additionally, a RESTful web API should be stateless and not depend on operations being invoked in a particular sequence. The following sections summarize the points you should consider when designing a RESTful web API. + +### Organizing the web API around resources +> [!TIP] +> The URIs exposed by a REST web service should be based on nouns (the data to which the web API provides access) and not verbs (what an application can do with the data). +> +> + +Focus on the business entities that the web API exposes. For example, in a web API designed to support the ecommerce system described earlier, the primary entities are customers and orders. Processes such as the act of placing an order can be achieved by providing an HTTP POST operation that takes the order information and adds it to the list of orders for the customer. 
Internally, this POST operation can perform tasks such as checking stock levels and billing the customer. The HTTP response can indicate whether the order was placed successfully or not. Also note that a resource does not have to be based on a single physical data item. As an example, an order resource might be implemented internally by using information aggregated from many rows spread across several tables in a relational database but presented to the client as a single entity. + +> [!TIP] +> Avoid designing a REST interface that mirrors or depends on the internal structure of the data that it exposes. REST is about more than implementing simple CRUD (Create, Retrieve, Update, Delete) operations over separate tables in a relational database. The purpose of REST is to map business entities and the operations that an application can perform on these entities to the physical implementation of these entities, but a client should not be exposed to these physical details. +> +> + +Individual business entities rarely exist in isolation (although some singleton objects may exist), but instead tend to be grouped together into collections. In REST terms, each entity and each collection are resources. In a RESTful web API, each collection has its own URI within the web service, and performing an HTTP GET request over a URI for a collection retrieves a list of items in that collection. Each individual item also has its own URI, and an application can submit another HTTP GET request using that URI to retrieve the details of that item. You should organize the URIs for collections and items in a hierarchical manner. In the ecommerce system, the URI */customers* denotes the customers collection, and */customers/5* retrieves the details for the single customer with the ID 5 from this collection. This approach helps to keep the web API intuitive.
+ +> [!TIP] +> Adopt a consistent naming convention in URIs; in general it helps to use plural nouns for URIs that reference collections. +> +> + +You also need to consider the relationships between different types of resources and how you might expose these associations. For example, customers may place zero or more orders. A natural way to represent this relationship would be through a URI such as */customers/5/orders* to find all the orders for customer 5. You might also consider representing the association from an order back to a specific customer through a URI such as */orders/99/customer* to find the customer for order 99, but extending this model too far can become cumbersome to implement. A better solution is to provide navigable links to associated resources, such as the customer, in the body of the HTTP response message returned when the order is queried. This mechanism is described in more detail in the section Using the HATEOAS Approach to Enable Navigation To Related Resources later in this guidance. + +In more complex systems there may be many more types of entity, and it can be tempting to provide URIs that enable a client application to navigate through several levels of relationships, such as */customers/1/orders/99/products* to obtain the list of products in order 99 placed by customer 1. However, this level of complexity can be difficult to maintain and is inflexible if the relationships between resources change in the future. Rather, you should seek to keep URIs relatively simple. Bear in mind that once an application has a reference to a resource, it should be possible to use this reference to find items related to that resource. The preceding query can be replaced with the URI */customers/1/orders* to find all the orders for customer 1, and then query the URI */orders/99/products* to find the products in this order (assuming order 99 was placed by customer 1). 
+ +> [!TIP] +> Avoid requiring resource URIs more complex than *collection/item/collection*. +> +> + +Another point to consider is that all web requests impose a load on the web server, and the greater the number of requests the bigger the load. You should attempt to define your resources to avoid “chatty” web APIs that expose a large number of small resources. Such an API may require a client application to submit multiple requests to find all the data that it requires. It may be beneficial to denormalize data and combine related information together into bigger resources that can be retrieved by issuing a single request. However, you need to balance this approach against the overhead of fetching data that might not be frequently required by the client. Retrieving large objects can increase the latency of a request and incur additional bandwidth costs for little advantage if the additional data is not often used. + +Avoid introducing dependencies between the web API and the structure, type, or location of the underlying data sources. For example, if your data is located in a relational database, the web API does not need to expose each table as a collection of resources. Think of the web API as an abstraction of the database, and if necessary introduce a mapping layer between the database and the web API. In this way, if the design or implementation of the database changes (for example, you move from a relational database containing a collection of normalized tables to a denormalized NoSQL storage system such as a document database) client applications are insulated from these changes. + +> [!TIP] +> The source of the data that underpins a web API does not have to be a data store; it could be another service or line-of-business application or even a legacy application running on-premises within an organization. +> +> + +Finally, it might not be possible to map every operation implemented by a web API to a specific resource.
You can handle such *non-resource* scenarios through HTTP GET requests that invoke a piece of functionality and return the results as an HTTP response message. A web API that implements simple calculator-style operations such as add and subtract could provide URIs that expose these operations as pseudo resources and utilize the query string to specify the parameters required. For example, a GET request to the URI */add?operand1=99&operand2=1* could return a response message with the body containing the value 100, and a GET request to the URI */subtract?operand1=50&operand2=20* could return a response message with the body containing the value 30. However, only use these forms of URIs sparingly. + +### Defining operations in terms of HTTP methods +The HTTP protocol defines a number of methods that assign semantic meaning to a request. The common HTTP methods used by most RESTful web APIs are: + +* **GET**, to retrieve a copy of the resource at the specified URI. The body of the response message contains the details of the requested resource. +* **POST**, to create a new resource at the specified URI. The body of the request message provides the details of the new resource. Note that POST can also be used to trigger operations that don't actually create resources. +* **PUT**, to replace or update the resource at the specified URI. The body of the request message specifies the resource to be modified and the values to be applied. +* **DELETE**, to remove the resource at the specified URI. + +> [!NOTE] +> The HTTP protocol also defines other less commonly-used methods, such as PATCH which is used to request selective updates to a resource, HEAD which is used to request a description of a resource, OPTIONS which enables a client application to obtain information about the communication options supported by the server, and TRACE which allows a client to request information that it can use for testing and diagnostics purposes.
+> +> + +The effect of a specific request should depend on whether the resource to which it is applied is a collection or an individual item. The following table summarizes the common conventions adopted by most RESTful implementations using the ecommerce example. Note that not all of these requests might be implemented; it depends on the specific scenario. + +| **Resource** | **POST** | **GET** | **PUT** | **DELETE** | +| --- | --- | --- | --- | --- | +| /customers |Create a new customer |Retrieve all customers |Bulk update of customers (*if implemented*) |Remove all customers | +| /customers/1 |Error |Retrieve the details for customer 1 |Update the details of customer 1 if it exists, otherwise return an error |Remove customer 1 | +| /customers/1/orders |Create a new order for customer 1 |Retrieve all orders for customer 1 |Bulk update of orders for customer 1 (*if implemented*) |Remove all orders for customer 1 (*if implemented*) | + +The purposes of GET and DELETE requests are relatively straightforward, but there is scope for confusion concerning the purpose and effects of POST and PUT requests. + +A POST request should create a new resource with data provided in the body of the request. In the REST model, you frequently apply POST requests to resources that are collections; the new resource is added to the collection. + +> [!NOTE] +> You can also define POST requests that trigger some functionality (and that don't necessarily return data), and these types of request can be applied to collections. For example you could use a POST request to pass a timesheet to a payroll processing service and get the calculated taxes back as a response. +> +> + +A PUT request is intended to modify an existing resource. If the specified resource does not exist, the PUT request could return an error (in some cases, it might actually create the resource).
PUT requests are most frequently applied to resources that are individual items (such as a specific customer or order); they can also be applied to collections, although this is less commonly implemented. Note that PUT requests are idempotent whereas POST requests are not; if an application submits the same PUT request multiple times the results should always be the same (the same resource will be modified with the same values), but if an application repeats the same POST request the result will be the creation of multiple resources. + +> [!NOTE] +> Strictly speaking, an HTTP PUT request replaces an existing resource with the resource specified in the body of the request. If the intention is to modify a selection of properties in a resource but leave other properties unchanged, then this should be implemented by using an HTTP PATCH request. However, many RESTful implementations relax this rule and use PUT for both situations. +> +> + +### Processing HTTP requests +The data included by a client application in many HTTP requests, and the corresponding response messages from the web server, could be presented in a variety of formats (or media types). For example, the data that specifies the details for a customer or order could be provided as XML, JSON, or some other encoded and compressed format. A RESTful web API should support different media types as requested by the client application that submits a request. + +When a client application sends a request that returns data in the body of a message, it can specify the media types it can handle in the Accept header of the request. The following code illustrates an HTTP GET request that retrieves the details of order 2 and requests the result to be returned as JSON (the client should still examine the media type of the data in the response to verify the format of the data returned): + +```HTTP +GET http://adventure-works.com/orders/2 HTTP/1.1 +... +Accept: application/json +...
+``` + +If the web server supports this media type, it can reply with a response that includes a Content-Type header that specifies the format of the data in the body of the message: + +> [!NOTE] +> For maximum interoperability, the media types referenced in the Accept and Content-Type headers should be recognized MIME types rather than some custom media type. +> +> + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +Content-Length: ... +{"orderID":2,"productID":4,"quantity":2,"orderValue":10.00} +``` + +If the web server does not support the requested media type, it can send the data in a different format. In all cases it must specify the media type (such as *application/json*) in the Content-Type header. It is the responsibility of the client application to parse the response message and interpret the results in the message body appropriately. + +Note that in this example, the web server successfully retrieves the requested data and indicates success by passing back a status code of 200 in the response header. If no matching data is found, it should instead return a status code of 404 (not found) and the body of the response message can contain additional information. The format of this information is specified by the Content-Type header, as shown in the following example: + +```HTTP +GET http://adventure-works.com/orders/222 HTTP/1.1 +... +Accept: application/json +... +``` + +Order 222 does not exist, so the response message looks like this: + +```HTTP +HTTP/1.1 404 Not Found +... +Content-Type: application/json; charset=utf-8 +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +Content-Length: ... +{"message":"No such order"} +``` + +When an application sends an HTTP PUT request to update a resource, it specifies the URI of the resource and provides the data to be modified in the body of the request message. It should also specify the format of this data by using the Content-Type header.
A common format used for text-based information is *application/x-www-form-urlencoded*, which comprises a set of name/value pairs separated by the & character. The next example shows an HTTP PUT request that modifies the information in order 1: + +```HTTP +PUT http://adventure-works.com/orders/1 HTTP/1.1 +... +Content-Type: application/x-www-form-urlencoded +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +Content-Length: ... +ProductID=3&Quantity=5&OrderValue=250 +``` + +If the modification is successful, it should ideally respond with an HTTP 204 status code, indicating that the process has been successfully handled, but that the response body contains no further information. The Location header in the response contains the URI of the newly updated resource: + +```HTTP +HTTP/1.1 204 No Content +... +Location: http://adventure-works.com/orders/1 +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +``` + +> [!TIP] +> If the data in an HTTP PUT request message includes date and time information, make sure that your web service accepts dates and times formatted following the ISO 8601 standard. +> +> + +If the resource to be updated does not exist, the web server can respond with a Not Found response as described earlier. Alternatively, if the server actually creates the object itself it could return the status codes HTTP 200 (OK) or HTTP 201 (Created) and the response body could contain the data for the new resource. If the Content-Type header of the request specifies a data format that the web server cannot handle, it should respond with HTTP status code 415 (Unsupported Media Type). + +> [!TIP] +> Consider implementing bulk HTTP PUT operations that can batch updates to multiple resources in a collection. The PUT request should specify the URI of the collection, and the request body should specify the details of the resources to be modified. This approach can help to reduce chattiness and improve performance. 
+> +> + +The format of HTTP POST requests that create new resources is similar to that of PUT requests; the message body contains the details of the new resource to be added. However, the URI typically specifies the collection to which the resource should be added. The following example creates a new order and adds it to the orders collection: + +```HTTP +POST http://adventure-works.com/orders HTTP/1.1 +... +Content-Type: application/x-www-form-urlencoded +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +Content-Length: ... +productID=5&quantity=15&orderValue=400 +``` + +If the request is successful, the web server should respond with a message with HTTP status code 201 (Created). The Location header should contain the URI of the newly created resource, and the body of the response should contain a copy of the new resource; the Content-Type header specifies the format of this data: + +```HTTP +HTTP/1.1 201 Created +... +Content-Type: application/json; charset=utf-8 +Location: http://adventure-works.com/orders/99 +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +Content-Length: ... +{"orderID":99,"productID":5,"quantity":15,"orderValue":400} +``` + +> [!TIP] +> If the data provided by a PUT or POST request is invalid, the web server should respond with a message with HTTP status code 400 (Bad Request). The body of this message can contain additional information about the problem with the request and the formats expected, or it can contain a link to a URL that provides more details. +> +> + +To remove a resource, an HTTP DELETE request simply provides the URI of the resource to be deleted. The following example attempts to remove order 99: + +```HTTP +DELETE http://adventure-works.com/orders/99 HTTP/1.1 +...
+``` + +If the delete operation is successful, the web server should respond with HTTP status code 204, indicating that the process has been successfully handled, but that the response body contains no further information (this is the same response returned by a successful PUT operation, but without a Location header as the resource no longer exists.) It is also possible for a DELETE request to return HTTP status code 200 (OK) or 202 (Accepted) if the deletion is performed asynchronously. + +```HTTP +HTTP/1.1 204 No Content +... +Date: Fri, 22 Aug 2014 09:18:37 GMT +``` + +If the resource is not found, the web server should return a 404 (Not Found) message instead. + +> [!TIP] +> If all the resources in a collection need to be deleted, enable an HTTP DELETE request to be specified for the URI of the collection rather than forcing an application to remove each resource in turn from the collection. +> +> + +### Filtering and paginating data +You should endeavor to keep the URIs simple and intuitive. Exposing a collection of resources through a single URI assists in this respect, but it can lead to applications fetching large amounts of data when only a subset of the information is required. Generating a large volume of traffic not only impacts the performance and scalability of the web server but also adversely affects the responsiveness of client applications requesting the data. + +For example, if orders contain the price paid for the order, a client application that needs to retrieve all orders that have a cost over a specific value might need to retrieve all orders from the */orders* URI and then filter these orders locally. Clearly this process is highly inefficient; it wastes network bandwidth and processing power on the server hosting the web API. + +One solution may be to provide a URI scheme such as */orders/ordervalue_greater_than_n* where *n* is the order price, but for all but a limited number of prices such an approach is impractical.
Additionally, if you need to query orders based on other criteria, you can end up being faced with providing a long list of URIs with possibly non-intuitive names. + +A better strategy for filtering data is to provide the filter criteria in the query string that is passed to the web API, such as */orders?ordervaluethreshold=n*. In this example, the corresponding operation in the web API is responsible for parsing and handling the `ordervaluethreshold` parameter in the query string and returning the filtered results in the HTTP response. + +Some simple HTTP GET requests over collection resources could potentially return a large number of items. To combat the possibility of this occurring you should design the web API to limit the amount of data returned by any single request. You can achieve this by supporting query strings that enable the user to specify the maximum number of items to be retrieved (which could itself be subject to an upper-bound limit to help prevent Denial of Service attacks), and a starting offset into the collection. For example, the query string in the URI */orders?limit=25&offset=50* should retrieve 25 orders starting with the 50th order found in the orders collection. As with filtering data, the operation that implements the GET request in the web API is responsible for parsing and handling the `limit` and `offset` parameters in the query string. To assist client applications, GET requests that return paginated data should also include some form of metadata that indicates the total number of resources available in the collection. You might also consider other intelligent paging strategies; for more information, see [API Design Notes: Smart Paging](http://bizcoder.com/api-design-notes-smart-paging). + +You can follow a similar strategy for sorting data as it is fetched; you could provide a sort parameter that takes a field name as the value, such as */orders?sort=ProductID*.
However, note that this approach can have a deleterious effect on caching (query string parameters form part of the resource identifier used by many cache implementations as the key to cached data). + +You can extend this approach to limit (project) the fields returned if a single resource item contains a large amount of data. For example, you could use a query string parameter that accepts a comma-delimited list of fields, such as */orders?fields=ProductID,Quantity*. + +> [!TIP] +> Give all optional parameters in query strings meaningful defaults. For example, set the `limit` parameter to 10 and the `offset` parameter to 0 if you implement pagination, set the sort parameter to the key of the resource if you implement ordering, and set the `fields` parameter to all fields in the resource if you support projections. +> +> + +### Handling large binary resources +A single resource may contain large binary fields, such as files or images. To overcome the transmission problems caused by unreliable and intermittent connections and to improve response times, consider providing operations that enable such resources to be retrieved in chunks by the client application. To do this, the web API should support the Accept-Ranges header for GET requests for large resources, and ideally implement HTTP HEAD requests for these resources. The Accept-Ranges header indicates that the GET operation supports partial results, and that a client application can submit GET requests that return a subset of a resource specified as a range of bytes. A HEAD request is similar to a GET request except that it only returns a header that describes the resource and an empty message body. A client application can issue a HEAD request to determine whether to fetch a resource by using partial GET requests. The following example shows a HEAD request that obtains information about a product image: + +```HTTP +HEAD http://adventure-works.com/products/10?fields=productImage HTTP/1.1 +... 
+``` + +The response message contains a header that includes the size of the resource (4580 bytes), and the Accept-Ranges header indicates that the corresponding GET operation supports partial results: + +```HTTP +HTTP/1.1 200 OK +... +Accept-Ranges: bytes +Content-Type: image/jpeg +Content-Length: 4580 +... +``` + +The client application can use this information to construct a series of GET operations to retrieve the image in smaller chunks. The first request fetches the first 2500 bytes by using the Range header: + +```HTTP +GET http://adventure-works.com/products/10?fields=productImage HTTP/1.1 +Range: bytes=0-2499 +... +``` + +The response message indicates that this is a partial response by returning HTTP status code 206. The Content-Length header specifies the actual number of bytes returned in the message body (not the size of the resource), and the Content-Range header indicates which part of the resource this is (bytes 0-2499 out of 4580): + +```HTTP +HTTP/1.1 206 Partial Content +... +Accept-Ranges: bytes +Content-Type: image/jpeg +Content-Length: 2500 +Content-Range: bytes 0-2499/4580 +... +_{binary data not shown}_ +``` + +A subsequent request from the client application can retrieve the remainder of the resource by using an appropriate Range header: + +```HTTP +GET http://adventure-works.com/products/10?fields=productImage HTTP/1.1 +Range: bytes=2500- +... +``` + +The corresponding result message should look like this: + +```HTTP +HTTP/1.1 206 Partial Content +... +Accept-Ranges: bytes +Content-Type: image/jpeg +Content-Length: 2080 +Content-Range: bytes 2500-4579/4580 +... +``` + +## Using the HATEOAS approach to enable navigation to related resources +One of the primary motivations behind REST is that it should be possible to navigate the entire set of resources without requiring prior knowledge of the URI scheme.
Each HTTP GET request should return the information necessary to find the resources related directly to the requested object through hyperlinks included in the response, and the response should also include information that describes the operations available on each of these resources. This principle is known as HATEOAS, or Hypermedia as the Engine of Application State. The system is effectively a finite state machine, and the response to each request contains the information necessary to move from one state to another; no other information should be necessary. + +> [!NOTE] +> Currently there are no standards or specifications that define how to model the HATEOAS principle. The examples shown in this section illustrate one possible solution. +> +> + +As an example, to handle the relationship between customers and orders, the data returned in the response for a specific order should contain URIs in the form of a hyperlink identifying the customer that placed the order, and the operations that can be performed on that customer. + +```HTTP +GET http://adventure-works.com/orders/3 HTTP/1.1 +Accept: application/json +... +``` + +The body of the response message contains a `links` array (highlighted in the code example) that specifies the nature of the relationship (*Customer*), the URI of the customer (*http://adventure-works.com/customers/3*), how to retrieve the details of this customer (*GET*), and the MIME types that the web server supports for retrieving this information (*text/xml* and *application/json*). This is all the information that a client application needs to be able to fetch the details of the customer. Additionally, the Links array also includes links for the other operations that can be performed, such as PUT (to modify the customer, together with the format that the web server expects the client to provide), and DELETE. + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Content-Length: ...
+{"orderID":3,"productID":2,"quantity":4,"orderValue":16.60,"links":[(some links omitted){"rel":"customer","href":"http://adventure-works.com/customers/3","action":"GET","types":["text/xml","application/json"]}, +{"rel":"customer","href":"http://adventure-works.com/customers/3","action":"PUT","types":["application/x-www-form-urlencoded"]},{"rel":"customer","href":"http://adventure-works.com/customers/3","action":"DELETE","types":[]}]} +``` + +For completeness, the Links array should also include self-referencing information pertaining to the resource that has been retrieved. These links have been omitted from the previous example, but are highlighted in the following code. Notice that in these links, the relationship *self* has been used to indicate that this is a reference to the resource being returned by the operation: + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"orderID":3,"productID":2,"quantity":4,"orderValue":16.60,"links":[{"rel":"self","href":"http://adventure-works.com/orders/3","action":"GET","types":["text/xml","application/json"]},{"rel":"self","href":"http://adventure-works.com/orders/3","action":"PUT","types":["application/x-www-form-urlencoded"]},{"rel":"self","href":"http://adventure-works.com/orders/3","action":"DELETE","types":[]},{"rel":"customer", +"href":"http://adventure-works.com/customers/3","action":"GET","types":["text/xml","application/json"]},{"rel":"customer" (customer links omitted)}]} +``` + +For this approach to be effective, client applications must be prepared to retrieve and parse this additional information. + +## Versioning a RESTful web API +It is highly unlikely that, in all but the simplest of situations, a web API will remain static. As business requirements change, new collections of resources may be added, the relationships between resources might change, and the structure of the data in resources might be amended.
While updating a web API to handle new or differing requirements is a relatively straightforward process, you must consider the effects that such changes will have on client applications consuming the web API. The issue is that although the developer designing and implementing a web API has full control over that API, the developer does not have the same degree of control over client applications which may be built by third party organizations operating remotely. The primary imperative is to enable existing client applications to continue functioning unchanged while allowing new client applications to take advantage of new features and resources. + +Versioning enables a web API to indicate the features and resources that it exposes, and a client application can submit requests that are directed to a specific version of a feature or resource. The following sections describe several different approaches, each of which has its own benefits and trade-offs. + +### No versioning +This is the simplest approach, and may be acceptable for some internal APIs. Big changes could be represented as new resources or new links. Adding content to existing resources might not present a breaking change as client applications that are not expecting to see this content will simply ignore it. + +For example, a request to the URI *http://adventure-works.com/customers/3* should return the details of a single customer containing `id`, `name`, and `address` fields expected by the client application: + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","address":"1 Microsoft Way Redmond WA 98053"} +``` + +> [!NOTE] +> For the purposes of simplicity and clarity, the example responses shown in this section do not include HATEOAS links. +> +> + +If the `DateCreated` field is added to the schema of the customer resource, then the response would look like this: + +```HTTP +HTTP/1.1 200 OK +... 
+Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","dateCreated":"2014-09-04T12:11:38.0376089Z","address":"1 Microsoft Way Redmond WA 98053"} +``` + +Existing client applications might continue functioning correctly if they are capable of ignoring unrecognized fields, while new client applications can be designed to handle this new field. However, if more radical changes to the schema of resources occur (such as removing or renaming fields) or the relationships between resources change then these may constitute breaking changes that prevent existing client applications from functioning correctly. In these situations you should consider one of the following approaches. + +### URI versioning +Each time you modify the web API or change the schema of resources, you add a version number to the URI for each resource. The previously existing URIs should continue to operate as before, returning resources that conform to their original schema. + +Extending the previous example, if the `address` field is restructured into sub-fields containing each constituent part of the address (such as `streetAddress`, `city`, `state`, and `zipCode`), this version of the resource could be exposed through a URI containing a version number, such as http://adventure-works.com/v2/customers/3: + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","dateCreated":"2014-09-04T12:11:38.0376089Z","address":{"streetAddress":"1 Microsoft Way","city":"Redmond","state":"WA","zipCode":98053}} +``` + +This versioning mechanism is very simple but depends on the server routing the request to the appropriate endpoint. However, it can become unwieldy as the web API matures through several iterations and the server has to support a number of different versions. 
Also, from a purist’s point of view, in all cases the client applications are fetching the same data (customer 3), so the URI should not really be different depending on the version. This scheme also complicates implementation of HATEOAS as all links will need to include the version number in their URIs. + +### Query string versioning +Rather than providing multiple URIs, you can specify the version of the resource by using a parameter within the query string appended to the HTTP request, such as *http://adventure-works.com/customers/3?version=2*. The version parameter should default to a meaningful value such as 1 if it is omitted by older client applications. + +This approach has the semantic advantage that the same resource is always retrieved from the same URI, but it depends on the code that handles the request to parse the query string and send back the appropriate HTTP response. This approach also suffers from the same complications for implementing HATEOAS as the URI versioning mechanism. + +> [!NOTE] +> Some older web browsers and web proxies will not cache responses for requests that include a query string in the URL. This can have an adverse impact on performance for web applications that use a web API and that run from within such a web browser. +> +> + +### Header versioning +Rather than appending the version number as a query string parameter, you could implement a custom header that indicates the version of the resource. This approach requires that the client application adds the appropriate header to any requests, although the code handling the client request could use a default value (version 1) if the version header is omitted. The following examples utilize a custom header named *Custom-Header*. The value of this header indicates the version of web API. + +Version 1: + +```HTTP +GET http://adventure-works.com/customers/3 HTTP/1.1 +... +Custom-Header: api-version=1 +... +``` + +```HTTP +HTTP/1.1 200 OK +... 
+Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","address":"1 Microsoft Way Redmond WA 98053"} +``` + +Version 2: + +```HTTP +GET http://adventure-works.com/customers/3 HTTP/1.1 +... +Custom-Header: api-version=2 +... +``` + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","dateCreated":"2014-09-04T12:11:38.0376089Z","address":{"streetAddress":"1 Microsoft Way","city":"Redmond","state":"WA","zipCode":98053}} +``` + +Note that as with the previous two approaches, implementing HATEOAS requires including the appropriate custom header in any links. + +### Media type versioning +When a client application sends an HTTP GET request to a web server it should stipulate the format of the content that it can handle by using an Accept header, as described earlier in this guidance. Frequently the purpose of the *Accept* header is to allow the client application to specify whether the body of the response should be XML, JSON, or some other common format that the client can parse. However, it is possible to define custom media types that include information enabling the client application to indicate which version of a resource it is expecting. The following example shows a request that specifies an *Accept* header with the value *application/vnd.adventure-works.v1+json*. The *vnd.adventure-works.v1* element indicates to the web server that it should return version 1 of the resource, while the *json* element specifies that the format of the response body should be JSON: + +```HTTP +GET http://adventure-works.com/customers/3 HTTP/1.1 +... +Accept: application/vnd.adventure-works.v1+json +... 
+``` + +The code handling the request is responsible for processing the *Accept* header and honoring it as far as possible (the client application may specify multiple formats in the *Accept* header, in which case the web server can choose the most appropriate format for the response body). The web server confirms the format of the data in the response body by using the Content-Type header: + +```HTTP +HTTP/1.1 200 OK +... +Content-Type: application/vnd.adventure-works.v1+json; charset=utf-8 +... +Content-Length: ... +{"id":3,"name":"Contoso LLC","address":"1 Microsoft Way Redmond WA 98053"} +``` + +If the Accept header does not specify any known media types, the web server could generate an HTTP 406 (Not Acceptable) response message or return a message with a default media type. + +This approach is arguably the purest of the versioning mechanisms and lends itself naturally to HATEOAS, which can include the MIME type of related data in resource links. + +> [!NOTE] +> When you select a versioning strategy, you should also consider the implications on performance, especially caching on the web server. The URI versioning and Query String versioning schemes are cache-friendly inasmuch as the same URI/query string combination refers to the same data each time. +> +> The Header versioning and Media Type versioning mechanisms typically require additional logic to examine the values in the custom header or the Accept header. In a large-scale environment, many clients using different versions of a web API can result in a significant amount of duplicated data in a server-side cache. This issue can become acute if a client application communicates with a web server through a proxy that implements caching, and that only forwards a request to the web server if it does not currently hold a copy of the requested data in its cache. +> +> + +## More information +* The [RESTful Cookbook](http://restcookbook.com/) contains an introduction to building RESTful APIs. 
+* The Web [API Checklist](https://mathieu.fenniak.net/the-api-checklist/) contains a useful list of items to consider when designing and implementing a Web API. diff --git a/docs/best-practices/api-implementation.md b/docs/best-practices/api-implementation.md new file mode 100644 index 00000000000..b3479bd7c17 --- /dev/null +++ b/docs/best-practices/api-implementation.md @@ -0,0 +1,1145 @@ +--- +title: API implementation guidance +description: Guidance upon how to implement an API. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 93d6a18b-de51-4b3c-9cb1-35eefb6c1747 +ms.service: best-practice +ms.devlang: rest-api +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# API implementation +[!INCLUDE [header](../_includes/header.md)] + +A carefully-designed RESTful web API defines the resources, relationships, and navigation schemes that are accessible to client applications. When you implement and deploy a web API, you should consider the physical requirements of the environment hosting the web API and the way in which the web API is constructed rather than the logical structure of the data. This guidance focusses on best practices for implementing a web API and publishing it to make it available to client applications. Security concerns are described separately in the API Security Guidance document. You can find detailed information about web API design in the API Design Guidance document. + +## Considerations for implementing a RESTful web API +The following sections illustrate best practice for using the ASP.NET Web API template to build a RESTful web API. For detailed information on using the Web API template, visit the [Learn About ASP.NET Web API](http://www.asp.net/web-api) page on the Microsoft website. 
+ +## Considerations for implementing request routing +In a service implemented by using the ASP.NET Web API, each request is routed to a method in a *controller* class. The Web API framework provides two primary options for implementing routing; *convention-based* routing and *attribute-based* routing. Consider the following points when you determine the best way to route requests in your web API: + +* **Understand the limitations and requirements of convention-based routing**. + + By default, the Web API framework uses convention-based routing. The Web API framework creates an initial routing table that contains the following entry: + + ```C# + config.Routes.MapHttpRoute( + name: "DefaultApi", + routeTemplate: "api/{controller}/{id}", + defaults: new { id = RouteParameter.Optional } + ); + ``` + + Routes can be generic, comprising literals such as *api* and variables such as *{controller}* and *{id}*. Convention-based routing allows some elements of the route to be optional. The Web API framework determines which method to invoke in the controller by matching the HTTP method in the request to the initial part of the method name in the API, and then by matching any optional parameters. For example, if a controller named *orders* contains the methods *GetAllOrders()* and *GetOrderById(int id)* then the GET request *http://www.adventure-works.com/api/orders/* will be directed to the method *GetAllOrders()* and the GET request *http://www.adventure-works.com/api/orders/99* will be routed to the method *GetOrderById(int id)*. If there is no matching method available that begins with the prefix Get in the controller, the Web API framework replies with an HTTP 405 (Method Not Allowed) message. Additionally, the name of the parameter (id) specified in the routing table must be the same as the name of the parameter for the *GetOrderById* method, otherwise the Web API framework will reply with an HTTP 404 (Not Found) response.
+ + The same rules apply to POST, PUT, and DELETE HTTP requests; a PUT request that updates the details of order 101 would be directed to the URI *http://www.adventure-works.com/api/orders/101*, the body of the message will contain the new details of the order, and this information will be passed as a parameter to a method in the orders controller with a name that starts with the prefix *Put*, such as *PutOrder*. + + The default routing table will not match a request that references child resources in a RESTful web API, such as *http://www.adventure-works.com/api/customers/1/orders* (find the details of all orders placed by customer 1). To handle these cases, you can add custom routes to the routing table: + + ```C# + config.Routes.MapHttpRoute( + name: "CustomerOrdersRoute", + routeTemplate: "api/customers/{custId}/orders", + defaults: new { controller="Customers", action="GetOrdersForCustomer" } + ); + ``` + + This route directs requests that match the URI to the *GetOrdersForCustomer* method in the *Customers* controller. This method must take a single parameter named *custId*: + + ```C# + public class CustomersController : ApiController + { + ... + public IEnumerable GetOrdersForCustomer(int custId) + { + // Find orders for the specified customer + var orders = ... + return orders; + } + ... + } + ``` + + > [!TIP] + > Utilize the default routing wherever possible and avoid defining many complicated custom routes as this can result in brittleness (it is very easy to add methods to a controller that result in ambiguous routes) and reduced performance (the bigger the routing table, the more work the Web API framework has to do to work out which route matches a given URI). Keep the API and routes simple. For more information, see the section Organizing the Web API Around Resources in the API Design Guidance. If you must define custom routes, a preferable approach is to use attribute-based routing described later in this section.
+ > + > + + For more information about convention-based routing, see the page [Routing in ASP.NET Web API](http://www.asp.net/web-api/overview/web-api-routing-and-actions/routing-in-aspnet-web-api) on the Microsoft website. +* **Avoid ambiguity in routing**. + + Convention-based routing can result in ambiguous pathways if multiple methods in a controller match the same route. In these situations, the Web API framework responds with an HTTP 500 (Internal Server Error) response message containing the text "Multiple actions were found that match the request". +* **Prefer attribute-based routing**. + + Attribute-based routing provides an alternative means for connecting routes to methods in a controller. Rather than relying on the pattern-matching features of convention-based routing, you can explicitly annotate methods in a controller with the details of the route to which they should be associated. This approach helps to remove possible ambiguities. Furthermore, as explicit routes are defined at design time this approach is more efficient than convention-based routing at runtime. The following code shows how to apply the *Route* attribute to methods in the Customers controller. These methods also use the HttpGet attribute to indicate that they should respond to *HTTP GET* requests. This attribute enables you to name your methods using any convenient naming scheme rather than that expected by convention-based routing. You can also annotate methods with the *HttpPost*, *HttpPut*, and *HttpDelete* attributes to define methods that respond to other types of HTTP requests. + + ```C# + public class CustomersController : ApiController + { + ... + [Route("api/customers/{id}")] + [HttpGet] + public Customer FindCustomerByID(int id) + { + // Find the matching customer + var customer = ... + return customer; + } + ...
+ [Route("api/customers/{id}/orders")] + [HttpGet] + public IEnumerable FindOrdersForCustomer(int id) + { + // Find orders for the specified customer + var orders = ... + return orders; + } + ... + } + ``` + + Attribute-based routing also has the useful side-effect of acting as documentation for developers needing to maintain the code in the future; it is immediately clear which method belongs to which route, and the *HttpGet* attribute clarifies the type of HTTP request to which the method responds. + + Attribute-based routing enables you to define constraints which restrict how the parameters are matched. Constraints can specify the type of the parameter, and in some cases they can also indicate the acceptable range of parameter values. In the following example, the id parameter to the *FindCustomerByID* method must be a non-negative integer. If an application submits an HTTP GET request with a negative customer number, the Web API framework will respond with an HTTP 405 (Method Not Allowed) message: + + ```C# + public class CustomersController : ApiController + { + ... + [Route("api/customers/{id:int:min(0)}")] + [HttpGet] + public Customer FindCustomerByID(int id) + { + // Find the matching customer + var customer = ... + return customer; + } + ... + } + ``` + + For more information on attribute-based routing, see the page [Attribute Routing in Web API 2](http://www.asp.net/web-api/overview/web-api-routing-and-actions/attribute-routing-in-web-api-2) on the Microsoft website. +* **Support Unicode characters in routes**. + + The keys used to identify resources in GET requests could be strings. Therefore, in a global application, you may need to support URIs that contain non-English characters. +* **Distinguish methods that should not be routed**. + + If you are using convention-based routing, indicate methods that do not correspond to HTTP actions by decorating them with the *NonAction* attribute. 
This typically applies to helper methods defined for use by other methods within a controller, and this attribute will prevent these methods from being matched and invoked by an errant HTTP request. +* **Consider the benefits and tradeoffs of placing the API in a subdomain**. + + By default, the ASP.NET web API organizes APIs into the */api* directory in a domain, such as `http://www.adventure-works.com/api/orders`. This directory resides in the same domain as any other services exposed by the same host. It may be beneficial to split the web API out into its own subdomain running on a separate host, with URIs such as `http://api.adventure-works.com/orders`. This separation enables you to partition and scale the web API more effectively without affecting any other web applications or services running in the *www.adventure-works.com* domain. + + However, placing a web API in a different subdomain can also lead to security concerns. Any web applications or services hosted at *www.adventure-works.com* that invoke a web API running elsewhere may violate the same-origin policy of many web browsers. In this situation, it will be necessary to enable cross-origin resource sharing (CORS) between the hosts. For more information, see the API Security Guidance document. + +## Considerations for processing requests +Once a request from a client application has been successfully routed to a method in a web API, the request must be processed in as efficient manner as possible. Consider the following points when you implement the code to handle requests: + +* **GET, PUT, DELETE, HEAD, and PATCH actions should be idempotent**. + + The code that implements these requests should not impose any side-effects. The same request repeated over the same resource should result in the same state. 
For example, sending multiple DELETE requests to the same URI should have the same effect, although the HTTP status code in the response messages may be different (the first DELETE request might return status code 204 (No Content) while a subsequent DELETE request might return status code 404 (Not Found)). + +> [!NOTE] +> The article [Idempotency Patterns](http://blog.jonathanoliver.com/idempotency-patterns/) on Jonathan Oliver’s blog provides an overview of idempotency and how it relates to data management operations. +> +> + +* **POST actions that create new resources should do so without unrelated side-effects**. + + If a POST request is intended to create a new resource, the effects of the request should be limited to the new resource (and possibly any directly related resources if there is some sort of linkage involved) For example, in an ecommerce system, a POST request that creates a new order for a customer might also amend inventory levels and generate billing information, but it should not modify information not directly related to the order or have any other side-effects on the overall state of the system. +* **Avoid implementing chatty POST, PUT, and DELETE operations**. + + Support POST, PUT and DELETE requests over resource collections. A POST request can contain the details for multiple new resources and add them all to the same collection, a PUT request can replace the entire set of resources in a collection, and a DELETE request can remove an entire collection. + + Note that the OData support included in ASP.NET Web API 2 provides the ability to batch requests. A client application can package up several web API requests and send them to the server in a single HTTP request, and receive a single HTTP response that contains the replies to each request. 
For more information, see the page [Introducing Batch Support in Web API and Web API OData](http://blogs.msdn.com/b/webdev/archive/2013/11/01/introducing-batch-support-in-web-api-and-web-api-odata.aspx) on the Microsoft website. +* **Abide by the HTTP protocol when sending a response back to a client application**. + + A web API must return messages that contain the correct HTTP status code to enable the client to determine how to handle the result, the appropriate HTTP headers so that the client understands the nature of the result, and a suitably formatted body to enable the client to parse the result. If you are using the ASP.NET Web API template, the default strategy for implementing methods that respond to HTTP POST requests is simply to return a copy of the newly created resource, as illustrated by the following example: + + ```C# + public class CustomersController : ApiController + { + ... + [Route("api/customers")] + [HttpPost] + public Customer CreateNewCustomer(Customer customerDetails) + { + // Add the new customer to the repository + // This method returns a customer with a unique ID allocated + // by the repository + var newCust = repository.Add(customerDetails); + // Return the newly added customer + return newCust; + } + ... + } + ``` + + If the POST operation is successful, the Web API framework creates an HTTP response with status code 200 (OK) and the details of the customer as the message body. However, in this case, according to the HTTP protocol, a POST operation should return status code 201 (Created) and the response message should include the URI of the newly created resource in the Location header of the response message. + + To provide these features, return your own HTTP response message by using the `IHttpActionResult` interface. This approach gives you fine control over the HTTP status code, the headers in the response message, and even the format of the data in the response message body, as shown in the following code example. 
This version of the `CreateNewCustomer` method conforms more closely to the expectations of client following the HTTP protocol. The `Created` method of the `ApiController` class constructs the response message from the specified data, and adds the Location header to the results: + + ```C# + public class CustomersController : ApiController + { + ... + [Route("api/customers")] + [HttpPost] + public IHttpActionResult CreateNewCustomer(Customer customerDetails) + { + // Add the new customer to the repository + var newCust = repository.Add(customerDetails); + + // Create a value for the Location header to be returned in the response + // The URI should be the location of the customer including its ID, + // such as http://adventure-works.com/api/customers/99 + var location = new Uri(...); + + // Return the HTTP 201 response, + // including the location and the newly added customer + return Created(location, newCust); + } + ... + } + ``` +* **Support content negotiation**. + + The body of a response message may contain data in a variety of formats. For example, an HTTP GET request could return data in JSON, or XML format. When the client submits a request, it can include an Accept header that specifies the data formats that it can handle. These formats are specified as media types. For example, a client that issues a GET request that retrieves an image can specify an Accept header that lists the media types that the client can handle, such as "image/jpeg, image/gif, image/png". When the web API returns the result, it should format the data by using one of these media types and specify the format in the Content-Type header of the response. + + If the client does not specify an Accept header, then use a sensible default format for the response body. As an example, the ASP.NET Web API framework defaults to JSON for text-based data. 
+ + > [!NOTE] + > The ASP.NET Web API framework performs some automatic detection of Accept headers and handles them itself based on the type of the data in the body of the response message. For example, if the body of a response message contains a CLR (common language runtime) object, the ASP.NET Web API automatically formats the response as JSON with the Content-Type header of the response set to "application/json" unless the client indicates that it requires the results as XML, in which case the ASP.NET Web API framework formats the response as XML and sets the Content-Type header of the response to "text/xml". However, it may be necessary to handle Accept headers that specify different media types explicitly in the implementation code for an operation. + > + > +* **Provide links to support HATEOAS-style navigation and discovery of resources**. + + The API Design Guidance describes how following the HATEOAS approach enables a client to navigate and discover resources from an initial starting point. This is achieved by using links containing URIs; when a client issues an HTTP GET request to obtain a resource, the response should contain URIs that enable a client application to quickly locate any directly related resources. For example, in a web API that supports an e-commerce solution, a customer may have placed many orders. When a client application retrieves the details for a customer, the response should include links that enable the client application to send HTTP GET requests that can retrieve these orders. Additionally, HATEOAS-style links should describe the other operations (POST, PUT, DELETE, and so on) that each linked resource supports together with the corresponding URI to perform each request. This approach is described in more detail in the API Design Guidance document. + + Currently there are no standards that govern the implementation of HATEOAS, but the following example illustrates one possible approach. 
In this example, an HTTP GET request that finds the details for a customer returns a response that include HATEOAS links that reference the orders for that customer: + + ```HTTP + GET http://adventure-works.com/customers/2 HTTP/1.1 + Accept: text/json + ... + ``` + + ```HTTP + HTTP/1.1 200 OK + ... + Content-Type: application/json; charset=utf-8 + ... + Content-Length: ... + {"CustomerID":2,"CustomerName":"Bert","Links":[ + {"rel":"self", + "href":"http://adventure-works.com/customers/2", + "action":"GET", + "types":["text/xml","application/json"]}, + {"rel":"self", + "href":"http://adventure-works.com/customers/2", + "action":"PUT", + "types":["application/x-www-form-urlencoded"]}, + {"rel":"self", + "href":"http://adventure-works.com/customers/2", + "action":"DELETE", + "types":[]}, + {"rel":"orders", + "href":"http://adventure-works.com/customers/2/orders", + "action":"GET", + "types":["text/xml","application/json"]}, + {"rel":"orders", + "href":"http://adventure-works.com/customers/2/orders", + "action":"POST", + "types":["application/x-www-form-urlencoded"]} + ]} + ``` + + In this example, the customer data is represented by the `Customer` class shown in the following code snippet. The HATEOAS links are held in the `Links` collection property: + + ```C# + public class Customer + { + public int CustomerID { get; set; } + public string CustomerName { get; set; } + public List Links { get; set; } + ... + } + + public class Link + { + public string Rel { get; set; } + public string Href { get; set; } + public string Action { get; set; } + public string [] Types { get; set; } + } + ``` + + The HTTP GET operation retrieves the customer data from storage and constructs a `Customer` object, and then populates the `Links` collection. The result is formatted as a JSON response message. Each link comprises the following fields: + + * The relationship between the object being returned and the object described by the link. 
In this case "self" indicates that the link is a reference back to the object itself (similar to a `this` pointer in many object-oriented languages), and "orders" is the name of a collection containing the related order information. + * The hyperlink (`Href`) for the object being described by the link in the form of a URI. + * The type of HTTP request (`Action`) that can be sent to this URI. + * The format of any data (`Types`) that should be provided in the HTTP request or that can be returned in the response, depending on the type of the request. + + The HATEOAS links shown in the example HTTP response indicate that a client application can perform the following operations: + * An HTTP GET request to the URI *http://adventure-works.com/customers/2* to fetch the details of the customer (again). The data can be returned as XML or JSON. + * An HTTP PUT request to the URI *http://adventure-works.com/customers/2* to modify the details of the customer. The new data must be provided in the request message in x-www-form-urlencoded format. + * An HTTP DELETE request to the URI *http://adventure-works.com/customers/2* to delete the customer. The request does not expect any additional information or return data in the response message body. + * An HTTP GET request to the URI *http://adventure-works.com/customers/2/orders* to find all the orders for the customer. The data can be returned as XML or JSON. + * An HTTP POST request to the URI *http://adventure-works.com/customers/2/orders* to create a new order for this customer. The data must be provided in the request message in x-www-form-urlencoded format. + +## Considerations for handling exceptions +By default, in the ASP.NET Web API framework, if an operation throws an uncaught exception the framework returns a response message with HTTP status code 500 (Internal Server Error). In many cases, this simplistic approach is not useful in isolation, and makes determining the cause of the exception difficult.
Therefore you should adopt a more comprehensive approach to handling exceptions, considering the following points: + +* **Capture exceptions and return a meaningful response to clients**. + + The code that implements an HTTP operation should provide comprehensive exception handling rather than letting uncaught exceptions propagate to the Web API framework. If an exception makes it impossible to complete the operation successfully, the exception can be passed back in the response message, but it should include a meaningful description of the error that caused the exception. The exception should also include the appropriate HTTP status code rather than simply returning status code 500 for every situation. For example, if a user request causes a database update that violates a constraint (such as attempting to delete a customer that has outstanding orders), you should return status code 409 (Conflict) and a message body indicating the reason for the conflict. If some other condition renders the request unachievable, you can return status code 400 (Bad Request). You can find a full list of HTTP status codes on the [Status Code Definitions](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html) page on the W3C website. + + The following code shows an example that traps different conditions and returns an appropriate response. 
+ + ```C# + [HttpDelete] + [Route("customers/{id:int}")] + public IHttpActionResult DeleteCustomer(int id) + { + try + { + // Find the customer to be deleted in the repository + var customerToDelete = repository.GetCustomer(id); + + // If there is no such customer, return an error response + // with status code 404 (Not Found) + if (customerToDelete == null) + { + return NotFound(); + } + + // Remove the customer from the repository + // The DeleteCustomer method returns true if the customer + // was successfully deleted + if (repository.DeleteCustomer(id)) + { + // Return a response message with status code 204 (No Content) + // To indicate that the operation was successful + return StatusCode(HttpStatusCode.NoContent); + } + else + { + // Otherwise return a 400 (Bad Request) error response + return BadRequest(Strings.CustomerNotDeleted); + } + } + catch + { + // If an uncaught exception occurs, return an error response + // with status code 500 (Internal Server Error) + return InternalServerError(); + } + } + ``` + + > [!TIP] + > Do not include information that could be useful to an attacker attempting to penetrate your web API. For further information, visit the [Exception Handling in ASP.NET Web API](http://www.asp.net/web-api/overview/error-handling/exception-handling) page on the Microsoft website. + > + > [!NOTE] + > Many web servers trap error conditions themselves before they reach the web API. For example, if you configure authentication for a web site and the user fails to provide the correct authentication information, the web server should respond with status code 401 (Unauthorized). Once a client has been authenticated, your code can perform its own checks to verify that the client should be able to access the requested resource. If this authorization fails, you should return status code 403 (Forbidden). + > + > +* **Handle exceptions in a consistent manner and log information about errors**.
+ + To handle exceptions in a consistent manner, consider implementing a global error handling strategy across the entire web API. You can achieve part of this by creating an exception filter that runs whenever a controller throws any unhandled exception that is not an `HttpResponseException` exception. This approach is described on the [Exception Handling in ASP.NET Web API](http://www.asp.net/web-api/overview/error-handling/exception-handling) page on the Microsoft website. + + However, there are several situations where an exception filter will not catch an exception, including: + + * Exceptions thrown from controller constructors. + * Exceptions thrown from message handlers. + * Exceptions thrown during routing. + * Exceptions thrown while serializing the content for a response message. + + To handle these cases, you may need to implement a more customized approach. You should also incorporate error logging which captures the full details of each exception; this error log can contain detailed information as long as it is not made accessible over the web to clients. The article [Web API Global Error Handling](http://www.asp.net/web-api/overview/error-handling/web-api-global-error-handling) on the Microsoft website shows one way of performing this task. +* **Distinguish between client-side errors and server-side errors**. + + The HTTP protocol distinguishes between errors that occur due to the client application (the HTTP 4xx status codes), and errors that are caused by a mishap on the server (the HTTP 5xx status codes). Make sure that you respect this convention in any error response messages. + + + +## Considerations for optimizing client-side data access +In a distributed environment such as that involving a web server and client applications, one of the primary sources of concern is the network. This can act as a considerable bottleneck, especially if a client application is frequently sending requests or receiving data. 
Therefore you should aim to minimize the amount of traffic that flows across the network. Consider the following points when you implement the code to retrieve and maintain data: + +* **Support client-side caching**. + + The HTTP 1.1 protocol supports caching in clients and intermediate servers through which a request is routed by the use of the Cache-Control header. When a client application sends an HTTP GET request to the web API, the response can include a Cache-Control header that indicates whether the data in the body of the response can be safely cached by the client or an intermediate server through which the request has been routed, and for how long before it should expire and be considered out-of-date. The following example shows an HTTP GET request and the corresponding response that includes a Cache-Control header: + + ```HTTP + GET http://adventure-works.com/orders/2 HTTP/1.1 + ... + ``` + + ```HTTP + HTTP/1.1 200 OK + ... + Cache-Control: max-age=600, private + Content-Type: text/json; charset=utf-8 + Content-Length: ... + {"orderID":2,"productID":4,"quantity":2,"orderValue":10.00} + ``` + + In this example, the Cache-Control header specifies that the data returned should be expired after 600 seconds, and is only suitable for a single client and must not be stored in a shared cache used by other clients (it is *private*). The Cache-Control header could specify *public* rather than *private* in which case the data can be stored in a shared cache, or it could specify *no-store* in which case the data must **not** be cached by the client. The following code example shows how to construct a Cache-Control header in a response message: + + ```C# + public class OrdersController : ApiController + { + ... + [Route("api/orders/{id:int:min(0)}")] + [HttpGet] + public IHttpActionResult FindOrderByID(int id) + { + // Find the matching order + Order order = ...; + ... 
+ // Create a Cache-Control header for the response + var cacheControlHeader = new CacheControlHeaderValue(); + cacheControlHeader.Private = true; + cacheControlHeader.MaxAge = new TimeSpan(0, 10, 0); + ... + + // Return a response message containing the order and the cache control header + OkResultWithCaching response = new OkResultWithCaching(order, this) + { + CacheControlHeader = cacheControlHeader + }; + return response; + } + ... + } + ``` + + This code makes use of a custom `IHttpActionResult` class named `OkResultWithCaching`. This class enables the controller to set the cache header contents: + + ```C# + public class OkResultWithCaching : OkNegotiatedContentResult + { + public OkResultWithCaching(T content, ApiController controller) + : base(content, controller) { } + + public OkResultWithCaching(T content, IContentNegotiator contentNegotiator, HttpRequestMessage request, IEnumerable formatters) + : base(content, contentNegotiator, request, formatters) { } + + public CacheControlHeaderValue CacheControlHeader { get; set; } + public EntityTagHeaderValue ETag { get; set; } + + public override async Task ExecuteAsync(CancellationToken cancellationToken) + { + HttpResponseMessage response = await base.ExecuteAsync(cancellationToken); + + response.Headers.CacheControl = this.CacheControlHeader; + response.Headers.ETag = ETag; + + return response; + } + } + ``` + + > [!NOTE] + > The HTTP protocol also defines the *no-cache* directive for the Cache-Control header. Rather confusingly, this directive does not mean "do not cache" but rather "revalidate the cached information with the server before returning it"; the data can still be cached, but it is checked each time it is used to ensure that it is still current. + > + > + + Cache management is the responsibility of the client application or intermediate server, but if properly implemented it can save bandwidth and improve performance by removing the need to fetch data that has already been recently retrieved. 
+ + The *max-age* value in the Cache-Control header is only a guide and not a guarantee that the corresponding data won't change during the specified time. The web API should set the max-age to a suitable value depending on the expected volatility of the data. When this period expires, the client should discard the object from the cache. + + > [!NOTE] + > Most modern web browsers support client-side caching by adding the appropriate cache-control headers to requests and examining the headers of the results, as described. However, some older browsers will not cache the values returned from a URL that includes a query string. This is not usually an issue for custom client applications which implement their own cache management strategy based on the protocol discussed here. + > + > Some older proxies exhibit the same behavior and might not cache requests based on URLs with query strings. This could be an issue for custom client applications that connect to a web server through such a proxy. + > + > +* **Provide ETags to Optimize Query Processing**. + + When a client application retrieves an object, the response message can also include an *ETag* (Entity Tag). An ETag is an opaque string that indicates the version of a resource; each time a resource changes the Etag is also modified. This ETag should be cached as part of the data by the client application. The following code example shows how to add an ETag as part of the response to an HTTP GET request. This code uses the `GetHashCode` method of an object to generate a numeric value that identifies the object (you can override this method if necessary and generate your own hash using an algorithm such as MD5) : + + ```C# + public class OrdersController : ApiController + { + ... + public IHttpActionResult FindOrderByID(int id) + { + // Find the matching order + Order order = ...; + ... 
+ + var hashedOrder = order.GetHashCode(); + string hashedOrderEtag = String.Format("\"{0}\"", hashedOrder); + var eTag = new EntityTagHeaderValue(hashedOrderEtag); + + // Return a response message containing the order and the cache control header + OkResultWithCaching response = new OkResultWithCaching(order, this) + { + ..., + ETag = eTag + }; + return response; + } + ... + } + ``` + + The response message posted by the web API looks like this: + + ```HTTP + HTTP/1.1 200 OK + ... + Cache-Control: max-age=600, private + Content-Type: text/json; charset=utf-8 + ETag: "2147483648" + Content-Length: ... + {"orderID":2,"productID":4,"quantity":2,"orderValue":10.00} + ``` + + > [!TIP] + > For security reasons, do not allow sensitive data or data returned over an authenticated (HTTPS) connection to be cached. + > + > + + A client application can issue a subsequent GET request to retrieve the same resource at any time, and if the resource has changed (it has a different ETag) the cached version should be discarded and the new version added to the cache. If a resource is large and requires a significant amount of bandwidth to transmit back to the client, repeated requests to fetch the same data can become inefficient. To combat this, the HTTP protocol defines the following process for optimizing GET requests that you should support in a web API: + + * The client constructs a GET request containing the ETag for the currently cached version of the resource referenced in an If-None-Match HTTP header: + + ```HTTP + GET http://adventure-works.com/orders/2 HTTP/1.1 + If-None-Match: "2147483648" + ... + ``` + * The GET operation in the web API obtains the current ETag for the requested data (order 2 in the above example), and compares it to the value in the If-None-Match header. 
+ * If the current ETag for the requested data matches the ETag provided by the request, the resource has not changed and the web API should return an HTTP response with an empty message body and a status code of 304 (Not Modified). + * If the current ETag for the requested data does not match the ETag provided by the request, then the data has changed and the web API should return an HTTP response with the new data in the message body and a status code of 200 (OK). + * If the requested data no longer exists then the web API should return an HTTP response with the status code of 404 (Not Found). + * The client uses the status code to maintain the cache. If the data has not changed (status code 304) then the object can remain cached and the client application should continue to use this version of the object. If the data has changed (status code 200) then the cached object should be discarded and the new one inserted. If the data is no longer available (status code 404) then the object should be removed from the cache. + + > [!NOTE] + > If the response header contains the Cache-Control header no-store then the object should always be removed from the cache regardless of the HTTP status code. + > + > + + The code below shows the `FindOrderByID` method extended to support the If-None-Match header. Notice that if the If-None-Match header is omitted, the specified order is always retrieved: + + ```C# + public class OrdersController : ApiController + { + ... 
+ [Route("api/orders/{id:int:min(0)}")] + [HttpGet] + public IHttpActionResult FindOrderById(int id) + { + try + { + // Find the matching order + Order order = ...; + + // If there is no such order then return NotFound + if (order == null) + { + return NotFound(); + } + + // Generate the ETag for the order + var hashedOrder = order.GetHashCode(); + string hashedOrderEtag = String.Format("\"{0}\"", hashedOrder); + + // Create the Cache-Control and ETag headers for the response + IHttpActionResult response = null; + var cacheControlHeader = new CacheControlHeaderValue(); + cacheControlHeader.Public = true; + cacheControlHeader.MaxAge = new TimeSpan(0, 10, 0); + var eTag = new EntityTagHeaderValue(hashedOrderEtag); + + // Retrieve the If-None-Match header from the request (if it exists) + var nonMatchEtags = Request.Headers.IfNoneMatch; + + // If there is an ETag in the If-None-Match header and + // this ETag matches that of the order just retrieved, + // then create a Not Modified response message + if (nonMatchEtags.Count > 0 && + String.Compare(nonMatchEtags.First().Tag, hashedOrderEtag) == 0) + { + response = new EmptyResultWithCaching() + { + StatusCode = HttpStatusCode.NotModified, + CacheControlHeader = cacheControlHeader, + ETag = eTag + }; + } + // Otherwise create a response message that contains the order details + else + { + response = new OkResultWithCaching(order, this) + { + CacheControlHeader = cacheControlHeader, + ETag = eTag + }; + } + + return response; + } + catch + { + return InternalServerError(); + } + } + ... + } + ``` + + This example incorporates an additional custom `IHttpActionResult` class named `EmptyResultWithCaching`. 
This class simply acts as a wrapper around an `HttpResponseMessage` object that does not contain a response body: + + ```C# + public class EmptyResultWithCaching : IHttpActionResult + { + public CacheControlHeaderValue CacheControlHeader { get; set; } + public EntityTagHeaderValue ETag { get; set; } + public HttpStatusCode StatusCode { get; set; } + public Uri Location { get; set; } + + public async Task ExecuteAsync(CancellationToken cancellationToken) + { + HttpResponseMessage response = new HttpResponseMessage(StatusCode); + response.Headers.CacheControl = this.CacheControlHeader; + response.Headers.ETag = this.ETag; + response.Headers.Location = this.Location; + return response; + } + } + ``` + + > [!TIP] + > In this example, the ETag for the data is generated by hashing the data retrieved from the underlying data source. If the ETag can be computed in some other way, then the process can be optimized further and the data only needs to be fetched from the data source if it has changed. This approach is especially useful if the data is large or accessing the data source can result in significant latency (for example, if the data source is a remote database). + > + > +* **Use ETags to Support Optimistic Concurrency**. + + To enable updates over previously cached data, the HTTP protocol supports an optimistic concurrency strategy. If, after fetching and caching a resource, the client application subsequently sends a PUT or DELETE request to change or remove the resource, it should include an If-Match header that references the ETag. The web API can then use this information to determine whether the resource has already been changed by another user since it was retrieved and send an appropriate response back to the client application as follows: + + * The client constructs a PUT request containing the new details for the resource and the ETag for the currently cached version of the resource referenced in an If-Match HTTP header.
The following example shows a PUT request that updates an order: + + ```HTTP + PUT http://adventure-works.com/orders/1 HTTP/1.1 + If-Match: "2282343857" + Content-Type: application/x-www-form-urlencoded + ... + Date: Fri, 12 Sep 2014 09:18:37 GMT + Content-Length: ... + productID=3&quantity=5&orderValue=250 + ``` + * The PUT operation in the web API obtains the current ETag for the requested data (order 1 in the above example), and compares it to the value in the If-Match header. + * If the current ETag for the requested data matches the ETag provided by the request, the resource has not changed and the web API should perform the update, returning a message with HTTP status code 204 (No Content) if it is successful. The response can include Cache-Control and ETag headers for the updated version of the resource. The response should always include the Location header that references the URI of the newly updated resource. + * If the current ETag for the requested data does not match the ETag provided by the request, then the data has been changed by another user since it was fetched and the web API should return an HTTP response with an empty message body and a status code of 412 (Precondition Failed). + * If the resource to be updated no longer exists then the web API should return an HTTP response with the status code of 404 (Not Found). + * The client uses the status code and response headers to maintain the cache. If the data has been updated (status code 204) then the object can remain cached (as long as the Cache-Control header does not specify no-store) but the ETag should be updated. If the data was changed by another user (status code 412) or not found (status code 404) then the cached object should be discarded. + + The next code example shows an implementation of the PUT operation for the Orders controller: + + ```C# + public class OrdersController : ApiController + { + ...
+ [HttpPut] + [Route("api/orders/{id:int}")] + public IHttpActionResult UpdateExistingOrder(int id, DTOOrder order) + { + try + { + var baseUri = Constants.GetUriFromConfig(); + var orderToUpdate = this.ordersRepository.GetOrder(id); + if (orderToUpdate == null) + { + return NotFound(); + } + + var hashedOrder = orderToUpdate.GetHashCode(); + string hashedOrderEtag = String.Format("\"{0}\"", hashedOrder); + + // Retrieve the If-Match header from the request (if it exists) + var matchEtags = Request.Headers.IfMatch; + + // If there is an Etag in the If-Match header and + // this etag matches that of the order just retrieved, + // or if there is no etag, then update the Order + if (((matchEtags.Count > 0 && + String.Compare(matchEtags.First().Tag, hashedOrderEtag) == 0)) || + matchEtags.Count == 0) + { + // Modify the order + orderToUpdate.OrderValue = order.OrderValue; + orderToUpdate.ProductID = order.ProductID; + orderToUpdate.Quantity = order.Quantity; + + // Save the order back to the data store + // ... + + // Create the No Content response with Cache-Control, ETag, and Location headers + var cacheControlHeader = new CacheControlHeaderValue(); + cacheControlHeader.Private = true; + cacheControlHeader.MaxAge = new TimeSpan(0, 10, 0); + + hashedOrder = order.GetHashCode(); + hashedOrderEtag = String.Format("\"{0}\"", hashedOrder); + var eTag = new EntityTagHeaderValue(hashedOrderEtag); + + var location = new Uri(string.Format("{0}/{1}/{2}", baseUri, Constants.ORDERS, id)); + var response = new EmptyResultWithCaching() + { + StatusCode = HttpStatusCode.NoContent, + CacheControlHeader = cacheControlHeader, + ETag = eTag, + Location = location + }; + + return response; + } + + // Otherwise return a Precondition Failed response + return StatusCode(HttpStatusCode.PreconditionFailed); + } + catch + { + return InternalServerError(); + } + } + ... 
+ } + ``` + + > [!TIP] + > Use of the If-Match header is entirely optional, and if it is omitted the web API will always attempt to update the specified order, possibly blindly overwriting an update made by another user. To avoid problems due to lost updates, always provide an If-Match header. + > + > + + + +## Considerations for handling large requests and responses +There may be occasions when a client application needs to issue requests that send or receive data that may be several megabytes (or bigger) in size. Waiting while this amount of data is transmitted could cause the client application to become unresponsive. Consider the following points when you need to handle requests that include significant amounts of data: + +* **Optimize requests and responses that involve large objects**. + + Some resources may be large objects or include large fields, such as graphics images or other types of binary data. A web API should support streaming to enable optimized uploading and downloading of these resources. + + The HTTP protocol provides the chunked transfer encoding mechanism to stream large data objects back to a client. When the client sends an HTTP GET request for a large object, the web API can send the reply back in piecemeal *chunks* over an HTTP connection. The length of the data in the reply may not be known initially (it might be generated), so the server hosting the web API should send a response message with each chunk that specifies the Transfer-Encoding: Chunked header rather than a Content-Length header. The client application can receive each chunk in turn to build up the complete response. The data transfer completes when the server sends back a final chunk with zero size. You can implement chunking in the ASP.NET Web API by using the `PushStreamContent` class. + + The following example shows an operation that responds to HTTP GET requests for product images: + + ```C# + public class ProductImagesController : ApiController + { + ... 
+ [HttpGet] + [Route("productimages/{id:int}")] + public IHttpActionResult Get(int id) + { + try + { + var container = ConnectToBlobContainer(Constants.PRODUCTIMAGESCONTAINERNAME); + + if (!BlobExists(container, string.Format("image{0}.jpg", id))) + { + return NotFound(); + } + else + { + return new FileDownloadResult() + { + Container = container, + ImageId = id + }; + } + } + catch + { + return InternalServerError(); + } + } + ... + } + ``` + + In this example, `ConnectToBlobContainer` is a helper method that connects to a specified container (name not shown) in Azure Blob storage. `BlobExists` is another helper method that returns a Boolean value that indicates whether a blob with the specified name exists in the blob storage container. + + Each product has its own image held in blob storage. The `FileDownloadResult` class is a custom `IHttpActionResult` class that uses a `PushStreamContent` object to read the image data from the appropriate blob and transmit it asynchronously as the content of the response message: + + ```C# + public class FileDownloadResult : IHttpActionResult + { + public CloudBlobContainer Container { get; set; } + public int ImageId { get; set; } + + public async Task ExecuteAsync(CancellationToken cancellationToken) + { + var response = new HttpResponseMessage(); + response.Content = new PushStreamContent(async (outputStream, _, __) => + { + try + { + CloudBlockBlob blockBlob = Container.GetBlockBlobReference(String.Format("image{0}.jpg", ImageId)); + await blockBlob.DownloadToStreamAsync(outputStream); + } + finally + { + outputStream.Close(); + } + }); + + response.StatusCode = HttpStatusCode.OK; + response.Content.Headers.ContentType = new MediaTypeHeaderValue("image/jpeg"); + return response; + } + } + ``` + + You can also apply streaming to upload operations if a client needs to POST a new resource that includes a large object. The next example shows the Post method for the `ProductImages` controller.
This method enables the client to upload a new product image: + + ```C# + public class ProductImagesController : ApiController + { + ... + [HttpPost] + [Route("productimages")] + public async Task Post() + { + try + { + if (!Request.Content.Headers.ContentType.MediaType.Equals("image/jpeg")) + { + return StatusCode(HttpStatusCode.UnsupportedMediaType); + } + else + { + var id = new Random().Next(); // Use a random int as the key for the new resource. Should probably check that this key has not already been used + var container = ConnectToBlobContainer(Constants.PRODUCTIMAGESCONTAINERNAME); + return new FileUploadResult() + { + Container = container, + ImageId = id, + Request = Request + }; + } + } + catch + { + return InternalServerError(); + } + } + ... + } + ``` + + This code uses another custom `IHttpActionResult` class called `FileUploadResult`. This class contains the logic for uploading the data asynchronously: + + ```C# + public class FileUploadResult : IHttpActionResult + { + public CloudBlobContainer Container { get; set; } + public int ImageId { get; set; } + public HttpRequestMessage Request { get; set; } + + public async Task ExecuteAsync(CancellationToken cancellationToken) + { + var response = new HttpResponseMessage(); + CloudBlockBlob blockBlob = Container.GetBlockBlobReference(String.Format("image{0}.jpg", ImageId)); + await blockBlob.UploadFromStreamAsync(await Request.Content.ReadAsStreamAsync()); + var baseUri = string.Format("{0}://{1}:{2}", Request.RequestUri.Scheme, Request.RequestUri.Host, Request.RequestUri.Port); + response.Headers.Location = new Uri(string.Format("{0}/productimages/{1}", baseUri, ImageId)); + response.StatusCode = HttpStatusCode.OK; + return response; + } + } + ``` + + > [!TIP] + > The volume of data that you can upload to a web service is not constrained by streaming, and a single request could conceivably result in a massive object that consumes considerable resources. 
If, during the streaming process, the web API determines that the amount of data in a request has exceeded some acceptable bounds, it can abort the operation and return a response message with status code 413 (Request Entity Too Large). + > + > + + You can minimize the size of large objects transmitted over the network by using HTTP compression. This approach helps to reduce the amount of network traffic and the associated network latency, but at the cost of requiring additional processing at the client and the server hosting the web API. For example, a client application that expects to receive compressed data can include an Accept-Encoding: gzip request header (other data compression algorithms can also be specified). If the server supports compression it should respond with the content held in gzip format in the message body and the Content-Encoding: gzip response header. + + > [!TIP] + > You can combine encoded compression with streaming; compress the data first before streaming it, and specify the gzip content encoding and chunked transfer encoding in the message headers. Also note that some web servers (such as Internet Information Server) can be configured to automatically compress HTTP responses regardless of whether the web API compresses the data or not. + > + > +* **Implement partial responses for clients that do not support asynchronous operations**. + + As an alternative to asynchronous streaming, a client application can explicitly request data for large objects in chunks, known as partial responses. The client application sends an HTTP HEAD request to obtain information about the object. If the web API supports partial responses it should respond to the HEAD request with a response message that contains an Accept-Ranges header and a Content-Length header that indicates the total size of the object, but the body of the message should be empty.
The client application can use this information to construct a series of GET requests that specify a range of bytes to receive. The web API should return a response message with HTTP status 206 (Partial Content), a Content-Length header that specifies the actual amount of data included in the body of the response message, and a Content-Range header that indicates which part (such as bytes 4000 to 8000) of the object this data represents. + + HTTP HEAD requests and partial responses are described in more detail in the API Design Guidance document. +* **Avoid sending unnecessary Continue status messages in client applications**. + + A client application that is about to send a large amount of data to a server may determine first whether the server is actually willing to accept the request. Prior to sending the data, the client application can submit an HTTP request with an Expect: 100-Continue header, a Content-Length header that indicates the size of the data, but an empty message body. If the server is willing to handle the request, it should respond with a message that specifies the HTTP status 100 (Continue). The client application can then proceed and send the complete request including the data in the message body. + + If you are hosting a service by using IIS, the HTTP.sys driver automatically detects and handles Expect: 100-Continue headers before passing requests to your web application. This means that you are unlikely to see these headers in your application code, and you can assume that IIS has already filtered any messages that it deems to be unfit or too large. + + If you are building client applications by using the .NET Framework, then all POST and PUT messages will first send messages with Expect: 100-Continue headers by default. As with the server-side, the process is handled transparently by the .NET Framework. However, this process results in each POST and PUT request causing 2 round-trips to the server, even for small requests. 
If your application is not sending requests with large amounts of data, you can disable this feature by using the `ServicePointManager` class to create `ServicePoint` objects in the client application. A `ServicePoint` object handles the connections that the client makes to a server based on the scheme and host fragments of URIs that identify resources on the server. You can then set the `Expect100Continue` property of the `ServicePoint` object to false. All subsequent POST and PUT requests made by the client through a URI that matches the scheme and host fragments of the `ServicePoint` object will be sent without Expect: 100-Continue headers. The following code shows how to configure a `ServicePoint` object that configures all requests sent to URIs with a scheme of `http` and a host of `www.contoso.com`. + + ```C# + Uri uri = new Uri("http://www.contoso.com/"); + ServicePoint sp = ServicePointManager.FindServicePoint(uri); + sp.Expect100Continue = false; + ``` + + You can also set the static `Expect100Continue` property of the `ServicePointManager` class to specify the default value of this property for all subsequently created `ServicePoint` objects. For more information, see the [ServicePoint Class](https://msdn.microsoft.com/library/system.net.servicepoint.aspx) page on the Microsoft website. +* **Support pagination for requests that may return large numbers of objects**. + + If a collection contains a large number of resources, issuing a GET request to the corresponding URI could result in significant processing on the server hosting the web API affecting performance, and generate a significant amount of network traffic resulting in increased latency. + + To handle these cases, the web API should support query strings that enable the client application to refine requests or fetch data in more manageable, discrete blocks (or pages). 
The ASP.NET Web API framework parses query strings and splits them up into a series of parameter/value pairs which are passed to the appropriate method, following the routing rules described earlier. The method should be implemented to accept these parameters using the same names specified in the query string. Additionally, these parameters should be optional (in case the client omits the query string from a request) and have meaningful default values. The code below shows the `GetAllOrders` method in the `Orders` controller. This method retrieves the details of orders. If this method was unconstrained, it could conceivably return a large amount of data. The `limit` and `offset` parameters are intended to reduce the volume of data to a smaller subset, in this case only the first 10 orders by default: + + ```C# + public class OrdersController : ApiController + { + ... + [Route("api/orders")] + [HttpGet] + public IEnumerable GetAllOrders(int limit=10, int offset=0) + { + // Find the number of orders specified by the limit parameter + // starting with the order specified by the offset parameter + var orders = ... + return orders; + } + ... + } + ``` + + A client application can issue a request to retrieve 30 orders starting at offset 50 by using the URI *http://www.adventure-works.com/api/orders?limit=30&offset=50*. + + > [!TIP] + > Avoid enabling client applications to specify query strings that result in a URI that is more than 2000 characters long. Many web clients and servers cannot handle URIs that are this long. + > + > + + + +## Considerations for maintaining responsiveness, scalability, and availability +The same web API might be utilized by many client applications running anywhere in the world. It is important to ensure that the web API is implemented to maintain responsiveness under a heavy load, to be scalable to support a highly varying workload, and to guarantee availability for clients that perform business-critical operations. 
Consider the following points when determining how to meet these requirements: + +* **Provide Asynchronous Support for Long-Running Requests**. + + A request that might take a long time to process should be performed without blocking the client that submitted the request. The web API can perform some initial checking to validate the request, initiate a separate task to perform the work, and then return a response message with HTTP code 202 (Accepted). The task could run asynchronously as part of the web API processing, or it could be offloaded to an Azure WebJob (if the web API is hosted by an Azure Website) or a worker role (if the web API is implemented as an Azure cloud service). + + > [!NOTE] + > For more information about using WebJobs with Azure Website, visit the page [Use WebJobs to run background tasks in Microsoft Azure Websites](/azure/app-service-web/web-sites-create-web-jobs/) on the Microsoft website. + > + > + + The web API should also provide a mechanism to return the results of the processing to the client application. You can achieve this by providing a polling mechanism for client applications to periodically query whether the processing has finished and obtain the result, or enabling the web API to send a notification when the operation has completed. + + You can implement a simple polling mechanism by providing a *polling* URI that acts as a virtual resource using the following approach: + + 1. The client application sends the initial request to the web API. + 2. The web API stores information about the request in a table held in table storage or Microsoft Azure Cache, and generates a unique key for this entry, possibly in the form of a GUID. + 3. The web API initiates the processing as a separate task. The web API records the state of the task in the table as *Running*. + 4. The web API returns a response message with HTTP status code 202 (Accepted), and the GUID of the table entry in the body of the message. + 5. 
When the task has completed, the web API stores the results in the table, and sets the state of the task to *Complete*. Note that if the task fails, the web API could also store information about the failure and set the status to *Failed*. + 6. While the task is running, the client can continue performing its own processing. It can periodically send a request to the URI */polling/{guid}* where *{guid}* is the GUID returned in the 202 response message by the web API. + 7. The web API at the */polling/{guid}* URI queries the state of the corresponding task in the table and returns a response message with HTTP status code 200 (OK) containing this state (*Running*, *Complete*, or *Failed*). If the task has completed or failed, the response message can also include the results of the processing or any information available about the reason for the failure. + + If you prefer to implement notifications, the options available include: + * Using an Azure Notification Hub to push asynchronous responses to client applications. The page [Azure Notification Hubs Notify Users](/azure/notification-hubs/notification-hubs-aspnet-backend-windows-dotnet-wns-notification/) on the Microsoft website provides further details. + * Using the Comet model to retain a persistent network connection between the client and the server hosting the web API, and using this connection to push messages from the server back to the client. The MSDN magazine article [Building a Simple Comet Application in the Microsoft .NET Framework](https://msdn.microsoft.com/magazine/jj891053.aspx) describes an example solution. + * Using SignalR to push data in real-time from the web server to the client over a persistent network connection. SignalR is available for ASP.NET web applications as a NuGet package. You can find more information on the [ASP.NET SignalR](http://signalr.net/) website.
+ + > [!NOTE] + > Comet and SignalR both utilize persistent network connections between the web server and the client application. This can affect scalability as a large number of clients may require an equally large number of concurrent connections. + > + > +* **Ensure that each request is stateless**. + + Each request should be considered atomic. There should be no dependencies between one request made by a client application and any subsequent requests submitted by the same client. This approach assists in scalability; instances of the web service can be deployed on a number of servers. Client requests can be directed at any of these instances and the results should always be the same. It also improves availability for a similar reason; if a web server fails requests can be routed to another instance (by using Azure Traffic Manager) while the server is restarted with no ill effects on client applications. +* **Track clients and implement throttling to reduce the chances of DOS attacks**. + + If a specific client makes a large number of requests within a given period of time it might monopolize the service and affect the performance of other clients. To mitigate this issue, a web API can monitor calls from client applications either by tracking the IP address of all incoming requests or by logging each authenticated access. You can use this information to limit resource access. If a client exceeds a defined limit, the web API can return a response message with status 503 (Service Unavailable) and include a Retry-After header that specifies when the client can send the next request without it being declined. This strategy can help to reduce the chances of a Denial Of Service (DOS) attack from a set of clients stalling the system. +* **Manage persistent HTTP connections carefully**. + + The HTTP protocol supports persistent HTTP connections where they are available. 
The HTTP 1.0 specification added the Connection:Keep-Alive header that enables a client application to indicate to the server that it can use the same connection to send subsequent requests rather than opening new ones. The connection closes automatically if the client does not reuse the connection within a period defined by the host. This behavior is the default in HTTP 1.1 as used by Azure services, so there is no need to include Keep-Alive headers in messages. + + Keeping a connection open can help to improve responsiveness by reducing latency and network congestion, but it can be detrimental to scalability by keeping unnecessary connections open for longer than required, limiting the ability of other concurrent clients to connect. It can also affect battery life if the client application is running on a mobile device; if the application only makes occasional requests to the server, maintaining an open connection can cause the battery to drain more quickly. To ensure that a connection is not made persistent with HTTP 1.1, the client can include a Connection:Close header with messages to override the default behavior. Similarly, if a server is handling a very large number of clients it can include a Connection:Close header in response messages which should close the connection and save server resources. + + > [!NOTE] + > Persistent HTTP connections are a purely optional feature to reduce the network overhead associated with repeatedly establishing a communications channel. Neither the web API nor the client application should depend on a persistent HTTP connection being available. Do not use persistent HTTP connections to implement Comet-style notification systems; instead you should utilize sockets (or websockets if available) at the TCP layer. Finally, note that Keep-Alive headers are of limited use if a client application communicates with a server via a proxy; only the connection between the client and the proxy will be persistent.
+ > + > + +## Considerations for publishing and managing a web API +To make a web API available for client applications, the web API must be deployed to a host environment. This environment is typically a web server, although it may be some other type of host process. You should consider the following points when publishing a web API: + +* All requests must be authenticated and authorized, and the appropriate level of access control must be enforced. +* A commercial web API might be subject to various quality guarantees concerning response times. It is important to ensure that the host environment is scalable if the load can vary significantly over time. +* It may be necessary to meter requests for monetization purposes. +* It might be necessary to regulate the flow of traffic to the web API, and implement throttling for specific clients that have exhausted their quotas. +* Regulatory requirements might mandate logging and auditing of all requests and responses. +* To ensure availability, it may be necessary to monitor the health of the server hosting the web API and restart it if necessary. + +It is useful to be able to decouple these issues from the technical issues concerning the implementation of the web API. For this reason, consider creating a [façade](http://en.wikipedia.org/wiki/Facade_pattern) that runs as a separate process and routes requests to the web API. The façade can provide the management operations and forward validated requests to the web API. Using a façade can also bring many functional advantages, including: + +* Acting as an integration point for multiple web APIs. +* Transforming messages and translating communications protocols for clients built by using varying technologies. +* Caching requests and responses to reduce load on the server hosting the web API. + +## Considerations for testing a web API +A web API should be tested as thoroughly as any other piece of software.
You should consider creating unit tests to validate the functionality of each operation, as you would with any other type of application. For more information, see the page [Verifying Code by Using Unit Tests](https://msdn.microsoft.com/library/dd264975.aspx) on the Microsoft website. + +> [!NOTE] +> The sample web API available with this guidance includes a test project that shows how to perform unit testing over selected operations. +> +> + +The nature of a web API brings its own additional requirements to verify that it operates correctly. You should pay particular attention to the following aspects: + +* Test all routes to verify that they invoke the correct operations. Be especially aware of HTTP status code 405 (Method Not Allowed) being returned unexpectedly as this can indicate a mismatch between a route and the HTTP methods (GET, POST, PUT, DELETE) that can be dispatched to that route. + + Send HTTP requests to routes that do not support them, such as submitting a POST request to a specific resource (POST requests should only be sent to resource collections). In these cases, the only valid response *should* be status code 405 (Method Not Allowed). +* Verify that all routes are protected properly and are subject to the appropriate authentication and authorization checks. + + > [!NOTE] + > Some aspects of security such as user authentication are most likely to be the responsibility of the host environment rather than the web API, but it is still necessary to include security tests as part of the deployment process. + > + > +* Test the exception handling performed by each operation and verify that an appropriate and meaningful HTTP response is passed back to the client application. +* Verify that request and response messages are well-formed.
For example, if an HTTP POST request contains the data for a new resource in x-www-form-urlencoded format, confirm that the corresponding operation correctly parses the data, creates the resources, and returns a response containing the details of the new resource, including the correct Location header. +* Verify all links and URIs in response messages. For example, an HTTP POST message should return the URI of the newly-created resource. All HATEOAS links should be valid. + + > [!IMPORTANT] + > If you publish the web API through an API Management Service, then these URIs should reflect the URL of the management service and not that of the web server hosting the web API. + > + > +* Ensure that each operation returns the correct status codes for different combinations of input. For example: + + * If a query is successful, it should return status code 200 (OK) + * If a resource is not found, the operation should return HTTP status code 404 (Not Found). + * If the client sends a request that successfully deletes a resource, the status code should be 204 (No Content). + * If the client sends a request that creates a new resource, the status code should be 201 (Created) + +Watch out for unexpected response status codes in the 5xx range. These messages are usually reported by the host server to indicate that it was unable to fulfill a valid request. + +* Test the different request header combinations that a client application can specify and ensure that the web API returns the expected information in response messages. +* Test query strings. If an operation can take optional parameters (such as pagination requests), test the different combinations and order of parameters. +* Verify that asynchronous operations complete successfully. If the web API supports streaming for requests that return large binary objects (such as video or audio), ensure that client requests are not blocked while the data is streamed. 
If the web API implements polling for long-running data modification operations, verify that the operations report their status correctly as they proceed. + +You should also create and run performance tests to check that the web API operates satisfactorily under duress. You can build a web performance and load test project by using Visual Studio Ultimate. For more information, see the page [Run performance tests on an application before a release](https://msdn.microsoft.com/library/dn250793.aspx) on the Microsoft website. + +## Publishing and managing a web API by using the Azure API Management Service +Azure provides the [API Management Service](https://azure.microsoft.com/documentation/services/api-management/) which you can use to publish and manage a web API. Using this facility, you can generate a service that acts as a façade for one or more web APIs. The service is itself a scalable web service that you can create and configure by using the Azure Management portal. You can use this service to publish and manage a web API as follows: + +1. Deploy the web API to a website, Azure cloud service, or Azure virtual machine. +2. Connect the API management service to the web API. Requests sent to the URL of the management API are mapped to URIs in the web API. The same API management service can route requests to more than one web API. This enables you to aggregate multiple web APIs into a single management service. Similarly, the same web API can be referenced from more than one API management service if you need to restrict or partition the functionality available to different applications. + + > [!NOTE] + > The URIs in HATEOAS links generated as part of the response for HTTP GET requests should reference the URL of the API management service and not the web server hosting the web API. + > + > +3. For each web API, specify the HTTP operations that the web API exposes together with any optional parameters that an operation can take as input.
You can also configure whether the API management service should cache the response received from the web API to optimize repeated requests for the same data. Record the details of the HTTP responses that each operation can generate. This information is used to generate documentation for developers, so it is important that it is accurate and complete. + + You can either define operations manually using the wizards provided by the Azure Management portal, or you can import them from a file containing the definitions in WADL or Swagger format. +4. Configure the security settings for communications between the API management service and the web server hosting the web API. The API management service currently supports Basic authentication and mutual authentication using certificates, and OAuth 2.0 user authorization. +5. Create a product. A product is the unit of publication; you add the web APIs that you previously connected to the management service to the product. When the product is published, the web APIs become available to developers. + + > [!NOTE] + > Prior to publishing a product, you can also define user-groups that can access the product and add users to these groups. This gives you control over the developers and applications that can use the web API. If a web API is subject to approval, prior to being able to access it a developer must send a request to the product administrator. The administrator can grant or deny access to the developer. Existing developers can also be blocked if circumstances change. + > + > +6. Configure policies for each web API. Policies govern aspects such as whether cross-domain calls should be allowed, how to authenticate clients, whether to convert between XML and JSON data formats transparently, whether to restrict calls from a given IP range, usage quotas, and whether to limit the call rate. Policies can be applied globally across the entire product, for a single web API in a product, or for individual operations in a web API. 
+ +You can find full details describing how to perform these tasks on the [API Management](https://azure.microsoft.com/services/api-management/) page on the Microsoft website. The Azure API Management Service also provides its own REST interface, enabling you to build a custom interface for simplifying the process of configuring a web API. For more information, visit the [Azure API Management REST API Reference](https://msdn.microsoft.com/library/azure/dn776326.aspx) page on the Microsoft website. + +> [!TIP] +> Azure provides the Azure Traffic Manager which enables you to implement failover and load-balancing, and reduce latency across multiple instances of a web site hosted in different geographic locations. You can use Azure Traffic Manager in conjunction with the API Management Service; the API Management Service can route requests to instances of a web site through Azure Traffic Manager. For more information, visit the [Traffic Manager routing Methods](/azure/traffic-manager/traffic-manager-routing-methods/) page on the Microsoft website. +> +> In this structure, if you are using custom DNS names for your web sites, you should configure the appropriate CNAME record for each web site to point to the DNS name of the Azure Traffic Manager web site. +> +> + +## Supporting developers building client applications +Developers constructing client applications typically require information on how to access the web API, and documentation concerning the parameters, data types, return types, and return codes that describe the different requests and responses between the web service and the client application. + +### Documenting the REST operations for a web API +The Azure API Management Service includes a developer portal that describes the REST operations exposed by a web API. When a product has been published it appears on this portal. Developers can use this portal to sign up for access; the administrator can then approve or deny the request. 
If the developer is approved, they are assigned a subscription key that is used to authenticate calls from the client applications that they develop. This key must be provided with each web API call; otherwise, the call will be rejected. + +This portal also provides: + +* Documentation for the product, listing the operations that it exposes, the parameters required, and the different responses that can be returned. Note that this information is generated from the details provided in step 3 in the list in the Publishing and managing a web API by using the Azure API Management Service section. +* Code snippets that show how to invoke operations from several languages, including JavaScript, C#, Java, Ruby, Python, and PHP. +* A developers' console that enables a developer to send an HTTP request to test each operation in the product and view the results. +* A page where the developer can report any issues or problems found. + +The Azure Management portal enables you to customize the developer portal to change the styling and layout to match the branding of your organization. + +### Implementing a client SDK +Building a client application that invokes REST requests to access a web API requires writing a significant amount of code to construct each request and format it appropriately, send the request to the server hosting the web service, and parse the response to work out whether the request succeeded or failed and extract any data returned. To insulate the client application from these concerns, you can provide an SDK that wraps the REST interface and abstracts these low-level details inside a more functional set of methods. A client application uses these methods, which transparently convert calls into REST requests and then convert the responses back into method return values. This is a common technique that is implemented by many services, including the Azure SDK.
+ +Creating a client-side SDK is a considerable undertaking as it has to be implemented consistently and tested carefully. However, much of this process can be made mechanical, and many vendors supply tools that can automate many of these tasks. + +## Monitoring a web API +Depending on how you have published and deployed your web API you can monitor the web API directly, or you can gather usage and health information by analyzing the traffic that passes through the API Management service. + +### Monitoring a web API directly +If you have implemented your web API by using the ASP.NET Web API template (either as a Web API project or as a Web role in an Azure cloud service) and Visual Studio 2013, you can gather availability, performance, and usage data by using ASP.NET Application Insights. Application Insights is a package that transparently tracks and records information about requests and responses when the web API is deployed to the cloud; once the package is installed and configured, you don't need to amend any code in your web API to use it. When you deploy the web API to an Azure web site, all traffic is examined and the following statistics are gathered: + +* Server response time. +* Number of server requests and the details of each request. +* The top slowest requests in terms of average response time. +* The details of any failed requests. +* The number of sessions initiated by different browsers and user agents. +* The most frequently viewed pages (primarily useful for web applications rather than web APIs). +* The different user roles accessing the web API. + +You can view this data in real time from the Azure Management portal. You can also create webtests that monitor the health of the web API. A webtest sends a periodic request to a specified URI in the web API and captures the response. 
You can specify the definition of a successful response (such as HTTP status code 200), and if the request does not return this response you can arrange for an alert to be sent to an administrator. If necessary, the administrator can restart the server hosting the web API if it has failed. + +The [Application Insights - Get started with ASP.NET](/azure/application-insights/app-insights-asp-net/) page on the Microsoft website provides more information. + +### Monitoring a web API through the API Management Service +If you have published your web API by using the API Management service, the API Management page on the Azure Management portal contains a dashboard that enables you to view the overall performance of the service. The Analytics page enables you to drill down into the details of how the product is being used. This page contains the following tabs: + +* **Usage**. This tab provides information about the number of API calls made and the bandwidth used to handle these calls over time. You can filter usage details by product, API, and operation. +* **Health**. This tab enables you to view the outcome of API requests (the HTTP status codes returned), the effectiveness of the caching policy, the API response time, and the service response time. Again, you can filter health data by product, API, and operation. +* **Activity**. This tab provides a text summary of the numbers of successful calls, failed calls, blocked calls, average response time, and response times for each product, web API, and operation. This page also lists the number of calls made by each developer. +* **At a glance**. This tab displays a summary of the performance data, including the developers responsible for making the most API calls, and the products, web APIs, and operations that received these calls. + +You can use this information to determine whether a particular web API or operation is causing a bottleneck, and if necessary scale the host environment and add more servers.
You can also ascertain whether one or more applications are using a disproportionate volume of resources and apply the appropriate policies to set quotas and limit call rates. + +> [!NOTE] +> You can change the details for a published product, and the changes are applied immediately. For example, you can add or remove an operation from a web API without requiring that you republish the product that contains the web API. +> +> + +## Related patterns +* The [façade](http://en.wikipedia.org/wiki/Facade_pattern) pattern describes how to provide an interface to a web API. + +## More information +* The page [Learn About ASP.NET Web API](http://www.asp.net/web-api) on the Microsoft website provides a detailed introduction to building RESTful web services by using the Web API. +* The page [Routing in ASP.NET Web API](http://www.asp.net/web-api/overview/web-api-routing-and-actions/routing-in-aspnet-web-api) on the Microsoft website describes how convention-based routing works in the ASP.NET Web API framework. +* For more information on attribute-based routing, see the page [Attribute Routing in Web API 2](http://www.asp.net/web-api/overview/web-api-routing-and-actions/attribute-routing-in-web-api-2) on the Microsoft website. +* The [Basic Tutorial](http://www.odata.org/getting-started/basic-tutorial/) page on the OData website provides an introduction to the features of the OData protocol. +* The [ASP.NET Web API OData](http://www.asp.net/web-api/overview/odata-support-in-aspnet-web-api) page on the Microsoft website contains examples and further information on implementing an OData web API by using ASP.NET. +* The page [Introducing Batch Support in Web API and Web API OData](http://blogs.msdn.com/b/webdev/archive/2013/11/01/introducing-batch-support-in-web-api-and-web-api-odata.aspx) on the Microsoft website describes how to implement batch operations in a web API by using OData. 
+* The article [Idempotency Patterns](http://blog.jonathanoliver.com/idempotency-patterns/) on Jonathan Oliver’s blog provides an overview of idempotency and how it relates to data management operations. +* The [Status Code Definitions](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html) page on the W3C website contains a full list of HTTP status codes and their descriptions. +* For detailed information on handling HTTP exceptions with the ASP.NET Web API, visit the [Exception Handling in ASP.NET Web API](http://www.asp.net/web-api/overview/error-handling/exception-handling) page on the Microsoft website. +* The article [Web API Global Error Handling](http://www.asp.net/web-api/overview/error-handling/web-api-global-error-handling) on the Microsoft website describes how to implement a global error handling and logging strategy for a web API. +* The page [Run background tasks with WebJobs](/azure/app-service-web/web-sites-create-web-jobs/) on the Microsoft website provides information and examples on using WebJobs to perform background operations on an Azure Website. +* The page [Azure Notification Hubs Notify Users](/azure/notification-hubs/notification-hubs-aspnet-backend-windows-dotnet-wns-notification/) on the Microsoft website shows how you can use an Azure Notification Hub to push asynchronous responses to client applications. +* The [API Management](https://azure.microsoft.com/services/api-management/) page on the Microsoft website describes how to publish a product that provides controlled and secure access to a web API. +* The [Azure API Management REST API Reference](https://msdn.microsoft.com/library/azure/dn776326.aspx) page on the Microsoft website describes how to use the API Management REST API to build custom management applications. 
+* The [Traffic Manager Routing Methods](/azure/traffic-manager/traffic-manager-routing-methods/) page on the Microsoft website summarizes how Azure Traffic Manager can be used to load-balance requests across multiple instances of a website hosting a web API. +* The [Application Insights - Get started with ASP.NET](/azure/application-insights/app-insights-asp-net/) page on the Microsoft website provides detailed information on installing and configuring Application Insights in an ASP.NET Web API project. +* The page [Verifying Code by Using Unit Tests](https://msdn.microsoft.com/library/dd264975.aspx) on the Microsoft website provides detailed information on creating and managing unit tests by using Visual Studio. +* The page [Run performance tests on an application before a release](https://msdn.microsoft.com/library/dn250793.aspx) on the Microsoft website describes how to use Visual Studio Ultimate to create a web performance and load test project. diff --git a/docs/best-practices/auto-scaling.md b/docs/best-practices/auto-scaling.md new file mode 100644 index 00000000000..2854424baa2 --- /dev/null +++ b/docs/best-practices/auto-scaling.md @@ -0,0 +1,116 @@ +--- +title: Autoscaling guidance +description: Guidance on how to autoscale to dynamically allocate resources required by an application. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 25bba4f9-9ca5-48d1-ac8b-08a3441ba696 +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Autoscaling +[!INCLUDE [header](../_includes/header.md)] + +Autoscaling is the process of dynamically allocating the resources required by an application to match performance requirements and satisfy service-level agreements (SLAs), while minimizing runtime costs. 
As the volume of work grows, an application may require additional resources to enable it to perform its tasks in a timely manner. As demand slackens, resources can be de-allocated to minimize costs, while still maintaining adequate performance and meeting SLAs. +Autoscaling takes advantage of the elasticity of cloud-hosted environments while easing management overhead. It does so by reducing the need for an operator to continually monitor the performance of a system and make decisions about adding or removing resources. + +> [!NOTE] +> Autoscaling applies to all of the resources used by an application, not just the compute resources. For example, if your system uses message queues to send and receive information, it could create additional queues as it scales. +> +> + +## Types of scaling +Scaling typically takes one of the following two forms: + +* **Vertical** (often referred to as *scaling up and down*). This form requires that you modify the hardware (expand or reduce its capacity and performance), or redeploy the solution using alternative hardware that has the appropriate capacity and performance. In a cloud environment, the hardware platform is typically a virtualized environment. Unless the original hardware was substantially overprovisioned, with the consequent upfront capital expense, vertically scaling up in this environment involves provisioning more powerful resources, and then moving the system onto these new resources. Vertical scaling is often a disruptive process that requires making the system temporarily unavailable while it is being redeployed. It may be possible to keep the original system running while the new hardware is provisioned and brought online, but there will likely be some interruption while the processing transitions from the old environment to the new one. It is uncommon to use autoscaling to implement a vertical scaling strategy. +* **Horizontal** (often referred to as *scaling out and in*). 
This form requires deploying the solution on additional or fewer resources, which are typically commodity resources rather than high-powered systems. The solution can continue running without interruption while these resources are provisioned. When the provisioning process is complete, copies of the elements that comprise the solution can be deployed on these additional resources and made available. If demand drops, the additional resources can be reclaimed after the elements using them have been shut down cleanly. Many cloud-based systems, including Microsoft Azure, support automation of this form of scaling. + +## Implement an autoscaling strategy +Implementing an autoscaling strategy typically involves the following components and processes: + +* Instrumentation and monitoring systems at the application, service, and infrastructure levels. These systems capture key metrics, such as response times, queue lengths, CPU utilization, and memory usage. +* Decision-making logic that can evaluate the monitored scaling factors against predefined system thresholds or schedules, and make decisions regarding whether to scale or not. +* Components that are responsible for carrying out tasks associated with scaling the system, such as provisioning or de-provisioning resources. +* Testing, monitoring, and tuning of the autoscaling strategy to ensure that it functions as expected. + +Most cloud-based environments, such as Azure, provide built-in autoscaling mechanisms that address common scenarios. If the environment or service you use doesn't provide the necessary automated scaling functionality, or if you have extreme autoscaling requirements beyond its capabilities, a custom implementation may be necessary. Use this customized implementation to collect operational and system metrics, analyze them to identify relevant data, and then scale resources accordingly. 
+ +## Configure autoscaling for an Azure solution +There are several options for configuring autoscaling for your Azure solutions: + +* **Azure Autoscale** supports the most common scaling scenarios based on a schedule and, optionally, triggered scaling operations based on runtime metrics (such as processor utilization, queue length, or built-in and custom counters). You can configure simple autoscaling policies for a solution quickly and easily by using the Azure portal. For more detailed control, you can make use of the [Azure Service Management REST API](https://msdn.microsoft.com/library/azure/ee460799.aspx) or the [Azure Resource Manager REST API](https://msdn.microsoft.com/library/azure/dn790568.aspx). The [Azure Monitoring Service Management Library](http://www.nuget.org/packages/Microsoft.WindowsAzure.Management.Monitoring) and the [Microsoft Insights Library](https://www.nuget.org/packages/Microsoft.Azure.Insights/) (in preview) are SDKs that allow collecting metrics from different resources, and perform autoscaling by making use of the REST APIs. For resources where Azure Resource Manager support isn't available, or if you are using Azure Cloud Services, the Service Management REST API can be used for autoscaling. In all other cases, use Azure Resource Manager. +* **A custom solution**, based on your instrumentation on the application, and management features of Azure, can be useful. For example, you could use Azure diagnostics or other methods of instrumentation in your application, along with custom code to continually monitor and export metrics of the application. You could have custom rules that work on these metrics, and make use of the Service Management or Resource Manager REST APIs to trigger autoscaling. The metrics for triggering a scaling operation can be any built-in or custom counter, or other instrumentation you implement within the application.
However, a custom solution is not simple to implement, and should be considered only if none of the previous approaches can fulfill your requirements. The [Autoscaling Application Block](http://msdn.microsoft.com/library/hh680892%28v=pandp.50%29.aspx) makes use of this approach. +* **Third-party services**, such as [Paraleap AzureWatch](http://www.paraleap.com/AzureWatch), enable you to scale a solution based on schedules, service load and system performance indicators, custom rules, and combinations of different types of rules. + +When choosing which autoscaling solution to adopt, consider the following points: + +* Use the built-in autoscaling features of the platform, if they can meet your requirements. If not, carefully consider whether you really do need more complex scaling features. Some examples of additional requirements may include more granularity of control, different ways to detect trigger events for scaling, scaling across subscriptions, and scaling other types of resources. +* Consider if you can predict the load on the application with sufficient accuracy to depend only on scheduled autoscaling (adding and removing instances to meet anticipated peaks in demand). Where this isn't possible, use reactive autoscaling based on metrics collected at runtime, to allow the application to handle unpredictable changes in demand. Typically, you can combine these approaches. For example, create a strategy that adds resources such as compute, storage, and queues, based on a schedule of the times when you know the application is most busy. This helps to ensure that capacity is available when required, without the delay encountered when starting new instances. In addition, for each scheduled rule, define metrics that allow reactive autoscaling during that period to ensure that the application can handle sustained but unpredictable peaks in demand. 
+* It's often difficult to understand the relationship between metrics and capacity requirements, especially when an application is initially deployed. Prefer to provision a little extra capacity at the beginning, and then monitor and tune the autoscaling rules to bring the capacity closer to the actual load. + +### Use Azure Autoscale +Autoscale enables you to configure scale out and scale in options for a solution. Autoscale can automatically add and remove instances of Azure Cloud Services web and worker roles, Azure Mobile Services, and the Web Apps feature in Azure App Service. It can also enable automatic scaling by starting and stopping instances of Azure Virtual Machines. An Azure autoscaling strategy includes two sets of factors: + +* Schedule-based autoscaling that can ensure additional instances are available to coincide with an expected peak in usage, and can scale in once the peak time has passed. This enables you to ensure that you have sufficient instances already running, without waiting for the system to react to the load. +* Metrics-based autoscaling that reacts to factors such as average CPU utilization over the last hour, or the backlog of messages that the solution is processing in an Azure storage or Azure Service Bus queue. This allows the application to react separately from the scheduled autoscaling rules to accommodate unplanned or unforeseen changes in demand. + +Consider the following points when using Autoscale: + +* Your autoscaling strategy combines both scheduled and metrics-based scaling. You can specify both types of rules for a service. +* You should configure the autoscaling rules, and then monitor the performance of your application over time. Use the results of this monitoring to adjust the way in which the system scales if necessary. However, keep in mind that autoscaling is not an instantaneous process. It takes time to react to a metric such as average CPU utilization exceeding (or falling below) a specified threshold. 
+* Autoscaling rules that use a detection mechanism based on a measured trigger attribute (such as CPU usage or queue length) use an aggregated value over time, rather than instantaneous values, to trigger an autoscaling action. By default, the aggregate is an average of the values. This prevents the system from reacting too quickly, or causing rapid oscillation. It also allows time for new instances that are auto-started to settle into running mode, preventing additional autoscaling actions from occurring while the new instances are starting up. For Azure Cloud Services and Azure Virtual Machines, the default period for the aggregation is 45 minutes, so it can take up to this period of time for the metric to trigger autoscaling in response to spikes in demand. You can change the aggregation period by using the SDK, but be aware that periods of fewer than 25 minutes may cause unpredictable results (for more information, see [Auto Scaling Cloud Services on CPU Percentage with the Azure Monitoring Services Management Library](http://rickrainey.com/2013/12/15/auto-scaling-cloud-services-on-cpu-percentage-with-the-windows-azure-monitoring-services-management-library/)). For Web Apps, the averaging period is much shorter, allowing new instances to be available in about five minutes after a change to the average trigger measure. +* If you configure autoscaling using the SDK rather than the web portal, you can specify a more detailed schedule during which the rules are active. You can also create your own metrics and use them with or without any of the existing ones in your autoscaling rules. For example, you may wish to use alternative counters, such as the number of requests per second or the average memory availability, or use custom counters that measure specific business processes. +* When autoscaling Azure Virtual Machines, you must deploy a number of instances of the virtual machine that is equal to the maximum number you will allow autoscaling to start. 
These instances must be part of the same availability set. The Virtual Machines autoscaling mechanism does not create or delete instances of the virtual machine; instead, the autoscaling rules you configure will start and stop an appropriate number of these instances. For more information, see [Automatically scale an application running Web Roles, Worker Roles, or Virtual Machines](/azure/cloud-services/cloud-services-how-to-scale/). +* If new instances cannot be started, perhaps because the maximum for a subscription has been reached or an error occurs during startup, the portal may show that an autoscaling operation succeeded. However, subsequent **ChangeDeploymentConfiguration** events displayed in the portal will show only that a service startup was requested, and there will be no event to indicate it was successfully completed. +* You can use the web portal UI to link resources such as SQL Database instances and queues to a compute service instance. This allows you to more easily access the separate manual and automatic scaling configuration options for each of the linked resources. For more information, see "How to: Link a resource to a cloud service" in [How to Manage Cloud Services](/azure/cloud-services/cloud-services-how-to-manage) and [How to Scale an Application](/azure/cloud-services/cloud-services-how-to-scale/). +* When you configure multiple policies and rules, they could conflict with each other. Autoscale uses the following conflict resolution rules to ensure that there is always a sufficient number of instances running: + * Scale out operations always take precedence over scale in operations. + * When scale out operations conflict, the rule that initiates the largest increase in the number of instances takes precedence. + * When scale in operations conflict, the rule that initiates the smallest decrease in the number of instances takes precedence. 
+ + + +## Application design considerations for implementing autoscaling +Autoscaling isn't an instant solution. Simply adding resources to a system or running more instances of a process doesn't guarantee that the performance of the system will improve. Consider the following points when designing an autoscaling strategy: + +* The system must be designed to be horizontally scalable. Avoid making assumptions about instance affinity; do not design solutions that require that the code is always running in a specific instance of a process. When scaling a cloud service or web site horizontally, don't assume that a series of requests from the same source will always be routed to the same instance. For the same reason, design services to be stateless to avoid requiring a series of requests from an application to always be routed to the same instance of a service. When designing a service that reads messages from a queue and processes them, don't make any assumptions about which instance of the service handles a specific message. Autoscaling could start additional instances of a service as the queue length grows. The [Competing Consumers Pattern](http://msdn.microsoft.com/library/dn568101.aspx) describes how to handle this scenario. +* If the solution implements a long-running task, design this task to support both scaling out and scaling in. Without due care, such a task could prevent an instance of a process from being shut down cleanly when the system scales in, or it could lose data if the process is forcibly terminated. Ideally, refactor a long-running task and break up the processing that it performs into smaller, discrete chunks. The [Pipes and Filters Pattern](http://msdn.microsoft.com/library/dn568100.aspx) provides an example of how you can achieve this. 
+* Alternatively, you can implement a checkpoint mechanism that records state information about the task at regular intervals, and saves this state in durable storage that can be accessed by any instance of the process running the task. In this way, if the process is shut down, the work that it was performing can be resumed from the last checkpoint by using another instance. +* When background tasks run on separate compute instances, such as in worker roles of a cloud services hosted application, you may need to scale different parts of the application using different scaling policies. For example, you may need to deploy additional user interface (UI) compute instances without increasing the number of background compute instances, or the opposite of this. If you offer different levels of service (such as basic and premium service packages), you may need to scale out the compute resources for premium service packages more aggressively than those for basic service packages in order to meet SLAs. +* Consider using the length of the queue over which UI and background compute instances communicate as a criterion for your autoscaling strategy. This is the best indicator of an imbalance or difference between the current load and the processing capacity of the background task. +* If you base your autoscaling strategy on counters that measure business processes, such as the number of orders placed per hour or the average execution time of a complex transaction, ensure that you fully understand the relationship between the results from these types of counters and the actual compute capacity requirements. It may be necessary to scale more than one component or compute unit in response to changes in business process counters. +* To prevent a system from attempting to scale out excessively, and to avoid the costs associated with running many thousands of instances, consider limiting the maximum number of instances that can be automatically added. 
Most autoscaling mechanisms allow you to specify the minimum and maximum number of instances for a rule. In addition, consider gracefully degrading the functionality that the system provides if the maximum number of instances has been deployed, and the system is still overloaded. +* Keep in mind that autoscaling might not be the most appropriate mechanism to handle a sudden burst in workload. It takes time to provision and start new instances of a service or add resources to a system, and the peak demand may have passed by the time these additional resources have been made available. In this scenario, it may be better to throttle the service. For more information, see the [Throttling Pattern](http://msdn.microsoft.com/library/dn589798.aspx). +* Conversely, if you do need the capacity to process all requests when the volume fluctuates rapidly, and cost isn't a major contributing factor, consider using an aggressive autoscaling strategy that starts additional instances more quickly. You can also use a scheduled policy that starts a sufficient number of instances to meet the maximum load before that load is expected. +* The autoscaling mechanism should monitor the autoscaling process, and log the details of each autoscaling event (what triggered it, what resources were added or removed, and when). If you create a custom autoscaling mechanism, ensure that it incorporates this capability. Analyze the information to help measure the effectiveness of the autoscaling strategy, and tune it if necessary. You can tune both in the short term, as the usage patterns become more obvious, and over the long term, as the business expands or the requirements of the application evolve. If an application reaches the upper limit defined for autoscaling, the mechanism might also alert an operator who could manually start additional resources if necessary. 
Note that, under these circumstances, the operator may also be responsible for manually removing these resources after the workload eases. + +## Related patterns and guidance +The following patterns and guidance may also be relevant to your scenario when implementing autoscaling: + +* [Throttling Pattern](http://msdn.microsoft.com/library/dn589798.aspx). This pattern describes how an application can continue to function and meet SLAs when an increase in demand places an extreme load on resources. Throttling can be used with autoscaling to prevent a system from being overwhelmed while the system scales out. +* [Competing Consumers Pattern](http://msdn.microsoft.com/library/dn568101.aspx). This pattern describes how to implement a pool of service instances that can handle messages from any application instance. Autoscaling can be used to start and stop service instances to match the anticipated workload. This approach enables a system to process multiple messages concurrently to optimize throughput, improve scalability and availability, and balance the workload. +* [Instrumentation and Telemetry Guidance](http://msdn.microsoft.com/library/dn589775.aspx). Instrumentation and telemetry are vital for gathering the information that can drive the autoscaling process. 
+ +## More information +* [How to Scale an Application](/azure/cloud-services/cloud-services-how-to-scale/) +* [Automatically scale an application running Web Roles, Worker Roles, or Virtual Machines](/azure/cloud-services/cloud-services-how-to-scale/) +* [How to: Link a resource to a cloud service](/azure/cloud-services/cloud-services-how-to-manage) +* [Scale linked resources](/azure/cloud-services/cloud-services-how-to-scale) +* [Azure Monitoring Services Management Library](http://www.nuget.org/packages/Microsoft.WindowsAzure.Management.Monitoring) +* [Azure Service Management REST API](http://msdn.microsoft.com/library/azure/ee460799.aspx) +* [Azure Resource Manager REST API](https://msdn.microsoft.com/library/azure/dn790568.aspx) +* [Microsoft Insights library](https://www.nuget.org/packages/Microsoft.Azure.Insights/) +* [Operations on Autoscaling](http://msdn.microsoft.com/library/azure/dn510374.aspx) +* [Microsoft.WindowsAzure.Management.Monitoring.Autoscale Namespace](http://msdn.microsoft.com/library/azure/microsoft.windowsazure.management.monitoring.autoscale.aspx) diff --git a/docs/best-practices/background-jobs.md b/docs/best-practices/background-jobs.md new file mode 100644 index 00000000000..937f9d77402 --- /dev/null +++ b/docs/best-practices/background-jobs.md @@ -0,0 +1,298 @@ +--- +title: Background jobs guidance +description: Guidance on background tasks that run independently of the user interface. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: f6040f8c-4cbb-4c21-a886-8d54a5868421 +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/21/2016 +ms.author: masashin + +--- +# Background jobs +[!INCLUDE [header](../_includes/header.md)] + +Many types of applications require background tasks that run independently of the user interface (UI). 
Examples include batch jobs, intensive processing tasks, and long-running processes such as workflows. Background jobs can be executed without requiring user interaction--the application can start the job and then continue to process interactive requests from users. This can help to minimize the load on the application UI, which can improve availability and reduce interactive response times. + +For example, if an application is required to generate thumbnails of images that are uploaded by users, it can do this as a background job and save the thumbnail to storage when it is complete--without the user needing to wait for the process to be completed. In the same way, a user placing an order can initiate a background workflow that processes the order, while the UI allows the user to continue browsing the web app. When the background job is complete, it can update the stored orders data and send an email to the user that confirms the order. + +When you consider whether to implement a task as a background job, the main criterion is whether the task can run without user interaction and without the UI needing to wait for the job to be completed. Tasks that require the user or the UI to wait while they are completed might not be appropriate as background jobs. + +## Types of background jobs +Background jobs typically include one or more of the following types of jobs: + +* CPU-intensive jobs, such as mathematical calculations or structural model analysis. +* I/O-intensive jobs, such as executing a series of storage transactions or indexing files. +* Batch jobs, such as nightly data updates or scheduled processing. +* Long-running workflows, such as order fulfillment, or provisioning services and systems. +* Sensitive-data processing where the task is handed off to a more secure location for processing. For example, you might not want to process sensitive data within a web app. 
Instead, you might use a pattern such as [Gatekeeper](http://msdn.microsoft.com/library/dn589793.aspx) to transfer the data to an isolated background process that has access to protected storage. + +## Triggers +Background jobs can be initiated in several different ways. They fall into one of the following categories: + +* [**Event-driven triggers**](#event-driven-triggers). The task is started in response to an event, typically an action taken by a user or a step in a workflow. +* [**Schedule-driven triggers**](#schedule-driven-triggers). The task is invoked on a schedule based on a timer. This might be a recurring schedule or a one-off invocation that is specified for a later time. + +### Event-driven triggers +Event-driven invocation uses a trigger to start the background task. Examples of using event-driven triggers include: + +* The UI or another job places a message in a queue. The message contains data about an action that has taken place, such as the user placing an order. The background task listens on this queue and detects the arrival of a new message. It reads the message and uses the data in it as the input to the background job. +* The UI or another job saves or updates a value in storage. The background task monitors the storage and detects changes. It reads the data and uses it as the input to the background job. +* The UI or another job makes a request to an endpoint, such as an HTTPS URI, or an API that is exposed as a web service. It passes the data that is required to complete the background task as part of the request. The endpoint or web service invokes the background task, which uses the data as its input. + +Typical examples of tasks that are suited to event-driven invocation include image processing, workflows, sending information to remote services, sending email messages, and provisioning new users in multitenant applications. + +### Schedule-driven triggers +Schedule-driven invocation uses a timer to start the background task. 
Examples of using schedule-driven triggers include: + +* A timer that is running locally within the application or as part of the application’s operating system invokes a background task on a regular basis. +* A timer that is running in a different application, or a timer service such as Azure Scheduler, sends a request to an API or web service on a regular basis. The API or web service invokes the background task. +* A separate process or application starts a timer that causes the background task to be invoked once after a specified time delay, or at a specific time. + +Typical examples of tasks that are suited to schedule-driven invocation include batch-processing routines (such as updating related-products lists for users based on their recent behavior), routine data processing tasks (such as updating indexes or generating accumulated results), data analysis for daily reports, data retention cleanup, and data consistency checks. + +If you use a schedule-driven task that must run as a single instance, be aware of the following: + +* If the compute instance that is running the scheduler (such as a virtual machine using Windows scheduled tasks) is scaled, you will have multiple instances of the scheduler running. These could start multiple instances of the task. +* If tasks run for longer than the period between scheduler events, the scheduler may start another instance of the task while the previous one is still running. + +## Returning results +Background jobs execute asynchronously in a separate process, or even in a separate location, from the UI or the process that invoked the background task. Ideally, background tasks are “fire and forget” operations, and their execution progress has no impact on the UI or the calling process. This means that the calling process does not wait for completion of the tasks. Therefore, it cannot automatically detect when the task ends. 
+ +If you require a background task to communicate with the calling task to indicate progress or completion, you must implement a mechanism for this. Some examples are: + +* Write a status indicator value to storage that is accessible to the UI or caller task, which can monitor or check this value when required. Other data that the background task must return to the caller can be placed into the same storage. +* Establish a reply queue that the UI or caller listens on. The background task can send messages to the queue that indicate status and completion. Data that the background task must return to the caller can be placed into the messages. If you are using Azure Service Bus, you can use the **ReplyTo** and **CorrelationId** properties to implement this capability. For more information, see [Correlation in Service Bus Brokered Messaging](http://www.cloudcasts.net/devguide/Default.aspx?id=13029). +* Expose an API or endpoint from the background task that the UI or caller can access to obtain status information. Data that the background task must return to the caller can be included in the response. +* Have the background task call back to the UI or caller through an API to indicate status at predefined points or on completion. This might be through events raised locally or through a publish-and-subscribe mechanism. Data that the background task must return to the caller can be included in the request or event payload. + +## Hosting environment +You can host background tasks by using a range of different Azure platform services: + +* [**Azure Web Apps and WebJobs**](#azure-web-apps-and-webjobs). You can use WebJobs to execute custom jobs based on a range of different types of scripts or executable programs within the context of a web app. +* [**Azure Cloud Services web and worker roles**](#azure-cloud-services-web-and-worker-roles). You can write code within a role that executes as a background task. +* [**Azure Virtual Machines**](#azure-virtual-machines). 
If you have a Windows service or want to use the Windows Task Scheduler, it is common to host your background tasks within a dedicated virtual machine. +* [**Azure Batch**](/azure/batch/batch-technical-overview/). It's a platform service that schedules compute-intensive work to run on a managed collection of virtual machines, and can automatically scale compute resources to meet the needs of your jobs. + +The following sections describe each of these options in more detail, and include considerations to help you choose the appropriate option. + +## Azure Web Apps and WebJobs +You can use Azure WebJobs to execute custom jobs as background tasks within an Azure Web App. WebJobs run within the context of your web app as a continuous process. WebJobs also run in response to a trigger event from Azure Scheduler or external factors, such as changes to storage blobs and message queues. Jobs can be started and stopped on demand, and shut down gracefully. If a continuously running WebJob fails, it is automatically restarted. Retry and error actions are configurable. + +When you configure a WebJob: + +* If you want the job to respond to an event-driven trigger, you should configure it as **Run continuously**. The script or program is stored in the folder named site/wwwroot/app_data/jobs/continuous. +* If you want the job to respond to a schedule-driven trigger, you should configure it as **Run on a schedule**. The script or program is stored in the folder named site/wwwroot/app_data/jobs/triggered. +* If you choose the **Run on demand** option when you configure a job, it will execute the same code as the **Run on a schedule** option when you start it. + +Azure WebJobs run within the sandbox of the web app. This means that they can access environment variables and share information, such as connection strings, with the web app. The job has access to the unique identifier of the machine that is running the job. 
The connection string named **AzureWebJobsStorage** provides access to Azure storage queues, blobs, and tables for application data, and access to Service Bus for messaging and communication. The connection string named **AzureWebJobsDashboard** provides access to the job action log files. + +Azure WebJobs have the following characteristics: + +* **Security**: WebJobs are protected by the deployment credentials of the web app. +* **Supported file types**: You can define WebJobs by using command scripts (.cmd), batch files (.bat), PowerShell scripts (.ps1), bash shell scripts (.sh), PHP scripts (.php), Python scripts (.py), JavaScript code (.js), and executable programs (.exe, .jar, and more). +* **Deployment**: You can deploy scripts and executables by using the Azure portal, by using the [WebJobsVs](https://marketplace.visualstudio.com/items?itemName=Sayed-Ibrahim-Hashimi.WebJobsVs) add-in for Visual Studio or the [Visual Studio 2013 Update 4](http://www.visualstudio.com/news/vs2013-update4-rc-vs) (you can create and deploy with this option), by using the [Azure WebJobs SDK](/azure/app-service-web/websites-dotnet-webjobs-sdk-get-started/), or by copying them directly to the following locations: + * For triggered execution: site/wwwroot/app_data/jobs/triggered/{job name} + * For continuous execution: site/wwwroot/app_data/jobs/continuous/{job name} +* **Logging**: Console.Out is treated (marked) as INFO. Console.Error is treated as ERROR. You can access monitoring and diagnostics information by using the Azure portal. You can download log files directly from the site. They are saved in the following locations: + * For triggered execution: Vfs/data/jobs/triggered/jobName + * For continuous execution: Vfs/data/jobs/continuous/jobName +* **Configuration**: You can configure WebJobs by using the portal, the REST API, and PowerShell. 
You can use a configuration file named settings.job in the same root directory as the job script to provide configuration information for a job. For example: + * { "stopping_wait_time": 60 } + * { "is_singleton": true } + +### Considerations +* By default, WebJobs scale with the web app. However, you can configure jobs to run on a single instance by setting the **is_singleton** configuration property to **true**. Single-instance WebJobs are useful for tasks that you do not want to scale or run as simultaneous multiple instances, such as reindexing, data analysis, and similar tasks. +* To minimize the impact of jobs on the performance of the web app, consider creating an empty Azure Web App instance in a new App Service plan to host WebJobs that may be long running or resource intensive. + +### More information +* [Azure WebJobs recommended resources](/azure/app-service-web/websites-webjobs-resources/) lists the many useful resources, downloads, and samples for WebJobs. + +## Azure Cloud Services web and worker roles +You can execute background tasks within a web role or in a separate worker role. When you are deciding whether to use a worker role, consider scalability and elasticity requirements, task lifetime, release cadence, security, fault tolerance, contention, complexity, and the logical architecture. For more information, see [Compute Resource Consolidation Pattern](http://msdn.microsoft.com/library/dn589778.aspx). + +There are several ways to implement background tasks within a Cloud Services role: + +* Create an implementation of the **RoleEntryPoint** class in the role and use its methods to execute background tasks. The tasks run in the context of WaIISHost.exe. They can use the **GetSetting** method of the **CloudConfigurationManager** class to load configuration settings. For more information, see [Lifecycle (Cloud Services)](#lifecycle-cloud-services). +* Use startup tasks to execute background tasks when the application starts. 
To force the tasks to continue to run in the background, set the **taskType** property to **background** (if you do not do this, the application startup process will halt and wait for the task to finish). For more information, see [Run startup tasks in Azure](/azure/cloud-services/cloud-services-startup-tasks/). +* Use the WebJobs SDK to implement background tasks such as WebJobs that are initiated as a startup task. For more information, see [Get started with the Azure WebJobs SDK](/azure/app-service-web/websites-dotnet-webjobs-sdk-get-started/). +* Use a startup task to install a Windows service that executes one or more background tasks. You must set the **taskType** property to **background** so that the service executes in the background. For more information, see [Run startup tasks in Azure](/azure/cloud-services/cloud-services-startup-tasks/). + +### Running background tasks in the web role +The main advantage of running background tasks in the web role is the saving in hosting costs because there is no requirement to deploy additional roles. + +### Running background tasks in a worker role +Running background tasks in a worker role has several advantages: + +* It allows you to manage scaling separately for each type of role. For example, you might need more instances of a web role to support the current load, but fewer instances of the worker role that executes background tasks. By scaling background task compute instances separately from the UI roles, you can reduce hosting costs, while maintaining acceptable performance. +* It offloads the processing overhead for background tasks from the web role. The web role that provides the UI can remain responsive, and it may mean fewer instances are required to support a given volume of requests from users. +* It allows you to implement separation of concerns. Each role type can implement a specific set of clearly defined and related tasks. 
This makes designing and maintaining the code easier because there is less interdependence of code and functionality between each role. +* It can help to isolate sensitive processes and data. For example, web roles that implement the UI do not need to have access to data that is managed and controlled by a worker role. This can be useful in strengthening security, especially when you use a pattern such as the [Gatekeeper Pattern](http://msdn.microsoft.com/library/dn589793.aspx). + +### Considerations +Consider the following points when choosing how and where to deploy background tasks when using Cloud Services web and worker roles: + +* Hosting background tasks in an existing web role can save the cost of running a separate worker role just for these tasks. However, it is likely to affect the performance and availability of the application if there is contention for processing and other resources. Using a separate worker role protects the web role from the impact of long-running or resource-intensive background tasks. +* If you host background tasks by using the **RoleEntryPoint** class, you can easily move this to another role. For example, if you create the class in a web role and later decide that you need to run the tasks in a worker role, you can move the **RoleEntryPoint** class implementation into the worker role. +* Startup tasks are designed to execute a program or a script. Deploying a background job as an executable program might be more difficult, especially if it also requires deployment of dependent assemblies. It might be easier to deploy and use a script to define a background job when you use startup tasks. +* Exceptions that cause a background task to fail have a different impact, depending on the way that they are hosted: + * If you use the **RoleEntryPoint** class approach, a failed task will cause the role to restart so that the task automatically restarts. This can affect availability of the application. 
To prevent this, ensure that you include robust exception handling within the **RoleEntryPoint** class and all the background tasks. Use code to restart tasks that fail where this is appropriate, and throw the exception to restart the role only if you cannot gracefully recover from the failure within your code. + * If you use startup tasks, you are responsible for managing the task execution and checking if it fails. +* Managing and monitoring startup tasks is more difficult than using the **RoleEntryPoint** class approach. However, the Azure WebJobs SDK includes a dashboard to make it easier to manage WebJobs that you initiate through startup tasks. + +### More information +* [Compute Resource Consolidation Pattern](http://msdn.microsoft.com/library/dn589778.aspx) +* [Get started with the Azure WebJobs SDK](/azure/app-service-web/websites-dotnet-webjobs-sdk-get-started/) + +## Azure Virtual Machines +Background tasks might be implemented in a way that prevents them from being deployed to Azure Web Apps or Cloud Services, or these options might not be convenient. Typical examples are Windows services, and third-party utilities and executable programs. Another example might be programs written for an execution environment that is different from the one that hosts the application. For example, it might be a Unix or Linux program that you want to execute from a Windows or .NET application. You can choose from a range of operating systems for an Azure virtual machine, and run your service or executable on that virtual machine. + +To help you choose when to use Virtual Machines, see [Azure App Services, Cloud Services and Virtual Machines comparison](/azure/app-service-web/choose-web-site-cloud-service-vm/). For information about the options for Virtual Machines, see [Virtual Machine and Cloud Service sizes for Azure](http://msdn.microsoft.com/library/azure/dn197896.aspx).
For more information about the operating systems and prebuilt images that are available for Virtual Machines, see [Azure Virtual Machines Marketplace](https://azure.microsoft.com/gallery/virtual-machines/). + +To initiate the background task in a separate virtual machine, you have a range of options: + +* You can execute the task on demand directly from your application by sending a request to an endpoint that the task exposes. This passes in any data that the task requires. This endpoint invokes the task. +* You can configure the task to run on a schedule by using a scheduler or timer that is available in your chosen operating system. For example, on Windows you can use Windows Task Scheduler to execute scripts and tasks. Or, if you have SQL Server installed on the virtual machine, you can use the SQL Server Agent to execute scripts and tasks. +* You can use Azure Scheduler to initiate the task by adding a message to a queue that the task listens on, or by sending a request to an API that the task exposes. + +See the earlier section [Triggers](#triggers) for more information about how you can initiate background tasks. + +### Considerations +Consider the following points when you are deciding whether to deploy background tasks in an Azure virtual machine: + +* Hosting background tasks in a separate Azure virtual machine provides flexibility and allows precise control over initiation, execution, scheduling, and resource allocation. However, it will increase runtime cost if a virtual machine must be deployed just to run background tasks. +* There is no facility to monitor the tasks in the Azure portal and no automated restart capability for failed tasks--although you can monitor the basic status of the virtual machine and manage it by using the [Azure Service Management Cmdlets](http://msdn.microsoft.com/library/azure/dn495240.aspx). However, there are no facilities to control processes and threads in compute nodes. 
Typically, using a virtual machine will require additional effort to implement a mechanism that collects data from instrumentation in the task, and from the operating system in the virtual machine. One solution that might be appropriate is to use the [System Center Management Pack for Azure](http://technet.microsoft.com/library/gg276383.aspx). +* You might consider creating monitoring probes that are exposed through HTTP endpoints. The code for these probes could perform health checks, collect operational information and statistics--or collate error information and return it to a management application. For more information, see [Health Endpoint Monitoring Pattern](http://msdn.microsoft.com/library/dn589789.aspx). + +### More information +* [Virtual Machines](https://azure.microsoft.com/services/virtual-machines/) on Azure +* [Azure Virtual Machines FAQ](/azure/virtual-machines/virtual-machines-linux-classic-faq?toc=%2fazure%2fvirtual-machines%2flinux%2fclassic%2ftoc.json) + +## Design considerations +There are several fundamental factors to consider when you design background tasks. The following sections discuss partitioning, conflicts, and coordination. + +## Partitioning +If you decide to include background tasks within an existing compute instance (such as a web app, web role, existing worker role, or virtual machine), you must consider how this will affect the quality attributes of the compute instance and the background task itself. These factors will help you to decide whether to colocate the tasks with the existing compute instance or separate them out into a separate compute instance: + +* **Availability**: Background tasks might not need to have the same level of availability as other parts of the application, in particular the UI and other parts that are directly involved in user interaction. Background tasks might be more tolerant of latency, retried connection failures, and other factors that affect availability because the operations can be queued. 
However, there must be sufficient capacity to prevent the backup of requests that could block queues and affect the application as a whole. +* **Scalability**: Background tasks are likely to have a different scalability requirement than the UI and the interactive parts of the application. Scaling the UI might be necessary to meet peaks in demand, while outstanding background tasks might be completed during less busy times by fewer compute instances. +* **Resiliency**: Failure of a compute instance that just hosts background tasks might not fatally affect the application as a whole if the requests for these tasks can be queued or postponed until the task is available again. If the compute instance and/or tasks can be restarted within an appropriate interval, users of the application might not be affected. +* **Security**: Background tasks might have different security requirements or restrictions than the UI or other parts of the application. By using a separate compute instance, you can specify a different security environment for the tasks. You can also use patterns such as Gatekeeper to isolate the background compute instances from the UI in order to maximize security and separation. +* **Performance**: You can choose the type of compute instance for background tasks to specifically match the performance requirements of the tasks. This might mean using a less expensive compute option if the tasks do not require the same processing capabilities as the UI, or a larger instance if they require additional capacity and resources. +* **Manageability**: Background tasks might have a different development and deployment rhythm from the main application code or the UI. Deploying them to a separate compute instance can simplify updates and versioning. +* **Cost**: Adding compute instances to execute background tasks increases hosting costs. You should carefully consider the trade-off between additional capacity and these extra costs.
+ +For more information, see [Leader Election Pattern](http://msdn.microsoft.com/library/dn568104.aspx) and [Competing Consumers +Pattern](http://msdn.microsoft.com/library/dn568101.aspx). + +## Conflicts +If you have multiple instances of a background job, it is possible that they will compete for access to resources and services, such as databases and storage. This concurrent access can result in resource contention, which might cause conflicts in availability of the services and in the integrity of data in storage. You can resolve resource contention by using a pessimistic locking approach. This prevents competing instances of a task from concurrently accessing a service or corrupting data. + +Another approach to resolve conflicts is to define background tasks as a singleton, so that there is only ever one instance running. However, this eliminates the reliability and performance benefits that a multiple-instance configuration can provide. This is especially true if the UI can supply sufficient work to keep more than one background task busy. + +It is vital to ensure that the background task can automatically restart and that it has sufficient capacity to cope with peaks in demand. You can achieve this by allocating a compute instance with sufficient resources, by implementing a queueing mechanism that can store requests for later execution when demand decreases, or by using a combination of these techniques. + +## Coordination +The background tasks might be complex and might require multiple individual tasks to execute to produce a result or to fulfil all the requirements. It is common in these scenarios to divide the task into smaller discrete steps or subtasks that can be executed by multiple consumers. Multistep jobs can be more efficient and more flexible because individual steps might be reusable in multiple jobs. It is also easy to add, remove, or modify the order of the steps.
+ +Coordinating multiple tasks and steps can be challenging, but there are three common patterns that you can use to guide your implementation of a solution: + +* **Decomposing a task into multiple reusable steps**. An application might be required to perform a variety of tasks of varying complexity on the information that it processes. A straightforward but inflexible approach to implementing this application might be to perform this processing as a monolithic module. However, this approach is likely to reduce the opportunities for refactoring the code, optimizing it, or reusing it if parts of the same processing are required elsewhere within the application. For more information, see [Pipes and Filters Pattern](http://msdn.microsoft.com/library/dn568100.aspx). +* **Managing execution of the steps for a task**. An application might perform tasks that comprise a number of steps (some of which might invoke remote services or access remote resources). The individual steps might be independent of each other, but they are orchestrated by the application logic that implements the task. For more information, see [Scheduler Agent Supervisor Pattern](http://msdn.microsoft.com/library/dn589780.aspx). +* **Managing recovery for task steps that fail**. An application might need to undo the work that is performed by a series of steps (which together define an eventually consistent operation) if one or more of the steps fail. For more information, see [Compensating Transaction Pattern](http://msdn.microsoft.com/library/dn589804.aspx). + +## Lifecycle (Cloud Services) + If you decide to implement background jobs for Cloud Services applications that use web and worker roles by using the **RoleEntryPoint** class, it is important to understand the lifecycle of this class in order to use it correctly. + +Web and worker roles go through a set of distinct phases as they start, run, and stop. 
The **RoleEntryPoint** class exposes a series of events that indicate when these stages are occurring. You use these to initialize, run, and stop your custom background tasks. The complete cycle is: + +* Azure loads the role assembly and searches it for a class that derives from **RoleEntryPoint**. +* If it finds this class, it calls **RoleEntryPoint.OnStart()**. You override this method to initialize your background tasks. +* After the **OnStart** method has completed, Azure calls **Application_Start()** in the application’s Global file if this is present (for example, Global.asax in a web role running ASP.NET). +* Azure calls **RoleEntryPoint.Run()** on a new foreground thread that executes in parallel with **OnStart()**. You override this method to start your background tasks. +* When the Run method ends, Azure first calls **Application_End()** in the application’s Global file if this is present, and then calls **RoleEntryPoint.OnStop()**. You override the **OnStop** method to stop your background tasks, clean up resources, dispose of objects, and close connections that the tasks may have used. +* The Azure worker role host process is stopped. At this point, the role will be recycled and will restart. + +For more details and an example of using the methods of the **RoleEntryPoint** class, see [Compute Resource Consolidation Pattern](http://msdn.microsoft.com/library/dn589778.aspx). + +## Considerations +Consider the following points when you are planning how you will run background tasks in a web or worker role: + +* The default **Run** method implementation in the **RoleEntryPoint** class contains a call to **Thread.Sleep(Timeout.Infinite)** that keeps the role alive indefinitely. If you override the **Run** method (which is typically necessary to execute background tasks), you must not allow your code to exit from the method unless you want to recycle the role instance. 
+* A typical implementation of the **Run** method includes code to start each of the background tasks and a loop construct that periodically checks the state of all the background tasks. It can restart any that fail or monitor for cancellation tokens that indicate that jobs have completed. +* If a background task throws an unhandled exception, that task should be recycled while allowing any other background tasks in the role to continue running. However, if the exception is caused by corruption of objects outside the task, such as shared storage, the exception should be handled by your **RoleEntryPoint** class, all tasks should be cancelled, and the **Run** method should be allowed to end. Azure will then restart the role. +* Use the **OnStop** method to pause or kill background tasks and clean up resources. This might involve stopping long-running or multistep tasks. It is vital to consider how this can be done to avoid data inconsistencies. If a role instance stops for any reason other than a user-initiated shutdown, the code running in the **OnStop** method must be completed within five minutes before it is forcibly terminated. Ensure that your code can be completed in that time or can tolerate not running to completion. +* The Azure load balancer starts directing traffic to the role instance when the **RoleEntryPoint.OnStart** method returns the value **true**. Therefore, consider putting all your initialization code in the **OnStart** method so that role instances that do not successfully initialize will not receive any traffic. +* You can use startup tasks in addition to the methods of the **RoleEntryPoint** class. You should use startup tasks to initialize any settings that you need to change in the Azure load balancer because these tasks will execute before the role receives any requests. For more information, see [Run startup tasks in Azure](/azure/cloud-services/cloud-services-startup-tasks/). 
+* If there is an error in a startup task, it might force the role to continually restart. This can prevent you from performing a virtual IP (VIP) address swap back to a previously staged version because the swap requires exclusive access to the role. This cannot be obtained while the role is restarting. To resolve this: + + * Add the following code to the beginning of the **OnStart** and **Run** methods in your role: + + ```C# + var freeze = CloudConfigurationManager.GetSetting("Freeze"); + if (freeze != null) + { + if (Boolean.Parse(freeze)) + { + Thread.Sleep(System.Threading.Timeout.Infinite); + } + } + ``` + + * Add the definition of the **Freeze** setting as a Boolean value to the ServiceDefinition.csdef and ServiceConfiguration.*.cscfg files for the role and set it to **false**. If the role goes into a repeated restart mode, you can change the setting to **true** to freeze role execution and allow it to be swapped with a previous version. + +## Resiliency considerations +Background tasks must be resilient in order to provide reliable services to the application. When you are planning and designing background tasks, consider the following points: + +* Background tasks must be able to gracefully handle role or service restarts without corrupting data or introducing inconsistency into the application. For long-running or multistep tasks, consider using *check pointing* by saving the state of jobs in persistent storage, or as messages in a queue if this is appropriate. For example, you can persist state information in a message in a queue and incrementally update this state information with the task progress so that the task can be processed from the last known good checkpoint--instead of restarting from the beginning. When using Azure Service Bus queues, you can use message sessions to enable the same scenario. 
Sessions allow you to save and retrieve the application processing state by using the [SetState](http://msdn.microsoft.com/library/microsoft.servicebus.messaging.messagesession.setstate.aspx) and [GetState](http://msdn.microsoft.com/library/microsoft.servicebus.messaging.messagesession.getstate.aspx) methods. For more information about designing reliable multistep processes and workflows, see [Scheduler Agent Supervisor Pattern](http://msdn.microsoft.com/library/dn589780.aspx). +* When you use web or worker roles to host multiple background tasks, design your override of the **Run** method to monitor for failed or stalled tasks, and restart them. Where this is not practical, and you are using a worker role, force the worker role to restart by exiting from the **Run** method. +* When you use queues to communicate with background tasks, the queues can act as a buffer to store requests that are sent to the tasks while the application is under higher than usual load. This allows the tasks to catch up with the UI during less busy periods. It also means that recycling the role will not block the UI. For more information, see [Queue-Based Load Leveling Pattern](http://msdn.microsoft.com/library/dn589783.aspx). If some tasks are more important than others, consider implementing the [Priority Queue Pattern](http://msdn.microsoft.com/library/dn589794.aspx) to ensure that these tasks run before less important ones. +* Background tasks that are initiated by messages or process messages must be designed to handle inconsistencies, such as messages arriving out of order, messages that repeatedly cause an error (often referred to as *poison messages*), and messages that are delivered more than once. Consider the following: + * Messages that must be processed in a specific order, such as those that change data based on the existing data value (for example, adding a value to an existing value), might not arrive in the original order in which they were sent. 
Alternatively, they might be handled by different instances of a background task in a different order due to varying loads on each instance. Messages that must be processed in a specific order should include a sequence number, key, or some other indicator that background tasks can use to ensure that they are processed in the correct order. If you are using Azure Service Bus, you can use message sessions to guarantee the order of delivery. However, it is usually more efficient, where possible, to design the process so that the message order is not important. + * Typically, a background task will peek at messages in the queue, which temporarily hides them from other message consumers. Then it deletes the messages after they have been successfully processed. If a background task fails when processing a message, that message will reappear on the queue after the peek time-out expires. It will be processed by another instance of the task or during the next processing cycle of this instance. If the message consistently causes an error in the consumer, it will block the task, the queue, and eventually the application itself when the queue becomes full. Therefore, it is vital to detect and remove poison messages from the queue. If you are using Azure Service Bus, messages that cause an error can be moved automatically or manually to an associated dead letter queue. + * Queues are guaranteed *at least once* delivery mechanisms, but they might deliver the same message more than once. In addition, if a background task fails after processing a message but before deleting it from the queue, the message will become available for processing again. Background tasks should be idempotent, which means that processing the same message more than once does not cause an error or inconsistency in the application’s data. Some operations are naturally idempotent, such as setting a stored value to a specific new value.
However, operations such as adding a value to an existing stored value without checking that the stored value is still the same as when the message was originally sent will cause inconsistencies. Azure Service Bus queues can be configured to automatically remove duplicated messages. + * Some messaging systems, such as Azure storage queues and Azure Service Bus queues, support a de-queue count property that indicates the number of times a message has been read from the queue. This can be useful in handling repeated and poison messages. For more information, see [Asynchronous Messaging Primer](http://msdn.microsoft.com/library/dn589781.aspx) and [Idempotency Patterns](http://blog.jonathanoliver.com/2010/04/idempotency-patterns/). + +## Scaling and performance considerations +Background tasks must offer sufficient performance to ensure they do not block the application, or cause inconsistencies due to delayed operation when the system is under load. Typically, performance is improved by scaling the compute instances that host the background tasks. When you are planning and designing background tasks, consider the following points around scalability and performance: + +* Azure supports autoscaling (both scaling out and scaling back in) based on current demand and load--or on a predefined schedule, for Web Apps, Cloud Services web and worker roles, and Virtual Machines hosted deployments. Use this feature to ensure that the application as a whole has sufficient performance capabilities while minimizing runtime costs. +* Where background tasks have a different performance capability from the other parts of a Cloud Services application (for example, the UI or components such as the data access layer), hosting the background tasks together in a separate worker role allows the UI and background task roles to scale independently to manage the load. 
If multiple background tasks have significantly different performance capabilities from each other, consider dividing them into separate worker roles and scaling each role type independently. However, note that this might increase runtime costs compared to combining all the tasks into fewer roles. +* Simply scaling the roles might not be sufficient to prevent loss of performance under load. You might also need to scale storage queues and other resources to prevent a single point of the overall processing chain from becoming a bottleneck. Also, consider other limitations, such as the maximum throughput of storage and other services that the application and the background tasks rely on. +* Background tasks must be designed for scaling. For example, they must be able to dynamically detect the number of storage queues in use in order to listen on or send messages to the appropriate queue. +* By default, WebJobs scale with their associated Azure Web Apps instance. However, if you want a WebJob to run as only a single instance, you can create a settings.job file that contains the JSON data **{ "is_singleton": true }**. This forces Azure to only run one instance of the WebJob, even if there are multiple instances of the associated web app. This can be a useful technique for scheduled jobs that must run as only a single instance.
+ +## Related patterns +* [Asynchronous Messaging Primer](http://msdn.microsoft.com/library/dn589781.aspx) +* [Autoscaling Guidance](http://msdn.microsoft.com/library/dn589774.aspx) +* [Compensating Transaction Pattern](http://msdn.microsoft.com/library/dn589804.aspx) +* [Competing Consumers Pattern](http://msdn.microsoft.com/library/dn568101.aspx) +* [Compute Partitioning Guidance](http://msdn.microsoft.com/library/dn589773.aspx) +* [Compute Resource Consolidation Pattern](http://msdn.microsoft.com/library/dn589778.aspx) +* [Gatekeeper Pattern](http://msdn.microsoft.com/library/dn589793.aspx) +* [Leader Election Pattern](http://msdn.microsoft.com/library/dn568104.aspx) +* [Pipes and Filters Pattern](http://msdn.microsoft.com/library/dn568100.aspx) +* [Priority Queue Pattern](http://msdn.microsoft.com/library/dn589794.aspx) +* [Queue-based Load Leveling Pattern](http://msdn.microsoft.com/library/dn589783.aspx) +* [Scheduler Agent Supervisor Pattern](http://msdn.microsoft.com/library/dn589780.aspx) + +## More information +* [Scaling Azure Applications with Worker Roles](http://msdn.microsoft.com/library/hh534484.aspx#sec8) +* [Executing Background Tasks](http://msdn.microsoft.com/library/ff803365.aspx) +* [Azure Role Startup Life Cycle](http://blog.syntaxc4.net/post/2011/04/13/windows-azure-role-startup-life-cycle.aspx) (blog post) +* [Azure Cloud Services Role Lifecycle](http://channel9.msdn.com/Series/Windows-Azure-Cloud-Services-Tutorials/Windows-Azure-Cloud-Services-Role-Lifecycle) (video) +* [Get Started with the Azure WebJobs SDK](/azure/app-service-web/websites-dotnet-webjobs-sdk-get-started/) +* [Azure Queues and Service Bus Queues - Compared and Contrasted](/azure/service-bus-messaging/service-bus-azure-and-service-bus-queues-compared-contrasted/) +* [How to Enable Diagnostics in a Cloud Service](/azure/cloud-services/cloud-services-dotnet-diagnostics/) + diff --git a/docs/best-practices/caching.md b/docs/best-practices/caching.md new file mode 100644 index 
00000000000..2337e7bb92b --- /dev/null +++ b/docs/best-practices/caching.md @@ -0,0 +1,1242 @@ +--- +title: Caching guidance +description: Guidance on caching to improve performance and scalability. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: c86e2d49-066b-43b0-b0b6-f70ff4f87cdd +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/14/2016 +ms.author: masashin + +--- +# Caching +[!INCLUDE [header](../_includes/header.md)] + +Caching is a common technique that aims to improve the performance and +scalability of a system. It does this by temporarily copying frequently accessed data +to fast storage that's located close to the application. If this fast data storage +is located closer to the application than the original source, then caching +can significantly improve response times for client applications by serving +data more quickly. + +Caching is most effective when a client instance repeatedly +reads the same data, especially if all the following conditions apply to the original data store: + +* It remains relatively static. +* It's slow compared to the speed of the cache. +* It's subject to a high level of contention. +* It's far away when network latency can cause access to be slow. + +## Caching in distributed applications +Distributed applications typically implement either or both of the +following strategies when caching data: + +* Using a private cache, where data is held locally on the computer that's running an instance of an application or service. +* Using a shared cache, serving as a common source which can be accessed by multiple processes and/or machines. + +In both cases, caching can be performed client-side and/or server-side. Client-side caching is done by the process that provides +the user interface for a system, such as a web browser or desktop application. 
+Server-side caching is done by the process that provides the business services +that are running remotely. + +### Private caching +The most basic type of cache is an in-memory store. It's held in the address +space of a single process and accessed directly by the code that runs +in that process. This type of cache is very quick to access. It can +also provide an extremely effective means for storing modest amounts of +static data, since the size of a cache is typically constrained by the +volume of memory that's available on the machine hosting the process. + +If you need to cache more information than is physically possible in memory, +you can write cached data to the local file system. This will +be slower to access than data that's held in-memory, but should +still be faster and more reliable than retrieving data across a network. + +If you have multiple instances of an application that uses this model +running concurrently, each application instance has its own +independent cache holding its own copy of the data. + +Think of a cache as a snapshot of the original data at some +point in the past. If this data is not static, it is likely that +different application instances hold different versions of the +data in their caches. Therefore, the same query performed by these +instances can return different results, as shown in Figure 1. + +![Using an in-memory cache in different instances of an application](./images/caching/Figure1.png) + +*Figure 1: Using an in-memory cache in different instances of an application* + +### Shared caching +Using a shared cache can help alleviate concerns that data might +differ in each cache, which can occur with in-memory caching. Shared +caching ensures that different application instances see the same +view of cached data. It does this by locating the cache in a separate location, +typically hosted as part of a separate service, as shown in Figure 2. 
+ +![Using a shared cache](./images/caching/Figure2.png) + +*Figure 2: Using a shared cache* + +An important benefit of the shared caching approach is the +scalability it provides. Many shared cache services are +implemented by using a cluster of servers, and utilize software that +distributes the data across the cluster in a transparent manner. An +application instance simply sends a request to the cache service. +The underlying infrastructure is responsible for determining the +location of the cached data in the cluster. You can easily scale the +cache by adding more servers. + +There are two main disadvantages of the shared caching approach: + +* The cache is slower to access because it is no longer held locally to each + application instance. +* The requirement to implement a separate + cache service might add complexity to the solution. + +## Considerations for using caching +The following sections describe in more detail the considerations +for designing and using a cache. + +### Decide when to cache data +Caching can dramatically improve performance, scalability, and availability. The more data +that you have and the larger the number of users that need to access this data, the greater +the benefits of caching become. That's because caching reduces the latency and contention that's associated with handling +large volumes of concurrent requests in the original data store. + +For example, a database +might support a limited number of concurrent connections. Retrieving data from a shared +cache, however, rather than the underlying database, makes it possible for a client application to access this data +even if the number of available connections is currently exhausted. Additionally, if the +database becomes unavailable, client applications might be able to continue by using the +data that's held in the cache. 
+ +Consider caching data that is read frequently but modified infrequently +(for example, data that has a higher proportion of read operations than write operations). However, +we don't recommend that you use the cache as the authoritative store of critical information. Instead, +ensure that all changes that your application cannot afford to lose are always saved to a +persistent data store. This means that if the cache is unavailable, your application can +still continue to operate by using the data store, and you won't lose important +information. + +### Determine how to cache data effectively +The key to using a cache effectively lies in determining the most appropriate data to +cache, and caching it at the appropriate time. The data can be added to the cache on +demand the first time it is retrieved by an application. This means that the application needs to +fetch the data only once from the data store, and that subsequent access can be satisfied +by using the cache. + +Alternatively, a cache can be partially or fully populated with data in advance, +typically when the application starts (an approach known as seeding). However, it might +not be advisable to implement seeding for a large cache because this approach can impose +a sudden, high load on the original data store when the application starts running. + +Often an analysis of usage patterns can help you decide whether to fully or partially +prepopulate a cache, and to choose the data to cache. For example, it +can be useful to seed the cache with the static user profile data for +customers who use the application regularly (perhaps every day), but not for +customers who use the application only once a week. + +Caching typically works well with data that is immutable or that changes +infrequently. Examples include reference information such as product and pricing +information in an e-commerce application, or shared static resources that are costly +to construct. 
Some or all of this data can be loaded into the cache at application +startup to minimize demand on resources and to improve performance. It might also be +appropriate to have a background process that periodically updates reference data +in the cache to ensure it is up to date, or that refreshes the cache when reference +data changes. + +Caching is less useful for dynamic data, although there are some exceptions to +this consideration (see the section Cache highly dynamic data later in this +article for more information). When the original data changes regularly, either +the cached information becomes stale very quickly or the overhead of synchronizing the cache with the original data store reduces the effectiveness of +caching. + +Note that a cache does not have to include the complete data for an +entity. For example, if a data item represents a multivalued object such as a bank +customer with a name, address, and account balance, some of these elements might +remain static (such as the name and address), while others (such as the account balance) +might be more dynamic. In these situations, it can be useful to cache the static +portions of the data and retrieve (or calculate) only the remaining information when it is required. + +We recommend that you carry out performance testing and usage analysis to determine whether +pre-population or on-demand loading of the cache, or a combination of both, is +appropriate. The decision should be based on the volatility and +usage pattern of the data. Cache utilization and performance analysis is +particularly important in applications that encounter heavy loads and must be +highly scalable. For example, in highly scalable scenarios it might make sense to +seed the cache to reduce the load on the data store at peak times. + +Caching can also be used to avoid repeating computations while the application is +running. 
If an operation transforms data or performs a complicated calculation, +it can save the results of the operation in the cache. If the same calculation +is required afterward, the application can simply retrieve the results from +the cache. + +An application can modify data that's held in a cache. However, we recommend thinking of the +cache as a transient data store that could disappear at any time. Do not store +valuable data in the cache only; make sure that you maintain the information +in the original data store as well. This means that if the cache becomes +unavailable, you minimize the chance of losing data. + +### Cache highly dynamic data +When you store rapidly-changing information in a persistent data store, it can impose +an overhead on the system. For example, consider a device that continually reports +status or some other measurement. If an application chooses not to cache this +data on the basis that the cached information will nearly always be outdated, then +the same consideration could be true when storing and retrieving this information +from the data store. In the time it takes to save and fetch this data, it might have +changed. + +In a situation such as this, consider the benefits of storing the dynamic +information directly in the cache instead of in the persistent data store. If the +data is non-critical and does not require auditing, then it doesn't matter +if the occasional change is lost. + +### Manage data expiration in a cache +In most cases, data that's held in a cache is a copy of data that's held in the original data +store. The data in the original data store might change after it was cached, causing +the cached data to become stale. Many caching systems enable you to configure the +cache to expire data and reduce the period for which data may be out of date. 
+ +When cached data expires, it's removed from the cache, and the application must +retrieve the data from the original data store (it can put the newly-fetched +information back into cache). You can set a default expiration policy when you +configure the cache. In many cache services, you can also stipulate the expiration +period for individual objects when you store them programmatically in the cache. +Some caches enable you to specify the expiration period as an absolute value, or +as a sliding value that causes the item to be removed from the cache if it is not +accessed within the specified time. This setting overrides any cache-wide +expiration policy, but only for the specified objects. + +> [!NOTE] +> Consider the expiration period for the cache and the objects that it contains carefully. If you make it too short, objects will expire too quickly and you will reduce the benefits of using the cache. If you make the period too long, you risk the data becoming stale. +> +> + +It's also possible that the cache might fill up if data is allowed to remain +resident for a long time. In this case, any requests to add new items to the +cache might cause some items to be forcibly removed in a process known as +eviction. Cache services typically evict data on a least-recently-used (LRU) +basis, but you can usually override this policy and prevent items from being +evicted. However, if you adopt this approach, you risk exceeding the +memory that's available in the cache. An application that attempts to add an item +to the cache will fail with an exception. + +Some caching implementations might provide additional eviction policies. There are several types of eviction policies. These include: + +* A most-recently-used policy (in the expectation that the + data will not be required again). +* A first-in-first-out policy (oldest data is + evicted first). +* An explicit removal policy based on a triggered event (such as the + data being modified). 
+ +### Invalidate data in a client-side cache +Data that's held in a client-side cache is generally considered to be outside +the auspices of the service that provides the data to the client. A service +cannot directly force a client to add or remove information from a +client-side cache. + +This means that it's possible for a client that uses +a poorly configured cache to continue using outdated information. For example, if the expiration policies of the cache aren't +properly implemented, a client might use outdated information that's cached +locally when the information in the original data source has changed. + +If you are building a web application that serves data over an HTTP +connection, you can implicitly force a web client (such as a browser or +web proxy) to fetch the most recent information. You can do this if a resource is updated by a change in the URI of that resource. Web clients typically use the URI +of a resource as the key in the client-side cache, so if the URI changes, +the web client ignores any previously cached versions of a +resource and fetches the new version instead. + +## Managing concurrency in a cache +Caches are often designed to be shared by multiple instances of an +application. Each application instance can read and modify data in +the cache. Consequently, the same concurrency issues that arise with +any shared data store also apply to a cache. In a situation +where an application needs to modify data that's held in the cache, you might +need to ensure that updates made by one instance of the application +do not overwrite the changes made by another instance. + +Depending on the nature of the data and the likelihood of collisions, +you can adopt one of two approaches to concurrency: + +* **Optimistic.** Immediately prior to updating the data, the application checks to see whether the data in the cache has changed since it was retrieved. If the data is still the same, the change can be made. 
 Otherwise, the application has to decide whether to update it. (The business logic that drives this decision will be application-specific.) This approach is suitable for situations where updates are infrequent, or where collisions are unlikely to occur. +* **Pessimistic.** When it retrieves the data, the application locks it in the cache to prevent another instance from changing it. This process ensures that collisions cannot occur, but it can also block other instances that need to process the same data. Pessimistic concurrency can affect the scalability of a solution and is recommended only for short-lived operations. This approach might be appropriate for situations where collisions are more likely, especially if an application updates multiple items in the cache and must ensure that these changes are applied consistently. + +### Implement high availability and scalability, and improve performance +Avoid using a cache as the primary repository of data; this is the role +of the original data store from which the cache is populated. The +original data store is responsible for ensuring the persistence of the +data. + +Be careful not to introduce critical dependencies on the availability +of a shared cache service into your solutions. An application should be +able to continue functioning if the service that provides the shared cache +is unavailable. The application should not hang or fail while waiting +for the cache service to resume. + +Therefore, the application must be +prepared to detect the availability of the cache service and fall back +to the original data store if the cache is inaccessible. The +[Circuit-Breaker pattern](http://msdn.microsoft.com/library/dn589784.aspx) is useful for handling this scenario.
 The +service that provides the cache can be recovered, and once it becomes +available, the cache can be repopulated as data is read from the +original data store, following a strategy such as the [Cache-aside pattern](http://msdn.microsoft.com/library/dn589799.aspx). + +However, there might be a scalability impact on the system if the application falls back to the original data store when the cache is +temporarily unavailable. +While the cache is being recovered, the original data store +could be swamped with requests for data, resulting in timeouts and +failed connections. + +Consider +implementing a local, private cache in each instance of an application, +together with the shared cache that all application instances +access. When the application retrieves an item, it can check first +in its local cache, then in the shared cache, and finally in the original +data store. The local cache can be populated using the data in either the +shared cache, or in the database if the shared cache is unavailable. + +This approach requires careful configuration to prevent the local +cache from becoming too stale with respect to the shared cache. However, the local cache acts as a buffer if the shared cache is unreachable. Figure 3 +shows this structure. + +![Using a local, private cache with a shared cache](./images/caching/Caching3.png) +*Figure 3: Using a local, private cache with a shared cache* + +To support large caches that hold relatively long-lived data, some +cache services provide a high-availability option that implements +automatic failover if the cache becomes unavailable. This approach +typically involves replicating the cached data that's stored on a primary +cache server to a secondary cache server, and switching to the +secondary server if the primary server fails or connectivity is +lost.
+ +To reduce the latency that's associated with writing to multiple +destinations, the replication to the secondary server might occur +asynchronously when data is written to the cache on the primary +server. This approach leads to the possibility that some +cached information might be lost in the event of a failure, but the +proportion of this data should be small compared to the overall +size of the cache. + +If a shared cache is large, it might be beneficial to partition the +cached data across nodes to reduce the chances of contention and +improve scalability. Many shared caches support the ability to +dynamically add (and remove) nodes and rebalance the data across +partitions. This approach might involve clustering, in which the +collection of nodes is presented to client applications as a +seamless, single cache. Internally, however, the data is dispersed +between nodes following a predefined distribution strategy +that balances the load evenly. The [Data partitioning guidance document](http://msdn.microsoft.com/library/dn589795.aspx) +on the Microsoft website provides more information about possible +partitioning strategies. + +Clustering can also increase the availability of the cache. If a +node fails, the remainder of the cache is still accessible. +Clustering is frequently used in conjunction with replication +and failover. Each node can be replicated, and the replica can be +quickly brought online if the node fails. + +Many read and write operations are likely to involve single data +values or objects. However, at times it might be +necessary to store or retrieve large volumes of data quickly. +For example, seeding a cache could involve writing hundreds or +thousands of items to the cache. An application might also need to +retrieve a large number of related items from the cache as +part of the same request. + +Many large-scale caches provide batch +operations for these purposes. 
This enables a client application to +package up a large volume of items into a single request and +reduces the overhead that's associated with performing a large number +of small requests. + +## Caching and eventual consistency +For the cache-aside pattern to work, the instance of the application +that populates the cache must have access to the most recent and +consistent version of the data. In a system that implements +eventual consistency (such as a replicated data store) this might +not be the case. + +One instance of an application could modify a +data item and invalidate the cached version of that item. Another +instance of the application might attempt to read this item from +a cache, which causes a cache-miss, so it reads the data from the +data store and adds it to the cache. However, if the data store +has not been fully synchronized with the other replicas, the +application instance could read and populate the cache with the +old value. + +For more information about handling data consistency, see the +[Data consistency primer](http://msdn.microsoft.com/library/dn589800.aspx) page on the Microsoft website. + +### Protect cached data +Irrespective of the cache service you use, consider +how to protect the data that's held in the cache from unauthorized +access. There are two main concerns: + +* The privacy of the data in the cache +* The privacy of data as it flows between the cache and the + application that's using the cache + +To protect data in the cache, the cache service might implement +an authentication mechanism that requires that applications specify the following: + +* Which identities can access data in the cache. +* Which operations (read and write) that these identities are + allowed to perform. + +To reduce overhead that's associated with +reading and writing data, after an identity has been granted +write and/or read access to the cache, that identity can use +any data in the cache. 
+ +If you need to restrict access to +subsets of the cached data, you can do one of the following: + +* Split the cache into partitions (by using different cache + servers) and only grant access to identities for the + partitions that they should be allowed to use. +* Encrypt the data in each subset by using different keys, + and provide the encryption keys only to identities that + should have access to each subset. A client application + might still be able to retrieve all of the data in the cache, + but it will only be able to decrypt the data for which it + has the keys. + +You must also protect the data as it flows in and out of the cache. To do this, +you depend on the security features provided by the network +infrastructure that client applications use to connect to the +cache. If the cache is implemented using an on-site server +within the same organization that hosts the client applications, +then the isolation of the network itself might not require you to +take additional steps. If the cache is located remotely and +requires a TCP or HTTP connection over a public network (such +as the Internet), consider implementing SSL. + +## Considerations for implementing caching with Microsoft Azure +Azure provides the Azure Redis Cache. This is an implementation +of the open source Redis cache that runs as a service in an +Azure datacenter. It provides a caching service that can be +accessed from any Azure application, whether the application +is implemented as a cloud service, a website, or inside an +Azure virtual machine. Caches can be shared by client +applications that have the appropriate access key. + +Azure Redis Cache is a high-performance caching solution that provides +availability, scalability and security. It typically runs +as a service spread across one or more dedicated machines. It +attempts to store as much information as it can in memory to +ensure fast access. 
 This architecture is intended to provide +low latency and high throughput by reducing the need to +perform slow I/O operations. + + Azure Redis Cache is compatible with many of the various +APIs that are used by client applications. If you have existing +applications that already use Redis running on-premises, the +Azure Redis Cache provides a quick migration path to caching +in the cloud. + +> [!NOTE] +> Azure also provides the Managed Cache Service. This +> service is based on the Azure Service Fabric Cache engine. It +> enables you to create a distributed cache that can be shared +> by loosely-coupled applications. The cache is hosted on +> high-performance servers running in an Azure datacenter. +> However, this option is no longer recommended and is only +> provided to support existing applications that have been built +> to use it. For all new development, use Azure Redis +> Cache instead. +> +> Additionally, Azure supports in-role caching. This feature +> enables you to create a cache that's specific to a cloud service. +> The cache is hosted by instances of a web or worker role, and +> can only be accessed by roles that are operating as part of the same +> cloud service deployment unit. (A deployment unit is the set +> of role instances that are deployed as a cloud service to a specific +> region.) The cache is clustered, and all instances of the +> role within the same deployment unit that hosts the cache +> become part of the same cache cluster. However, this option is +> no longer recommended and is only provided to support existing +> applications that have been built to use it. For all new +> development, use Azure Redis Cache instead. +> +> Both Azure Managed Cache Service and Azure In-Role Cache +> are currently slated for retirement on November 16th, 2016. +> It is recommended that you migrate to Azure Redis Cache in +> preparation for this retirement.
 For more information, see +> [What is Azure Redis Cache offering and what size should I use?](/azure/redis-cache/cache-faq#what-redis-cache-offering-and-size-should-i-use). +> +> + +### Features of Redis + Redis is more than a simple cache server. It provides a distributed in-memory +database with an extensive command set that supports many common scenarios. These +are described later in this document, in the section Using Redis caching. This section summarizes some of the key features that Redis +provides. + +### Redis as an in-memory database +Redis supports both read and write operations. In Redis, writes can be protected from system failure either by being stored periodically in a local snapshot file or in an append-only log file. This is not the case in many caches (which should be considered transitory data stores). + + All writes are asynchronous and do not block clients from reading and writing data. When Redis starts running, it reads the data from the snapshot or log file and uses it to construct the in-memory cache. For more information, see [Redis persistence](http://redis.io/topics/persistence) on the Redis website. + +> [!NOTE] +> Redis does not guarantee that all writes will be saved in the event +> of a catastrophic failure, but at worst you might lose only a few seconds +> worth of data. Remember that a cache is not intended to act as an +> authoritative data source, and it is the responsibility of the applications +> using the cache to ensure that critical data is saved successfully to an +> appropriate data store. For more information, see the [cache-aside pattern](http://msdn.microsoft.com/library/dn589799.aspx). +> +> + +#### Redis data types +Redis is a key-value store, where values can contain simple types or complex data structures such as hashes, lists, and sets. It supports a set of atomic operations on these data types.
Keys can be permanent or tagged with a limited time-to-live, at which point the key and its corresponding value are automatically removed from the cache. For more information about Redis keys and values, visit the page [An introduction to Redis data types and abstractions](http://redis.io/topics/data-types-intro) on the Redis website. + +#### Redis replication and clustering +Redis supports master/subordinate replication to help ensure availability and maintain throughput. Write operations to a Redis master node are replicated to one or more subordinate nodes. Read operations can be served by the master or any of the subordinates. + +In the event of a network partition, subordinates can continue to serve data and then transparently resynchronize with the master when the connection is reestablished. For further details, visit the [Replication](http://redis.io/topics/replication) page on the Redis website. + +Redis also provides clustering, which enables you to transparently partition data into shards across servers and spread the load. This feature improves scalability, because new Redis servers can be added and the data repartitioned as the size of the cache increases. + +Furthermore, each server in the cluster can be replicated by using master/subordinate replication. This ensures availability across each node in the cluster. For more information about clustering and sharding, visit the [Redis cluster tutorial page](http://redis.io/topics/cluster-tutorial) on the Redis website. + +### Redis memory use +A Redis cache has a finite size that depends on the resources available on the host computer. When you configure a Redis server, you can specify the maximum amount of memory it can use. You can also configure a key in a Redis cache to have an expiration time, after which it is automatically removed from the cache. This feature can help prevent the in-memory cache from filling with old or stale data. 
+ +As memory fills up, Redis can automatically evict keys and their values by following a number of policies. The default is LRU (least recently used), but you can also select other policies such as evicting keys at random or turning off eviction altogether (in which case, attempts to add items to the cache fail if it is full). The page [Using Redis as an LRU cache](http://redis.io/topics/lru-cache) provides more information. + +### Redis transactions and batches +Redis enables a client application to submit a series of operations that read and write data in the cache as an atomic transaction. All the commands in the transaction are guaranteed to run sequentially, and no commands issued by other concurrent clients will be interwoven between them. + +However, these are not true transactions as a relational database would perform them. Transaction processing consists of two stages--the first is when the commands are queued, and the second is when the commands are run. During the command queuing stage, the commands that comprise the transaction are submitted by the client. If some sort of error occurs at this point (such as a syntax error, or the wrong number of parameters) then Redis refuses to process the entire transaction and discards it. + +During the run phase, Redis performs each queued command in sequence. If a command fails during this phase, Redis continues with the next queued command and does not roll back the effects of any commands that have already been run. This simplified form of transaction helps to maintain performance and avoid performance problems that are caused by contention. + +Redis does implement a form of optimistic locking to assist in maintaining consistency. For detailed information about transactions and locking with Redis, visit the [Transactions page](http://redis.io/topics/transactions) on the Redis website. + +Redis also supports non-transactional batching of requests.
The Redis protocol that clients use to send commands to a Redis server enables a client to send a series of operations as part of the same request. This can help to reduce packet fragmentation on the network. When the batch is processed, each command is performed. If any of these commands are malformed, they will be rejected (which doesn't happen with a transaction), but the remaining commands will be performed. There is also no guarantee about the order in which the commands in the batch will be processed. + +### Redis security +Redis is focused purely on providing fast access to data, and is designed to run inside a trusted environment that can be accessed only by trusted clients. Redis supports a limited security model based on password authentication. (It is possible to remove authentication completely, although we don't recommend this.) + +All authenticated clients share the same global password and have access to the same resources. If you need more comprehensive sign-in security, you must implement your own security layer in front of the Redis server, and all client requests should pass through this additional layer. Redis should not be directly exposed to untrusted or unauthenticated clients. + +You can restrict access to commands by disabling them or renaming them (and by providing only privileged clients with the new names). + +Redis does not directly support any form of data encryption, so all encoding must be performed by client applications. Additionally, Redis does not provide any form of transport security. If you need to protect data as it flows across the network, we recommend implementing an SSL proxy. + +For more information, visit the [Redis security](http://redis.io/topics/security) page on the Redis website. + +> [!NOTE] +> Azure Redis Cache provides its own security layer through which clients connect. The underlying Redis +> servers are not exposed to the public network. 
+> +> + +### Using the Azure Redis cache +The Azure Redis Cache provides access to Redis servers running on servers hosted at an Azure datacenter; it acts as a façade that provides access control and security. You can provision a cache by using the Azure Management portal. The portal provides a number of predefined configurations, ranging from a 53GB cache running as a dedicated service that supports SSL communications (for privacy) and master/subordinate replication with an SLA of 99.9% availability, down to a 250MB cache without replication (no availability guarantees) running on shared hardware. + +Using the Azure Management portal, you can also configure the eviction policy of the cache, and control access to the cache by adding users to the roles provided: Owner, Contributor, and Reader. These roles define the operations that members can perform. For example, members of the Owner role have complete control over the cache (including security) and its contents, members of the Contributor role can read and write information in the cache, and members of the Reader role can only retrieve data from the cache. + +Most administrative tasks are performed through the Azure Management portal, and for this reason many of the administrative commands available in the standard version of Redis are not available, including the ability to modify the configuration programmatically, shut down the Redis server, configure additional subordinates, or forcibly save data to disk. + +The Azure management portal includes a convenient graphical display that enables you to monitor the performance of the cache. For example, you can view the number of connections being made, the number of requests performed, the volume of reads and writes, and the number of cache hits versus cache misses. Using this information, you can determine the effectiveness of the cache and if necessary switch to a different configuration or change the eviction policy.
Additionally, you can create alerts that send email messages to an administrator if one or more critical metrics fall outside of an expected range. For example, if the number of cache misses exceeds a specified value in the last hour, an administrator could be alerted as the cache may be too small or data may be being evicted too quickly. + +You can also monitor CPU, memory, and network usage for the cache. + +For further information and examples showing how to create and configure an Azure Redis Cache, visit the page [Lap around Azure Redis Cache](https://azure.microsoft.com/blog/2014/06/04/lap-around-azure-redis-cache-preview/) on the Azure blog. + +## Caching session state and HTML output +If you are building ASP.NET web applications that run by using Azure web roles, you can save session state information and HTML output in an Azure Redis Cache. The Session State Provider for Azure Redis Cache enables you to share session information between different instances of an ASP.NET web application, and is very useful in web farm situations where client-server affinity is not available and caching session data in-memory would not be appropriate. + +Using the Session State Provider with Azure Redis Cache delivers several benefits, including: + +* It can share session state amongst a large number of instances of an ASP.NET web application, providing improved scalability, +* It supports controlled, concurrent access to the same session state data for multiple readers and a single writer, and +* It can use compression to save memory and improve network performance. + +For more information, visit the [ASP.NET Session State Provider for Azure Redis Cache](/azure/redis-cache/cache-aspnet-session-state-provider/) page on the Microsoft website. + +> [!NOTE] +> Do not use the Session State Provider for Azure Redis Cache for ASP.NET applications that run outside of the Azure environment. 
The latency of accessing the cache from outside of Azure can eliminate the performance benefits of caching data. +> +> + +Similarly, the Output Cache Provider for Azure Redis Cache enables you to save the HTTP responses generated by an ASP.NET web application. Using the Output Cache Provider with Azure Redis Cache can improve the response times of applications that render complex HTML output; application instances generating similar responses can make use of the shared output fragments in the cache rather than generating this HTML output afresh. For more information visit the [ASP.NET Output Cache Provider for Azure Redis Cache](/azure/redis-cache/cache-aspnet-output-cache-provider/) page on the Microsoft website. + +### Azure Redis cache +Azure Redis Cache provides access to Redis servers that are hosted at an Azure datacenter. It acts as a façade that provides access control and security. You can provision a cache by using the Azure portal. + +The portal provides a number of predefined configurations. These range from a 53 GB cache running as a dedicated service that supports SSL communications (for privacy) and master/subordinate replication with an SLA of 99.9% availability, down to a 250 MB cache without replication (no availability guarantees) running on shared hardware. + +Using the Azure portal, you can also configure the eviction policy of the cache, and control access to the cache by adding users to the roles provided. These roles, which define the operations that members can perform, include Owner, Contributor, and Reader. For example, members of the Owner role have complete control over the cache (including security) and its contents, members of the Contributor role can read and write information in the cache, and members of the Reader role can only retrieve data from the cache. + +Most administrative tasks are performed through the Azure portal. 
For this reason, many of the administrative commands that are available in the standard version of Redis are not available, including the ability to modify the configuration programmatically, shut down the Redis server, configure additional subordinates, or forcibly save data to disk. + +The Azure portal includes a convenient graphical display that enables you to monitor the performance of the cache. For example, you can view the number of connections being made, the number of requests being performed, the volume of reads and writes, and the number of cache hits versus cache misses. Using this information, you can determine the effectiveness of the cache and if necessary, switch to a different configuration or change the eviction policy. + +Additionally, you can create alerts that send email messages to an administrator if one or more critical metrics fall outside of an expected range. For example, you might want to alert an administrator if the number of cache misses exceeds a specified value in the last hour, because it means the cache might be too small or data might be being evicted too quickly. + +You can also monitor the CPU, memory, and network usage for the cache. + +For further information and examples showing how to create and configure an Azure Redis Cache, visit the page [Lap around Azure Redis Cache](https://azure.microsoft.com/blog/2014/06/04/lap-around-azure-redis-cache-preview/) on the Azure blog. + +## Caching session state and HTML output +If you're building ASP.NET web applications that run by using Azure web roles, you can save session state information and HTML output in an Azure Redis Cache. The session state provider for Azure Redis Cache enables you to share session information between different instances of an ASP.NET web application, and is very useful in web farm situations where client-server affinity is not available and caching session data in-memory would not be appropriate. 
+ +Using the session state provider with Azure Redis Cache delivers several benefits, including: + +* Sharing session state with a large number of instances of ASP.NET web applications. +* Providing improved scalability. +* Supporting controlled, concurrent access to the same session state data for multiple readers and a single writer. +* Using compression to save memory and improve network performance. + +For more information, visit the [ASP.NET session state provider for Azure Redis Cache](/azure/redis-cache/cache-aspnet-session-state-provider/) page on the Microsoft website. + +> [!NOTE] +> Do not use the session state provider for Azure Redis Cache with ASP.NET applications that run outside of the Azure environment. The latency of accessing the cache from outside of Azure can eliminate the performance benefits of caching data. +> +> + +Similarly, the output cache provider for Azure Redis Cache enables you to save the HTTP responses generated by an ASP.NET web application. Using the output cache provider with Azure Redis Cache can improve the response times of applications that render complex HTML output. Application instances that generate similar responses can make use of the shared output fragments in the cache rather than generating this HTML output afresh. For more information, visit the [ASP.NET output cache provider for Azure Redis Cache](/azure/redis-cache/cache-aspnet-output-cache-provider/) page on the Microsoft website. + +## Building a custom Redis cache +Azure Redis Cache acts as a façade to the underlying Redis servers. Currently it supports a fixed set of configurations but does not provide for Redis clustering. If you require an advanced configuration that is not covered by the Azure Redis cache (such as a cache bigger than 53 GB) you can build and host your own Redis servers by using Azure virtual machines. 
+ +This is a potentially complex process because you might need to create several VMs to act as master and subordinate nodes if you want to implement replication. Furthermore, if you wish to create a cluster, then you need multiple masters and subordinate servers. A minimal clustered replication topology that provides a high degree of availability and scalability comprises at least six VMs organized as three pairs of master/subordinate servers (a cluster must contain at least three master nodes). + +Each master/subordinate pair should be located close together to minimize latency. However, each set of pairs can be running in different Azure datacenters located in different regions, if you wish to locate cached data close to the applications that are most likely to use it. The page [Running Redis on a CentOS Linux VM in Azure](http://blogs.msdn.com/b/tconte/archive/2012/06/08/running-redis-on-a-centos-linux-vm-in-windows-azure.aspx) on the Microsoft website walks through an example that shows how to build and configure a Redis node running as an Azure VM. + +> [!NOTE] +> If you implement your own Redis cache in this way, you are responsible for monitoring, managing, and securing the service. + +## Partitioning a Redis cache +Partitioning the cache involves splitting the cache across multiple computers. This structure gives you several advantages over using a single cache server, including: + +* Creating a cache that is much bigger than can be stored on a single server. +* Distributing data across servers, improving availability. If one server fails or becomes inaccessible, the data that it holds is unavailable, but the data on the remaining servers can still be accessed. For a cache, this is not crucial because the cached data is only a transient copy of the data that's held in a database. Cached data on a server that becomes inaccessible can be cached on a different server instead. 
+* Spreading the load across servers, thereby improving performance and scalability. +* Geolocating data close to the users that access it, thus reducing latency. + +For a cache, the most common form of partitioning is sharding. In this strategy, each partition (or shard) is a Redis cache in its own right. Data is directed to a specific partition by using sharding logic, which can use a variety of approaches to distribute the data. The [Sharding pattern](http://msdn.microsoft.com/library/dn589797.aspx) provides more information about implementing sharding. + +To implement partitioning in a Redis cache, you can take one of the following approaches: + +* *Server-side query routing.* In this technique, a client application sends a request to any of the + Redis servers that comprise the cache (probably the closest server). Each Redis server stores + metadata that describes the partition that it holds, and also contains information about which + partitions are located on other servers. The Redis server examines the client request. If it + can be resolved locally, it will perform the requested operation. Otherwise it will forward the + request on to the appropriate server. This model is implemented by Redis clustering, and is + described in more detail on the [Redis cluster tutorial](http://redis.io/topics/cluster-tutorial) page on the Redis website. Redis clustering + is transparent to client applications, and additional Redis servers can be added to the cluster + (and the data re-partitioned) without requiring that you reconfigure the clients. +* *Client-side partitioning.* In this model, the client application contains logic (possibly in + the form of a library) that routes requests to the appropriate Redis server. This approach + can be used with Azure Redis Cache. Create multiple Azure Redis Caches (one for each data + partition) and implement the client-side logic that routes the requests to the correct + cache. 
If the partitioning scheme changes (if additional Azure Redis Caches are created, + for example), client applications might need to be reconfigured. +* *Proxy-assisted partitioning.* In this scheme, client applications send requests to an + intermediary proxy service which understands how the data is partitioned and then routes + the request to the appropriate Redis server. This approach can also be used with Azure + Redis Cache; the proxy service can be implemented as an Azure cloud service. This + approach requires an additional level of complexity to implement the service, and + requests might take longer to perform than using client-side partitioning. + +The page [Partitioning: how to split data among multiple Redis instances](http://redis.io/topics/partitioning) +on the Redis website provides further information about implementing partitioning with Redis. + +### Implement Redis cache client applications +Redis supports client applications written in numerous programming languages. If you are building new applications by using the .NET Framework, the recommended approach is to use the StackExchange.Redis client library. This library provides a .NET Framework object model that abstracts the details for connecting to a Redis server, sending commands, and receiving responses. It is available in Visual Studio as a NuGet package. You can use this same library to connect to an Azure Redis Cache, or a custom Redis cache hosted on a VM. + +To connect to a Redis server you use the static `Connect` method of the `ConnectionMultiplexer` class. The connection that this method creates is designed to be used throughout the lifetime of the client application, and the same connection can be used by multiple concurrent threads. Do not reconnect and disconnect each time you perform a Redis operation because this can degrade performance. + +You can specify the connection parameters, such as the address of the Redis host and the password. 
If you are using Azure Redis Cache, the password is either the primary or secondary key that is generated for Azure Redis Cache by using the Azure Management portal. + +After you have connected to the Redis server, you can obtain a handle on the Redis database that acts as the cache. The Redis connection provides the `GetDatabase` method to do this. You can then retrieve items from the cache and store data in the cache by using the `StringGet` and `StringSet` methods. These methods expect a key as a parameter, and either return the item in the cache that has a matching key (`StringGet`) or add the item to the cache with this key (`StringSet`). + +Depending on the location of the Redis server, many operations might incur some latency while a request is transmitted to the server and a response is returned to the client. The StackExchange library provides asynchronous versions of many of the methods that it exposes to help client applications remain responsive. These methods support the [Task-based Asynchronous Pattern](http://msdn.microsoft.com/library/hh873175.aspx) in the .NET Framework. + +The following code snippet shows a method named `RetrieveItem`. It illustrates an implementation of the cache-aside pattern based on Redis and the StackExchange library. The method takes a string key value and attempts to retrieve the corresponding item from the Redis cache by calling the `StringGetAsync` method (the asynchronous version of `StringGet`). + +If the item is not found, it is fetched from the underlying data source using the `GetItemFromDataSourceAsync` method (which is a local method and not part of the StackExchange library). It's then added to the cache by using the `StringSetAsync` method so it can be retrieved more quickly next time. 
+ +```csharp +// Connect to the Azure Redis cache +ConfigurationOptions config = new ConfigurationOptions(); +config.EndPoints.Add(".redis.cache.windows.net"); +config.Password = ""; +ConnectionMultiplexer redisHostConnection = ConnectionMultiplexer.Connect(config); +IDatabase cache = redisHostConnection.GetDatabase(); +... +private async Task RetrieveItem(string itemKey) +{ + // Attempt to retrieve the item from the Redis cache + string itemValue = await cache.StringGetAsync(itemKey); + + // If the value returned is null, the item was not found in the cache + // So retrieve the item from the data source and add it to the cache + if (itemValue == null) + { + itemValue = await GetItemFromDataSourceAsync(itemKey); + await cache.StringSetAsync(itemKey, itemValue); + } + + // Return the item + return itemValue; +} +``` + +The `StringGet` and `StringSet` methods are not restricted to retrieving or storing string values. They can take any item that is serialized as an array of bytes. If you need to save a .NET object, you can serialize it as a byte stream and use the `StringSet` method to write it to the cache. + +Similarly, you can read an object from the cache by using the `StringGet` method and deserializing it as a .NET object. 
The following code shows a set of extension methods for the IDatabase interface (the `GetDatabase` method of a Redis connection returns an `IDatabase` object), and some sample code that uses these methods to read and write a `BlogPost` object to the cache: + +```csharp +public static class RedisCacheExtensions +{ + public static async Task GetAsync(this IDatabase cache, string key) + { + return Deserialize(await cache.StringGetAsync(key)); + } + + public static async Task GetAsync(this IDatabase cache, string key) + { + return Deserialize(await cache.StringGetAsync(key)); + } + + public static async Task SetAsync(this IDatabase cache, string key, object value) + { + await cache.StringSetAsync(key, Serialize(value)); + } + + static byte[] Serialize(object o) + { + byte[] objectDataAsStream = null; + + if (o != null) + { + BinaryFormatter binaryFormatter = new BinaryFormatter(); + using (MemoryStream memoryStream = new MemoryStream()) + { + binaryFormatter.Serialize(memoryStream, o); + objectDataAsStream = memoryStream.ToArray(); + } + } + + return objectDataAsStream; + } + + static T Deserialize(byte[] stream) + { + T result = default(T); + + if (stream != null) + { + BinaryFormatter binaryFormatter = new BinaryFormatter(); + using (MemoryStream memoryStream = new MemoryStream(stream)) + { + result = (T)binaryFormatter.Deserialize(memoryStream); + } + } + + return result; + } +} +``` + +The following code illustrates a method named `RetrieveBlogPost` that uses these extension methods to read and write a serializable `BlogPost` object to the cache following the cache-aside pattern: + +```csharp +// The BlogPost type +[Serializable] +private class BlogPost +{ + private HashSet tags = new HashSet(); + + public BlogPost(int id, string title, int score, IEnumerable tags) + { + this.Id = id; + this.Title = title; + this.Score = score; + this.tags = new HashSet(tags); + } + + public int Id { get; set; } + public string Title { get; set; } + public int Score { get; set; } + 
public ICollection Tags { get { return this.tags; } } +} +... +private async Task RetrieveBlogPost(string blogPostKey) +{ + BlogPost blogPost = await cache.GetAsync(blogPostKey); + if (blogPost == null) + { + blogPost = await GetBlogPostFromDataSourceAsync(blogPostKey); + await cache.SetAsync(blogPostKey, blogPost); + } + + return blogPost; +} +``` + +Redis supports command pipelining if a client application sends multiple asynchronous requests. Redis can multiplex the requests using the same connection rather than receiving and responding to commands in a strict sequence. + +This approach helps to reduce latency by making more efficient use of the network. The following code snippet shows an example that retrieves the details of two customers concurrently. The code submits two requests and then performs some other processing (not shown) before waiting to receive the results. The `Wait` method of the cache object is similar to the .NET Framework `Task.Wait` method: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +var task1 = cache.StringGetAsync("customer:1"); +var task2 = cache.StringGetAsync("customer:2"); +... +var customer1 = cache.Wait(task1); +var customer2 = cache.Wait(task2); +``` + +The page [Azure Redis Cache documentation](https://azure.microsoft.com/documentation/services/cache/) on the Microsoft website provides more information about how to write client applications that can use the Azure Redis Cache. Additional information is available on the [Basic usage page](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/Basics.md) on the StackExchange.Redis website. + +The page [Pipelines and multiplexers](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/PipelinesMultiplexers.md) on the same website provides more information about asynchronous operations and pipelining with Redis and the StackExchange library. 
The next section in this article, Using Redis Caching, provides examples of some of the more advanced techniques that you can apply to data that's held in a Redis cache. + +## Using Redis caching +The simplest use of Redis for caching is to store key-value pairs where the value is an uninterpreted string of arbitrary length that can contain any binary data. (It is essentially an array of bytes that can be treated as a string.) This scenario was illustrated in the section Implement Redis Cache client applications earlier in this article. + +Note that keys also contain uninterpreted data, so you can use any binary information as the key. The longer the key is, however, the more space it will take to store, and the longer it will take to perform lookup operations. For usability and ease of maintenance, design your keyspace carefully and use meaningful (but not verbose) keys. + +For example, use structured keys such as "customer:100" to represent the key for the customer with ID 100 rather than simply "100". This scheme enables you to easily distinguish between values that store different data types. For example, you could also use the key "orders:100" to represent the key for the order with ID 100. + +Apart from one-dimensional binary strings, a value in a Redis key-value pair can also hold more structured information, including lists, sets (sorted and unsorted), and hashes. Redis provides a comprehensive command set that can manipulate these types, and many of these commands are available to .NET Framework applications through a client library such as StackExchange. The page [An introduction to Redis data types and abstractions](http://redis.io/topics/data-types-intro) on the Redis website provides a more detailed overview of these types and the commands that you can use to manipulate them. + +This section summarizes some common use cases for these data types and commands. 
+ +### Perform atomic and batch operations +Redis supports a series of atomic get-and-set operations on string values. These operations remove the possible race hazards that might occur when using separate `GET` and `SET` commands. The operations that are available include: + +* `INCR`, `INCRBY`, `DECR`, and `DECRBY`, which perform atomic increment and decrement operations on + integer numeric data values. The StackExchange library provides overloaded versions of the + `IDatabase.StringIncrementAsync` and `IDatabase.StringDecrementAsync` methods to perform + these operations and return the resulting value that is stored in the cache. The following code + snippet illustrates how to use these methods: + + ```csharp + ConnectionMultiplexer redisHostConnection = ...; + IDatabase cache = redisHostConnection.GetDatabase(); + ... + await cache.StringSetAsync("data:counter", 99); + ... + long oldValue = await cache.StringIncrementAsync("data:counter"); + // Increment by 1 (the default) + // oldValue should be 100 + + long newValue = await cache.StringDecrementAsync("data:counter", 50); + // Decrement by 50 + // newValue should be 50 + ``` +* `GETSET`, which retrieves the value that's associated with a key and changes it to a new value. The + StackExchange library makes this operation available through the `IDatabase.StringGetSetAsync` + method. The code snippet below shows an example of this method. This code returns the current + value that's associated with the key "data:counter" from the previous example. Then it resets the value + for this key back to zero, all as part of the same operation: + + ```csharp + ConnectionMultiplexer redisHostConnection = ...; + IDatabase cache = redisHostConnection.GetDatabase(); + ... + string oldValue = await cache.StringGetSetAsync("data:counter", 0); + ``` +* `MGET` and `MSET`, which can return or change a set of string values as a single operation. 
The + `IDatabase.StringGetAsync` and `IDatabase.StringSetAsync` methods are overloaded to support + this functionality, as shown in the following example: + + ```csharp + ConnectionMultiplexer redisHostConnection = ...; + IDatabase cache = redisHostConnection.GetDatabase(); + ... + // Create a list of key-value pairs + var keysAndValues = + new List>() + { + new KeyValuePair("data:key1", "value1"), + new KeyValuePair("data:key99", "value2"), + new KeyValuePair("data:key322", "value3") + }; + + // Store the list of key-value pairs in the cache + cache.StringSet(keysAndValues.ToArray()); + ... + // Find all values that match a list of keys + RedisKey[] keys = { "data:key1", "data:key99", "data:key322"}; + RedisValue[] values = null; + values = cache.StringGet(keys); + // values should contain { "value1", "value2", "value3" } + ``` + +You can also combine multiple operations into a single Redis transaction as described in the Redis transactions and batches section earlier in this article. The StackExchange library provides support for transactions through the `ITransaction` interface. + +You create an `ITransaction` object by using the `IDatabase.CreateTransaction` method. You invoke commands to the transaction by using the methods provided by the `ITransaction` object. + +The `ITransaction` interface provides access to a set of methods that's similar to those accessed by the `IDatabase` interface, except that all the methods are asynchronous. This means that they are only performed when the `ITransaction.Execute` method is invoked. The value that's returned by the `ITransaction.Execute` method indicates whether the transaction was created successfully (true) or if it failed (false). + +The following code snippet shows an example that increments and decrements two counters as part of the same transaction: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... 
+ITransaction transaction = cache.CreateTransaction(); +var tx1 = transaction.StringIncrementAsync("data:counter1"); +var tx2 = transaction.StringDecrementAsync("data:counter2"); +bool result = transaction.Execute(); +Console.WriteLine("Transaction {0}", result ? "succeeded" : "failed"); +Console.WriteLine("Result of increment: {0}", tx1.Result); +Console.WriteLine("Result of decrement: {0}", tx2.Result); +``` + +Remember that Redis transactions are unlike transactions in relational databases. The `Execute` method simply queues all the commands that comprise the transaction to be run, and if any of them is malformed then the transaction is stopped. If all the commands have been queued successfully, each command runs asynchronously. + +If any command fails, the others still continue processing. If you need to verify that a command has completed successfully, you must fetch the results of the command by using the **Result** property of the corresponding task, as shown in the example above. Reading the **Result** property will block the calling thread until the task has completed. + +For more information, see the [Transactions in Redis](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/Transactions.md) page on the StackExchange.Redis website. + +When performing batch operations, you can use the `IBatch` interface of the StackExchange library. This interface provides access to a set of methods similar to those accessed by the `IDatabase` interface, except that all the methods are asynchronous. + +You create an `IBatch` object by using the `IDatabase.CreateBatch` method, and then run the batch by using the `IBatch.Execute` method, as shown in the following example. This code simply sets a string value, increments and decrements the same counters used in the previous example, and displays the results: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... 
+IBatch batch = cache.CreateBatch(); +batch.StringSetAsync("data:key1", 11); +var t1 = batch.StringIncrementAsync("data:counter1"); +var t2 = batch.StringDecrementAsync("data:counter2"); +batch.Execute(); +Console.WriteLine("{0}", t1.Result); +Console.WriteLine("{0}", t2.Result); +``` + +It is important to understand that unlike a transaction, if a command in a batch fails because it is malformed, the other commands might still run. The `IBatch.Execute` method does not return any indication of success or failure. + +### Perform fire and forget cache operations +Redis supports fire and forget operations by using command flags. In this situation, the client simply initiates an operation but has no interest in the result and does not wait for the command to be completed. The example below shows how to perform the INCR command as a fire and forget operation: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +await cache.StringSetAsync("data:key1", 99); +... +cache.StringIncrement("data:key1", flags: CommandFlags.FireAndForget); +``` + +### Specify automatically expiring keys +When you store an item in a Redis cache, you can specify a timeout after which the item will be automatically removed from the cache. You can also query how much more time a key has before it expires by using the `TTL` command. This command is available to StackExchange applications by using the `IDatabase.KeyTimeToLive` method. + +The following code snippet shows how to set an expiration time of 20 seconds on a key, and query the remaining lifetime of the key: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +// Add a key with an expiration time of 20 seconds +await cache.StringSetAsync("data:key1", 99, TimeSpan.FromSeconds(20)); +... 
+// Query how much time a key has left to live +// If the key has already expired, the KeyTimeToLive function returns a null +TimeSpan? expiry = cache.KeyTimeToLive("data:key1"); +``` + +You can also set the expiration time to a specific date and time by using the EXPIRE command, which is available in the StackExchange library as the `KeyExpireAsync` method: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +// Add a key with an expiration date of midnight on 1st January 2015 +await cache.StringSetAsync("data:key1", 99); +await cache.KeyExpireAsync("data:key1", + new DateTime(2015, 1, 1, 0, 0, 0, DateTimeKind.Utc)); +... +``` + +> *Tip:* You can manually remove an item from the cache by using the DEL command, which is available through the StackExchange library as the `IDatabase.KeyDeleteAsync` method. +> +> + +### Use tags to cross-correlate cached items +A Redis set is a collection of multiple items that share a single key. You can create a set by using the SADD command. You can retrieve the items in a set by using the SMEMBERS command. The StackExchange library implements the SADD command with the `IDatabase.SetAddAsync` method, and the SMEMBERS command with the `IDatabase.SetMembersAsync` method. + +You can also combine existing sets to create new sets by using the SDIFF (set difference), SINTER (set intersection), and SUNION (set union) commands. The StackExchange library unifies these operations in the `IDatabase.SetCombineAsync` method. The first parameter to this method specifies the set operation to perform. + +The following code snippets show how sets can be useful for quickly storing and retrieving collections of related items. This code uses the `BlogPost` type that was described in the section Implement Redis Cache Client Applications earlier in this article. + +A `BlogPost` object contains four fields—an ID, a title, a ranking score, and a collection of tags. 
The first code snippet below shows the sample data that's used for populating a C# list of `BlogPost` objects: + +```csharp +List<string[]> tags = new List<string[]>() +{ + new string[] { "iot","csharp" }, + new string[] { "iot","azure","csharp" }, + new string[] { "csharp","git","big data" }, + new string[] { "iot","git","database" }, + new string[] { "database","git" }, + new string[] { "csharp","database" }, + new string[] { "iot" }, + new string[] { "iot","database","git" }, + new string[] { "azure","database","big data","git","csharp" }, + new string[] { "azure" } +}; + +List<BlogPost> posts = new List<BlogPost>(); +int blogKey = 0; +int blogPostId = 0; +int numberOfPosts = 20; +Random random = new Random(); +for (int i = 0; i < numberOfPosts; i++) +{ + blogPostId = blogKey++; + posts.Add(new BlogPost( + blogPostId, // Blog post ID + string.Format(CultureInfo.InvariantCulture, "Blog Post #{0}", + blogPostId), // Blog post title + random.Next(100, 10000), // Ranking score + tags[i % tags.Count])); // Tags--assigned from a collection + // in the tags list +} +``` + +You can store the tags for each `BlogPost` object as a set in a Redis cache and associate each set with the ID of the `BlogPost`. This enables an application to quickly find all the tags that belong to a specific blog post. To enable searching in the opposite direction and find all blog posts that share a specific tag, you can create another set that holds the blog posts referencing the tag ID in the key: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +...
+// Tags are easily represented as Redis Sets +foreach (BlogPost post in posts) +{ + string redisKey = string.Format(CultureInfo.InvariantCulture, + "blog:posts:{0}:tags", post.Id); + // Add tags to the blog post in Redis + await cache.SetAddAsync( + redisKey, post.Tags.Select(s => (RedisValue)s).ToArray()); + + // Now do the inverse so we can figure how which blog posts have a given tag + foreach (var tag in post.Tags) + { + await cache.SetAddAsync(string.Format(CultureInfo.InvariantCulture, + "tag:{0}:blog:posts", tag), post.Id); + } +} +``` + +These structures enable you to perform many common queries very efficiently. For example, you can find and display all of the tags for blog post 1 like this: + +```csharp +// Show the tags for blog post #1 +foreach (var value in await cache.SetMembersAsync("blog:posts:1:tags")) +{ + Console.WriteLine(value); +} +``` + +You can find all tags that are common to blog post 1 and blog post 2 by performing a set intersection operation, as follows: + +```csharp +// Show the tags in common for blog posts #1 and #2 +foreach (var value in await cache.SetCombineAsync(SetOperation.Intersect, new RedisKey[] + { "blog:posts:1:tags", "blog:posts:2:tags" })) +{ + Console.WriteLine(value); +} +``` + +And you can find all blog posts that contain a specific tag: + +```csharp +// Show the ids of the blog posts that have the tag "iot". +foreach (var value in await cache.SetMembersAsync("tag:iot:blog:posts")) +{ + Console.WriteLine(value); +} +``` + +### Find recently accessed items +A common task required of many applications is to find the most recently accessed items. For example, a blogging site might want to display information about the most recently read blog posts. + +You can implement this functionality by using a Redis list. A Redis list contains multiple items that share the same key. The list acts as a double-ended queue. You can push items to either end of the list by using the LPUSH (left push) and RPUSH (right push) commands. 
You can retrieve items from either end of the list by using the LPOP and RPOP commands. You can also return a range of elements by using the LRANGE command. + +The code snippets below show how you can perform these operations by using the StackExchange library. This code uses the `BlogPost` type from the previous examples. As a blog post is read by a user, the `IDatabase.ListLeftPushAsync` method pushes the title of the blog post onto a list that's associated with the key "blog:recent_posts" in the Redis cache. + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +string redisKey = "blog:recent_posts"; +BlogPost blogPost = ...; // Reference to the blog post that has just been read +await cache.ListLeftPushAsync( + redisKey, blogPost.Title); // Push the blog post onto the list +``` + +As more blog posts are read, their titles are pushed onto the same list. The list is ordered by the sequence in which the titles have been added. The most recently read blog posts are towards the left end of the list. (If the same blog post is read more than once, it will have multiple entries in the list.) + +You can display the titles of the most recently read posts by using the `IDatabase.ListRangeAsync` method. This method takes the key that contains the list, a starting point, and an ending point. The following code retrieves the titles of the 10 blog posts (items from 0 to 9) at the left-most end of the list: + +```csharp +// Show latest ten posts +foreach (string postTitle in await cache.ListRangeAsync(redisKey, 0, 9)) +{ + Console.WriteLine(postTitle); +} +``` + +Note that the `ListRangeAsync` method does not remove items from the list. To do this, you can use the `IDatabase.ListLeftPopAsync` and `IDatabase.ListRightPopAsync` methods. + +To prevent the list from growing indefinitely, you can periodically cull items by trimming the list.
The code snippet below shows you how to remove all but the five left-most items from the list: + +```csharp +await cache.ListTrimAsync(redisKey, 0, 4); +``` + +### Implement a leader board +By default, the items in a set are not held in any specific order. You can create an ordered set by using the ZADD command (the `IDatabase.SortedSetAdd` method in the StackExchange library). The items are ordered by using a numeric value called a score, which is provided as a parameter to the command. + +The following code snippet adds the title of a blog post to an ordered set. In this example, each blog post also has a score field that contains the ranking of the blog post. + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +IDatabase cache = redisHostConnection.GetDatabase(); +... +string redisKey = "blog:post_rankings"; +BlogPost blogPost = ...; // Reference to a blog post that has just been rated +await cache.SortedSetAddAsync(redisKey, blogPost.Title, blogPost.Score); +``` + +You can retrieve the blog post titles and scores in ascending score order by using the `IDatabase.SortedSetRangeByRankWithScoresAsync` method: + +```csharp +foreach (var post in await cache.SortedSetRangeByRankWithScoresAsync(redisKey)) +{ + Console.WriteLine(post); +} +``` + +> [!NOTE] +> The StackExchange library also provides the `IDatabase.SortedSetRangeByRankAsync` method, which returns the data in score order, but does not return the scores. +> +> + +You can also retrieve items in descending order of scores, and limit the number of items that are returned by providing additional parameters to the `IDatabase.SortedSetRangeByRankWithScoresAsync` method.
The next example displays the titles and scores of the top 10 ranked blog posts: + +```csharp +foreach (var post in await cache.SortedSetRangeByRankWithScoresAsync( + redisKey, 0, 9, Order.Descending)) +{ + Console.WriteLine(post); +} +``` + +The next example uses the `IDatabase.SortedSetRangeByScoreWithScoresAsync` method, which you can use to limit the items that are returned to those that fall within a given score range: + +```csharp +// Blog posts with scores between 5000 and 100000 +foreach (var post in await cache.SortedSetRangeByScoreWithScoresAsync( + redisKey, 5000, 100000)) +{ + Console.WriteLine(post); +} +``` + +### Message by using channels +Apart from acting as a data cache, a Redis server provides messaging through a high-performance publisher/subscriber mechanism. Client applications can subscribe to a channel, and other applications or services can publish messages to the channel. Subscribing applications will then receive these messages and can process them. + +Redis provides the SUBSCRIBE command for client applications to use to subscribe to channels. This command expects the name of one or more channels on which the application will accept messages. The StackExchange library includes the `ISubscriber` interface, which enables a .NET Framework application to subscribe and publish to channels. + +You create an `ISubscriber` object by using the `GetSubscriber` method of the connection to the Redis server. Then you listen for messages on a channel by using the `SubscribeAsync` method of this object. The following code example shows how to subscribe to a channel named "messages:blogPosts": + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +ISubscriber subscriber = redisHostConnection.GetSubscriber(); +... +await subscriber.SubscribeAsync("messages:blogPosts", (channel, message) => +{ + Console.WriteLine("Title is: {0}", message); +}); +``` + +The first parameter to the `SubscribeAsync` method is the name of the channel.
This name follows the same conventions that are used by keys in the cache. The name can contain any binary data, although it is advisable to use relatively short, meaningful strings to help ensure good performance and maintainability. + +Note also that the namespace used by channels is separate from that used by keys. This means you can have channels and keys that have the same name, although this may make your application code more difficult to maintain. + +The second parameter is an Action delegate. This delegate runs asynchronously whenever a new message appears on the channel. This example simply displays the message on the console (the message will contain the title of a blog post). + +To publish to a channel, an application can use the Redis PUBLISH command. The StackExchange library provides the `ISubscriber.PublishAsync` method to perform this operation. The next code snippet shows how to publish a message to the "messages:blogPosts" channel: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +ISubscriber subscriber = redisHostConnection.GetSubscriber(); +... +BlogPost blogPost = ...; +subscriber.PublishAsync("messages:blogPosts", blogPost.Title); +``` + +There are several points you should understand about the publish/subscribe mechanism: + +* Multiple subscribers can subscribe to the same channel, and they will all receive the messages that are published to that channel. +* Subscribers only receive messages that have been published after they have subscribed. Channels are not buffered, and once a message is published, the Redis infrastructure pushes the message to each subscriber and then removes it. +* By default, messages are received by subscribers in the order in which they are sent. In a highly active system with a large number + of messages and many subscribers and publishers, guaranteed sequential delivery of messages can slow performance of the system.
If + each message is independent and the order is unimportant, you can enable concurrent processing by the Redis system, which can help to + improve responsiveness. You can achieve this in a StackExchange client by setting the PreserveAsyncOrder property of the connection used by + the subscriber to false: + +```csharp +ConnectionMultiplexer redisHostConnection = ...; +redisHostConnection.PreserveAsyncOrder = false; +ISubscriber subscriber = redisHostConnection.GetSubscriber(); +``` + +## Related patterns and guidance +The following patterns might also be relevant to your scenario when you implement caching in your applications: + +* [Cache-aside pattern](http://msdn.microsoft.com/library/dn589799.aspx): This pattern describes how to load data on demand into a cache from a data store. This pattern also helps to maintain consistency between data that's held in the cache and the data in the original data store. +* The [Sharding pattern](http://msdn.microsoft.com/library/dn589797.aspx) provides information about implementing horizontal partitioning to help improve scalability when storing and accessing large volumes of data.
+ +## More information +* The [MemoryCache class](http://msdn.microsoft.com/library/system.runtime.caching.memorycache.aspx) page on the Microsoft website +* The [Azure Redis Cache documentation](https://azure.microsoft.com/documentation/services/cache/) page on the Microsoft website +* The [Azure Redis Cache FAQ](/azure/redis-cache/cache-faq/) page on the Microsoft website +* The [Configuration model](http://msdn.microsoft.com/library/windowsazure/hh914149.aspx) page on the Microsoft website +* The [Task-based Asynchronous Pattern](http://msdn.microsoft.com/library/hh873175.aspx) page on the Microsoft website +* The [Pipelines and multiplexers](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/PipelinesMultiplexers.md) page on the StackExchange.Redis GitHub repo +* The [Redis persistence](http://redis.io/topics/persistence) page on the Redis website +* The [Replication page](http://redis.io/topics/replication) on the Redis website +* The [Redis cluster tutorial](http://redis.io/topics/cluster-tutorial) page on the Redis website +* The [Partitioning: how to split data among multiple Redis instances](http://redis.io/topics/partitioning) page on the Redis website +* The [Using Redis as an LRU Cache](http://redis.io/topics/lru-cache) page on the Redis website +* The [Transactions](http://redis.io/topics/transactions) page on the Redis website +* The [Redis security](http://redis.io/topics/security) page on the Redis website +* The [Lap around Azure Redis Cache](https://azure.microsoft.com/blog/2014/06/04/lap-around-azure-redis-cache-preview/) page on the Azure blog +* The [Running Redis on a CentOS Linux VM in Azure](http://blogs.msdn.com/b/tconte/archive/2012/06/08/running-redis-on-a-centos-linux-vm-in-windows-azure.aspx) page on the Microsoft website +* The [ASP.NET session state provider for Azure Redis Cache](/azure/redis-cache/cache-aspnet-session-state-provider/) page on the Microsoft website +* The [ASP.NET output cache provider for Azure 
Redis Cache](/azure/redis-cache/cache-aspnet-output-cache-provider/) page on the Microsoft website +* The [An Introduction to Redis data types and abstractions](http://redis.io/topics/data-types-intro) page on the Redis website +* The [Basic usage](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/Basics.md) page on the StackExchange.Redis website +* The [Transactions in Redis](https://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/Transactions.md) page on the StackExchange.Redis repo +* The [Data partitioning guide](http://msdn.microsoft.com/library/dn589795.aspx) on the Microsoft website + diff --git a/docs/best-practices/cdn.md b/docs/best-practices/cdn.md new file mode 100644 index 00000000000..3954a9ac60e --- /dev/null +++ b/docs/best-practices/cdn.md @@ -0,0 +1,244 @@ +--- +title: Content Delivery Network guidance +description: Guidance on Content Delivery Network (CDN) to deliver high bandwidth content hosted in Azure. +services: cdn +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 57df0e00-d540-46e2-930e-f800c2301bf4 +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/30/2016 +ms.author: masashin + +--- +# Content Delivery Network +[!INCLUDE [header](../_includes/header.md)] + +The Microsoft Azure Content Delivery Network (CDN) offers developers a global solution for delivering high-bandwidth content that is hosted in Azure or any other location. Using the CDN, you can cache publicly available objects loaded from Azure blob storage, a web application, virtual machine, application folder, or other HTTP/HTTPS location. The CDN cache can be held at strategic locations to provide maximum bandwidth for delivering content to users. The CDN is typically used for delivering static content such as images, style sheets, documents, files, client-side scripts, and HTML pages. 
+ +You can also use the CDN as a cache for serving dynamic content, such as a PDF report or graph based on specified inputs; if the same input values are provided by different users the result should be the same. + +The major advantages of using the CDN are lower latency and faster delivery of content to users irrespective of their geographical location in relation to the datacenter where the application is hosted. + +![CDN diagram](./images/cdn/CDN.png) + +Using the CDN should also help to reduce the load on application because it is relieved of the processing required to access and deliver the content. This reduction in load can help to increase the performance and scalability of the application, as well as minimizing hosting costs by reducing the processing resources required to achieve a specific level of performance and availability. + +## How and why a CDN is used +Typical uses for a CDN include: + +* Delivering static resources for client applications, often from a website. These resources can be images, style sheets, documents, files, client-side scripts, HTML pages, HTML fragments, or any other content that the server does not need to modify for each request. The application can create items at runtime and make them available to the CDN (for example, by creating a list of current news headlines), but it does not do so for each request. +* Delivering public static and shared content to devices such as mobile phones and tablet computers. The application itself is a web service that offers an API to clients running on the various devices. The CDN can also deliver static datasets (via the web service) for the clients to use, perhaps to generate the client UI. For example, the CDN could be used to distribute JSON or XML documents. +* Serving entire websites that consist of only public static content to clients, without requiring any dedicated compute resources. +* Streaming video files to the client on demand. 
Video benefits from the low latency and reliable connectivity available from the globally located datacenters that offer CDN connections. +* Generally improving the experience for users, especially those located far from the datacenter hosting the application. These users might otherwise suffer higher latency. A large proportion of the total size of the content in a web application is often static, and using the CDN can help to maintain performance and overall user experience while eliminating the requirement to deploy the application to multiple data centers. +* Handling the growing load on applications that support IoT (Internet of Things) solutions. The huge numbers of such devices and appliances involved could easily overwhelm an application if it was required to process broadcast messages and manage firmware update distribution directly to each device. +* Coping with peaks and surges in demand without requiring the application to scale, avoiding the consequent increased running costs. For example, when an update to an operating system is released for a hardware device such as a specific model of router, or for a consumer device such as a smart TV, there will be a huge peak in demand as it is downloaded by millions of users and devices over a short period. + +The following list shows examples of the median time to first byte from various geographic locations. The target web role is deployed to Azure West US. There is a strong correlation between greater boost due to the CDN and proximity to a CDN node. A complete list of Azure CDN node locations is available at [Azure Content Delivery Network (CDN) Node Locations](/azure/cdn/cdn-pop-locations/). 
+ +| | Time (ms) to First Byte (Origin) | Time (ms) to First Byte (CDN) | %CDN time improvement | +| --- | --- | --- | --- | +| \*San Jose, CA |47.5 |46.5 |2% | +| \*\*Dulles, VA |109 |40.5 |169% | +| Buenos Aires, AR |210 |151 |39% | +| \*London, UK |195 |44 |343% | +| Shanghai, CN |242 |206 |17% | +| \*Singapore |214 |74 |189% | +| \*Tokyo, JP |163 |48 |204% | +| Seoul, KR |190 |190 |0% | + +\* Has an Azure CDN node in the same city. +\*\* Has an Azure CDN node in a neighboring city. + +## Challenges +There are several challenges to take into account when planning to use the CDN: + +* **Deployment**. You must decide the origin from which the CDN will fetch the content, and whether you need to deploy the content in more than one storage system (such as in the CDN and an alternative location). + + Your application deployment mechanism must take into account the process for deploying static content and resources as well as deploying the application files, such as ASPX pages. For example, you may need to implement a separate step to load content into Azure blob storage. +* **Versioning and cache-control**. You must consider how you will update static content and deploy new versions. The CDN content may be [purged](/azure/cdn/cdn-purge-endpoint/) using the Azure Portal when new versions of your assets are available. This is a similar challenge to managing client side caching, such as that which occurs in a web browser. +* **Testing**. It can be difficult to perform local testing of your CDN settings when developing and testing an application locally or in a staging environment. +* **Search engine optimisation (SEO)**. Content such as images and documents are served from a different domain when you use the CDN. This can have an effect on SEO for this content. +* **Content security**. Many CDN services such as Azure CDN do not currently offer any type of access control for the content. +* **Client security**.
Clients might connect from an environment that does not allow access to resources on the CDN. This could be a security-constrained environment that limits access to only a set of known sources, or one that prevents loading of resources from anything other than the page origin. A fallback implementation is required to handle these cases. +* **Resilience**. The CDN is a potential single point of failure for an application. It has a lower availability SLA than blob storage (which can be used to deliver content directly) so you may need to consider implementing a fallback mechanism for critical content. + + You can monitor your CDN content availability, bandwidth, data transferred, hits, cache hit ratio and cache metrics from the Azure Portal in [real-time](/azure/cdn/cdn-real-time-stats/) and [aggregate reports](/azure/cdn/cdn-analyze-usage-patterns/). + +Scenarios where CDN may be less useful include: + +* If the content has a low hit rate it might be accessed only a few times while it is valid (determined by its time-to-live setting). The first time an item is downloaded you incur two transaction charges: one from the origin to the CDN, and another from the CDN to the customer. +* If the data is private, such as for large enterprises or supply chain ecosystems. + +## General guidelines and good practices +Using the CDN is a good way to minimize the load on your application, and maximize availability and performance. You should consider adopting this strategy for all of the appropriate content and resources your application uses. Consider the points in the following sections when designing your strategy to use the CDN: + +### Origin +Deploying content through the CDN simply requires you to specify an HTTP and/or HTTPS endpoint that the CDN service will use to access and cache the content. + +The endpoint can specify an Azure blob storage container that holds the static content you want to deliver through the CDN. The container must be marked as public.
Only blobs in a public container that have public read access will be available through the CDN. + +The endpoint can specify a folder named **cdn** in the root of one of application’s compute layers (such as a web role or a virtual machine). The results from requests for resources, including dynamic resources such as ASPX pages, will be cached on the CDN. The minimum cache period is 300 seconds. Any shorter period will prevent the content from being deployed to the CDN (see the heading *Cache control* below for more information). + +If you are using Azure Web Apps, the endpoint is set to the root folder of the site by selecting the site when creating the CDN instance. All of the content for the site will be available through the CDN. + +In most cases, pointing your CDN endpoint at a folder within one of the compute layers of your application will offer more flexibility and control. For instance, it makes it easier to manage current and future routing requirements, and dynamically generate static content such as image thumbnails. + +You can use [query strings](/azure/cdn/cdn-query-string/) to differentiate objects in the cache when content is delivered from dynamic sources, such as ASPX pages. However, this behavior can be disabled by a setting in the Azure Portal when you specify the CDN endpoint. When delivering content from blob storage, query strings are treated as string literals so two items that have the same name but different query strings will be stored as separate items on the CDN. + +You can utilize URL rewriting for resources, such as scripts and other content, to avoid moving your files to the CDN origin folder. + +When using Azure storage blobs to hold content for the CDN, the URL of the resources in blobs is case sensitive for the container and blob name. + +When using custom origins or Azure Web Apps, you specify the path to the CDN instance in the links to resources. 
For example, the following specifies an image file in the **Images** folder of the site that will be delivered through the CDN: + +```XML +<img src="http://[your-cdn-endpoint].azureedge.net/Images/image.jpg" /> +``` + +### Deployment +Static content may need to be provisioned and deployed independently from the application if you do not include it in the application deployment package or process. Consider how this will affect the versioning approach you use to manage both the application components and the static resource content. + +Consider how bundling (combining several files into one file) and minification (removing unnecessary characters such as white space, new line characters, comments, and other characters) for script and CSS files will be handled. These are commonly used techniques that can reduce load times for clients, and are compatible with delivering content through the CDN. For more information, see [Bundling and Minification](http://www.asp.net/mvc/tutorials/mvc-4/bundling-and-minification). + +If you need to deploy the content to an additional location, this will be an extra step in the deployment process. If the application updates the content for the CDN, perhaps at regular intervals or in response to an event, it must store the updated content in any additional locations as well as the endpoint for the CDN. + +You cannot set up a CDN endpoint for an application in the local Azure emulator in Visual Studio. This restriction will affect unit testing, functional testing, and final pre-deployment testing. You must allow for this by implementing an alternative mechanism. For example, you could pre-deploy the content to the CDN using a custom application or utility, and perform testing during the period in which it is cached. Alternatively, use compile directives or global constants to control from where the application loads the resources.
For example, when running in debug mode it could load resources such as client-side script bundles and other content from a local folder, and use the CDN when running in release mode. + +Consider which compression approach you want your CDN to support: + +* You can [enable compression](/azure/cdn/cdn-improve-performance/) on your origin server, in which case the CDN will support compression by default and deliver compressed content to clients in a format such as zip or gzip. When using an application folder as the CDN endpoint, the server may compress some content automatically in the same way as when delivering it directly to a web browser or other type of client. The format depends on the value of the **Accept-Encoding** header in the request sent by the client. In Azure the default mechanism is to automatically compress content when CPU utilization is below 50%. If you are using a cloud service to host the application, changing the settings may require using a startup task to turn on compression of dynamic output in IIS. See [Enabling gzip compression with Microsoft Azure CDN through a Web Role](http://blogs.msdn.com/b/avkashchauhan/archive/2012/03/05/enableing-gzip-compression-with-windows-azure-cdn-through-web-role.aspx) for more information. +* You can enable compression directly on CDN edge servers, in which case the CDN will compress the files and serve it to end users. For more information, see [Azure CDN Compression](/azure/cdn/cdn-improve-performance/). + +### Routing and versioning +You may need to use different CDN instances at various times. For example, when you deploy a new version of the application you may want to use a new CDN and retain the old CDN (holding content in an older format) for previous versions. If you use Azure blob storage as the content origin, you can simply create a separate storage account or a separate container and point the CDN endpoint to it. 
If you use the *cdn* root folder within the application as the CDN endpoint you can use URL rewriting techniques to direct requests to a different folder. + +Do not use the query string to denote different versions of the application in links to resources on the CDN because, when retrieving content from Azure blob storage, the query string is part of the resource name (the blob name). This approach can also affect how the client caches resources. + +Deploying new versions of static content when you update an application can be a challenge if the previous resources are cached on the CDN. For more information, see the section *Cache control*. + +Consider restricting the CDN content access by country. Azure CDN allows you to filter requests based on the country of origin and restrict the content delivered. For more information, see [Restrict access to your content by country](/azure/cdn/cdn-restrict-access-by-country/). + +### Cache control +Consider how to manage caching within the system. For example, when using a folder as the CDN origin you can specify the cacheability of pages that generate the content, and the content expiry time for all the resources in a specific folder. You can also specify cache properties for the CDN, and for the client using standard HTTP headers. Although you should already be managing caching on the server and client, using the CDN will help to make you more aware of how your content is cached, and where. + +To prevent objects from being available on the CDN you can delete them from the origin (blob container or application *cdn* root folder), remove or delete the CDN endpoint, or, in the case of blob storage, make the container or blob private. However, items will be removed from the CDN only when their time-to-live expires. If no cache expiry period is specified (such as when content is loaded from blob storage), it will be cached on the CDN for up to 7 days.
You can also manually [purge a CDN endpoint](/azure/cdn/cdn-purge-endpoint/). + +In a web application, you can set the caching and expiry for all content by using the *clientCache* element in the *system.webServer/staticContent* section of the web.config file. Remember that when you place a web.config file in a folder it affects the files in that folder and all subfolders. + +If you create the content for the CDN dynamically (in your application code for example), ensure that you specify the *Cache.SetExpires* property on each page. The CDN will not cache the output from pages that use the default cacheability setting of *public*. Set the cache expiry period to a suitable value to ensure that the content is not discarded and reloaded from the application at very short intervals. + +### Security +The CDN can deliver content over HTTPS (SSL) using the certificate provided by the CDN, but it will also be available over HTTP as well. You cannot block HTTP access to items in the CDN. You may need to use HTTPS to request static content that is displayed in pages loaded through HTTPS (such as a shopping cart) to avoid browser warnings about mixed content. + +The Azure CDN does not provide any facilities for access control to secure access to content. You cannot use Shared Access Signatures (SAS) with the CDN. + +If you deliver client-side scripts using the CDN, you may encounter issues if these scripts use an *XMLHttpRequest* call to make HTTP requests for other resources such as data, images, or fonts in a different domain. Many web browsers prevent cross-origin resource sharing (CORS) unless the web server is configured to set the appropriate response headers. You can configure the CDN to support CORS: + +* If the origin from which you are delivering content is Azure blob storage, you can add a *CorsRule* to the service properties. 
The rule can specify the allowed origins for CORS requests, the allowed methods such as GET, and the maximum age in seconds for the rule (the period within which the client must request the linked resources after loading the original content). For more information, see [Cross-Origin Resource Sharing (CORS) Support for the Azure Storage Services](http://msdn.microsoft.com/library/azure/dn535601.aspx). +* If the origin from which you are delivering content is a folder within the application, such as the *cdn* root folder, you can configure outbound rules in the application configuration file to set an *Access-Control-Allow-Origin* header on all responses. For more information about using rewrite rules, see [URL Rewrite Module](http://www.iis.net/learn/extensions/url-rewrite-module). + +### Custom domains +The Azure CDN allows you to specify a [custom domain name](/azure/cdn/cdn-map-content-to-custom-domain/) and use it to access resources through the CDN. You can also set up a custom subdomain name using a *CNAME* record in your DNS. Using this approach can provide an additional layer of abstraction and control. + +If you use a *CNAME*, you cannot use SSL because the CDN uses its own single SSL certificate, and this certificate will not match your custom domain/subdomain names. + +### CDN fallback +You should consider how your application will cope with a failure or temporary unavailability of the CDN. Client applications may be able to use copies of the resources that were cached locally (on the client) during previous requests, or you can include code that detects failure and instead requests resources from the origin (the application folder or Azure blob container that holds the resources) if the CDN is unavailable. + +### Search engine optimisation +If SEO is an important consideration in your application, perform the following tasks: + +* Include a *Rel* canonical header in each page or resource. 
+* Use a *CNAME* subdomain record and access the resources using this name. +* Consider the impact of the fact that the IP address of the CDN may be in a country or region that differs from that of the application itself. +* When using Azure blob storage as the origin, maintain the same file structure for resources on the CDN as in the application folders. + +### Monitoring and logging +Include the CDN as part of your application monitoring strategy to detect and measure failures or extended latency occurrences. Monitoring is available from the CDN profile manager located on the Azure portal site. + +Enable logging for the CDN and monitor this log as part of your daily operations. + +Consider analyzing the CDN traffic for usage patterns. The Azure portal provides tools that enable you to monitor: + +* Bandwidth, +* Data Transferred, +* Hits (status codes), +* Cache Status, +* Cache HIT Ratio, and +* Ratio of IPV4/IPV6 requests. + +For more information, see [Analyze CDN usage patterns](/azure/cdn/cdn-analyze-usage-patterns/). + +### Cost implications +You are charged for outbound data transfers from the CDN. Additionally, if you're using blob storage to host your assets, you are charged for storage transactions when the CDN loads data from your application. You should set realistic cache expiry periods for content to ensure freshness, but not so short as to cause repeated reloading of content from the application or blob storage to the CDN. + +Items that are rarely downloaded will incur the two transaction charges without providing any significant reduction in server load. + +### Bundling and minification +Use bundling and minification to reduce the size of resources such as JavaScript code and HTML pages stored in the CDN. This strategy can help to reduce the time taken to download these items to the client. + +Bundling and minification can be handled by ASP.NET. In an MVC project, you define your bundles in *BundleConfig.cs*.
A reference to the minified script bundle is created by calling the *Scripts.Render* method, typically in code in the view class. This reference contains a query string that includes a hash, which is based on the content of the bundle. If the bundle contents change, the generated hash will also change. + +By default, Azure CDN instances have the *Query String Status* setting disabled. In order for updated script bundles to be handled properly by the CDN, you must enable the *Query String Status* setting for the CDN instance. Note that it may take an hour or more before the setting takes effect. + +## Example code +This section contains some examples of code and techniques for working with the CDN. + +### URL rewriting +The following excerpt from a Web.config file in the root of a Cloud Services hosted application demonstrates how to perform [URL rewriting](https://technet.microsoft.com/library/ee215194.aspx) when using the CDN. Requests from the CDN for content that is cached are redirected to specific folders within the application root based on the type of the resource (such as scripts and images). + +```XML + + ... + + + + + + + + + + + + + + + + + + + + + + + + + ... + +``` + +These rewrite rules perform the following redirections: + +* The first rule allows you to embed a version in the file name of a resource, which is then ignored. For example, *Filename_v123.jpg* is rewritten as *Filename.jpg*. +* The next four rules show how to redirect requests if you do not want to store the resources in a folder named *cdn* in the root of the web role. The rules map the *cdn/Images*, *cdn/Content*, *cdn/Scripts*, and *cdn/bundles* URLs to their respective root folders in the web role. + +Note that using URL rewriting requires you to make some changes to the bundling of resources.
+ +## More information +* [Azure CDN](https://azure.microsoft.com/services/cdn/) +* [Azure Content Delivery Network (CDN) Documentation](https://azure.microsoft.com/documentation/services/cdn/) +* [Using Azure CDN](/azure/cdn/cdn-create-new-endpoint/) +* [Integrate a cloud service with Azure CDN](/azure/cdn/cdn-cloud-service-with-cdn/) +* [Best Practices for the Microsoft Azure Content Delivery Network](https://azure.microsoft.com/blog/2011/03/18/best-practices-for-the-windows-azure-content-delivery-network/) diff --git a/docs/best-practices/data-partitioning.md b/docs/best-practices/data-partitioning.md new file mode 100644 index 00000000000..148e25cb66d --- /dev/null +++ b/docs/best-practices/data-partitioning.md @@ -0,0 +1,585 @@ +--- +title: Data partitioning guidance +description: Guidance for how to separate partitions to be managed and accessed separately. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 401559b5-f25f-4010-9811-5743fbb72aeb +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/14/2016 +ms.author: masashin + +--- +# Data partitioning +[!INCLUDE [header](../_includes/header.md)] + +In many large-scale solutions, data is divided into separate partitions that can be managed and accessed separately. The partitioning strategy must be chosen carefully to maximize the benefits while minimizing adverse effects. Partitioning can help improve scalability, reduce contention, and optimize performance. Another benefit of partitioning is that it can provide a mechanism for dividing data by the pattern of use. For example, you can archive older, less active (cold) data in cheaper data storage. + +## Why partition data? +Most cloud applications and services store and retrieve data as part of their operations.
The design of the data stores that an application uses can have a significant bearing on the performance, throughput, and scalability of a system. One technique that is commonly applied in large-scale systems is to divide the data into separate partitions. + +> The term *partitioning* that's used in this guidance refers to the process of physically dividing data into separate data stores. This is not the same as SQL Server table partitioning, which is a different concept. +> +> + +Partitioning data can offer a number of benefits. For example, it can be applied in order to: + +* **Improve scalability**. When you scale up a single database system, it will eventually reach a physical hardware limit. If you divide data across multiple partitions, each of which is hosted on a separate server, you can scale out the system almost indefinitely. +* **Improve performance**. Data access operations on each partition take place over a smaller volume of data. Provided that the data is partitioned in a suitable way, partitioning can make your system more efficient. Operations that affect more than one partition can run in parallel. Each partition can be located near the application that uses it to minimize network latency. +* **Improve availability**. Separating data across multiple servers avoids a single point of failure. If a server fails, or is undergoing planned maintenance, only the data in that partition is unavailable. Operations on other partitions can continue. Increasing the number of partitions reduces the relative impact of a single server failure by reducing the percentage of data that will be unavailable. Replicating each partition can further reduce the chance of a single partition failure affecting operations. It also makes it possible to separate critical data that must be continually and highly available from low-value data that has lower availability requirements (log files, for example). +* **Improve security**. 
Depending on the nature of the data and how it is partitioned, it might be possible to separate sensitive and non-sensitive data into different partitions, and therefore into different servers or data stores. Security can then be specifically optimized for the sensitive data. +* **Provide operational flexibility**. Partitioning offers many opportunities for fine tuning operations, maximizing administrative efficiency, and minimizing cost. For example, you can define different strategies for management, monitoring, backup and restore, and other administrative tasks based on the importance of the data in each partition. +* **Match the data store to the pattern of use**. Partitioning allows each partition to be deployed on a different type of data store, based on cost and the built-in features that data store offers. For example, large binary data can be stored in a blob data store, while more structured data can be held in a document database. For more information, see [Building a polyglot solution] in the patterns & practices guide and [Data access for highly-scalable solutions: Using SQL, NoSQL, and polyglot persistence] on the Microsoft website. + +Some systems do not implement partitioning because it is considered a cost rather than an advantage. Common reasons for this rationale include: + +* Many data storage systems do not support joins across partitions, and it can be difficult to maintain referential integrity in a partitioned system. It is frequently necessary to implement joins and integrity checks in application code (in the partitioning layer), which can result in additional I/O and application complexity. +* Maintaining partitions is not always a trivial task. In a system where the data is volatile, you might need to rebalance partitions periodically to reduce contention and hot spots. +* Some common tools do not work naturally with partitioned data. 
+ +## Designing partitions +Data can be partitioned in different ways: horizontally, vertically, or functionally. The strategy you choose depends on the reason for partitioning the data, and the requirements of the applications and services that will use the data. + +> [!NOTE] +> The partitioning schemes described in this guidance are explained in a way that is independent of the underlying data storage technology. They can be applied to many types of data stores, including relational and NoSQL databases. +> +> + +### Partitioning strategies +The three typical strategies for partitioning data are: + +* **Horizontal partitioning** (often called *sharding*). In this strategy, each partition is a data store in its own right, but all partitions have the same schema. Each partition is known as a *shard* and holds a specific subset of the data, such as all the orders for a specific set of customers in an e-commerce application. +* **Vertical partitioning**. In this strategy, each partition holds a subset of the fields for items in the data store. The fields are divided according to their pattern of use. For example, frequently accessed fields might be placed in one vertical partition and less frequently accessed fields in another. +* **Functional partitioning**. In this strategy, data is aggregated according to how it is used by each bounded context in the system. For example, an e-commerce system that implements separate business functions for invoicing and managing product inventory might store invoice data in one partition and product inventory data in another. + +It’s important to note that the three strategies described here can be combined. They are not mutually exclusive, and we recommend that you consider them all when you design a partitioning scheme. For example, you might divide data into shards and then use vertical partitioning to further subdivide the data in each shard. 
Similarly, the data in a functional partition can be split into shards (which can also be vertically partitioned). + +However, the differing requirements of each strategy can raise a number of conflicting issues. You must evaluate and balance all of these when designing a partitioning scheme that meets the overall data processing performance targets for your system. The following sections explore each of the strategies in more detail. + +### Horizontal partitioning (sharding) +Figure 1 shows an overview of horizontal partitioning or sharding. In this example, product inventory data is divided into shards based on the product key. Each shard holds the data for a contiguous range of shard keys (A-G and H-Z), organized alphabetically. + +![Horizontally partitioning (sharding) data based on a partition key](./images/data-partitioning/DataPartitioning01.png) + +*Figure 1. Horizontally partitioning (sharding) data based on a partition key* + +Sharding helps you spread the load over more computers, which reduces contention and improves performance. You can scale the system out by adding further shards that run on additional servers. + +The most important factor when implementing this partitioning strategy is the choice of sharding key. It can be difficult to change the key after the system is in operation. The key must ensure that data is partitioned so that the workload is as even as possible across the shards. + +Note that different shards do not have to contain similar volumes of data. Rather, the more important consideration is to balance the number of requests. Some shards might be very large, but each item is the subject of a low number of access operations. Other shards might be smaller, but each item is accessed much more frequently. It is also important to ensure that a single shard does not exceed the scale limits (in terms of capacity and processing resources) of the data store that's being used to host that shard.
+ +If you use a sharding scheme, avoid creating hotspots (or hot partitions) that can affect performance and availability. For example, if you use a hash of a customer identifier instead of the first letter of a customer’s name, you prevent the unbalanced distribution that results from common and less common initial letters. This is a typical technique that helps distribute data more evenly across partitions. + +Choose a sharding key that minimizes any future requirements to split large shards into smaller pieces, coalesce small shards into larger partitions, or change the schema that describes the data stored in a set of partitions. These operations can be very time consuming, and might require taking one or more shards offline while they are performed. + +If shards are replicated, it might be possible to keep some of the replicas online while others are split, merged, or reconfigured. However, the system might need to limit the operations that can be performed on the data in these shards while the reconfiguration is taking place. For example, the data in the replicas can be marked as read-only to limit the scope of inconsistences that might occur while shards are being restructured. + +> For more detailed information and guidance about many of these considerations, and good practice techniques for designing data stores that implement horizontal partitioning, see [Sharding pattern]. +> +> + +### Vertical partitioning +The most common use for vertical partitioning is to reduce the I/O and performance costs associated with fetching the items that are accessed most frequently. Figure 2 shows an example of vertical partitioning. In this example, different properties for each data item are held in different partitions. One partition holds data that is accessed more frequently, including the name, description, and price information for products. Another holds the volume in stock and the last ordered date. 
+ +![Vertically partitioning data by its pattern of use](./images/data-partitioning/DataPartitioning02.png) + +*Figure 2. Vertically partitioning data by its pattern of use* + +In this example, the application regularly queries the product name, description, and price when displaying the product details to customers. The stock level and date when the product was last ordered from the manufacturer are held in a separate partition because these two items are commonly used together. + +This partitioning scheme has the added advantage that the relatively slow-moving data (product name, description, and price) is separated from the more dynamic data (stock level and last ordered date). An application might find it beneficial to cache the slow-moving data in memory if it is frequently accessed. + +Another typical scenario for this partitioning strategy is to maximize the security of sensitive data. For example, you can do this by storing credit card numbers and the corresponding card security verification numbers in separate partitions. + +Vertical partitioning can also reduce the amount of concurrent access that's needed to the data. + +> Vertical partitioning operates at the entity level within a data store, partially normalizing an entity to break it down from a *wide* item to a set of *narrow* items. It is ideally suited for column-oriented data stores such as HBase and Cassandra. If the data in a collection of columns is unlikely to change, you can also consider using column stores in SQL Server. +> +> + +### Functional partitioning +For systems where it is possible to identify a bounded context for each distinct business area or service in the application, functional partitioning provides a technique for improving isolation and data access performance. Another common use of functional partitioning is to separate read-write data from read-only data that's used for reporting purposes. 
Figure 3 shows an overview of functional partitioning where inventory data is separated from customer data. + +![Functionally partitioning data by bounded context or subdomain](./images/data-partitioning/DataPartitioning03.png) + +*Figure 3. Functionally partitioning data by bounded context or subdomain* + +This partitioning strategy can help reduce data access contention across different parts of a system. + +## Designing partitions for scalability +It's vital to consider size and workload for each partition and balance them so that data is distributed to achieve maximum scalability. However, you must also partition the data so that it does not exceed the scaling limits of a single partition store. + +Follow these steps when designing partitions for scalability: + +1. Analyze the application to understand the data access patterns, such as the size of the result set returned by each query, the frequency of access, the inherent latency, and the server-side compute processing requirements. In many cases, a few major entities will demand most of the processing resources. +2. Use this analysis to determine the current and future scalability targets, such as data size and workload. Then distribute the data across the partitions to meet the scalability target. In the horizontal partitioning strategy, choosing the appropriate shard key is important to make sure distribution is even. For more information, see the [Sharding pattern]. +3. Make sure that the resources available to each partition are sufficient to handle the scalability requirements in terms of data size and throughput. For example, the node that's hosting a partition might impose a hard limit on the amount of storage space, processing power, or network bandwidth that it provides. If the data storage and processing requirements are likely to exceed these limits, it might be necessary to refine your partitioning strategy or split data out further. 
For example, one scalability approach might be to separate logging data from the core application features. You do this by using separate data stores to prevent the total data storage requirements from exceeding the scaling limit of the node. If the total number of data stores exceeds the node limit, it might be necessary to use separate storage nodes. +4. Monitor the system under use to verify that the data is distributed as expected and that the partitions can handle the load that is imposed on them. It's possible that the usage does not match the usage that's anticipated by the analysis. In that case, it might be possible to rebalance the partitions. Failing that, it might be necessary to redesign some parts of the system to gain the required balance. + +Note that some cloud environments allocate resources in terms of infrastructure boundaries. Ensure that the limits of your selected boundary provide enough room for any anticipated growth in the volume of data, in terms of data storage, processing power, and bandwidth. + +For example, if you use Azure table storage, a busy shard might require more resources than are available to a single partition to handle requests. (There is a limit to the volume of requests that can be handled by a single partition in a particular period of time. See the page [Azure storage scalability and performance targets] on the Microsoft website for more details.) + + If this is the case, the shard might need to be repartitioned to spread the load. If the total size or throughput of these tables exceeds the capacity of a storage account, it might be necessary to create additional storage accounts and spread the tables across these accounts. If the number of storage accounts exceeds the number of accounts that are available to a subscription, then it might be necessary to use multiple subscriptions. 
+ +## Designing partitions for query performance +Query performance can often be boosted by using smaller data sets and by running parallel queries. Each partition should contain a small proportion of the entire data set. This reduction in volume can improve the performance of queries. However, partitioning is not an alternative for designing and configuring a database appropriately. For example, make sure that you have the necessary indexes in place if you are using a relational database. + +Follow these steps when designing partitions for query performance: + +1. Examine the application requirements and performance: + * Use the business requirements to determine the critical queries that must always perform quickly. + * Monitor the system to identify any queries that perform slowly. + * Establish which queries are performed most frequently. A single instance of each query might have minimal cost, but the cumulative consumption of resources could be significant. It might be beneficial to separate the data that's retrieved by these queries into a distinct partition, or even a cache. +2. Partition the data that is causing slow performance: + * Limit the size of each partition so that the query response time is within target. + * Design the shard key so that the application can easily find the partition if you are implementing horizontal partitioning. This prevents the query from having to scan through every partition. + * Consider the location of a partition. If possible, try to keep data in partitions that are geographically close to the applications and users that access it. +3. If an entity has throughput and query performance requirements, use functional partitioning based on that entity. If this still doesn't satisfy the requirements, apply horizontal partitioning as well. In most cases a single partitioning strategy will suffice, but in some cases it is more efficient to combine both strategies. +4. 
Consider using asynchronous queries that run in parallel across partitions to improve performance. + +## Designing partitions for availability +Partitioning data can improve the availability of applications by ensuring that the entire dataset does not constitute a single point of failure and that individual subsets of the dataset can be managed independently. Replicating partitions that contain critical data can also improve availability. + +When designing and implementing partitions, consider the following factors that affect availability: + +* **How critical the data is to business operations**. Some data might include critical business information such as invoice details or bank transactions. Other data might include less critical operational data, such as log files, performance traces, and so on. After identifying each type of data, consider: + * Storing critical data in highly-available partitions with an appropriate backup plan. + * Establishing separate management and monitoring mechanisms or procedures for the different criticalities of each dataset. Place data that has the same level of criticality in the same partition so that it can be backed up together at an appropriate frequency. For example, partitions that hold data for bank transactions might need to be backed up more frequently than partitions that hold logging or trace information. +* **How individual partitions can be managed**. Designing partitions to support independent management and maintenance provides several advantages. For example: + * If a partition fails, it can be recovered independently without affecting instances of applications that access data in other partitions. + * Partitioning data by geographical area allows scheduled maintenance tasks to occur at off-peak hours for each location. Ensure that partitions are not too big to prevent any planned maintenance from being completed during this period. +* **Whether to replicate critical data across partitions**. 
This strategy can improve availability and performance, although it can also introduce consistency issues. It takes time for changes made to data in a partition to be synchronized with every replica. During this period, different partitions will contain different data values. + +## Understanding how partitioning affects design and development +Using partitioning adds complexity to the design and development of your system. Consider partitioning as a fundamental part of system design even if the system initially only contains a single partition. If you address partitioning as an afterthought, when the system starts to suffer performance and scalability issues, the complexity increases because you already have a live system to maintain. + +If you update the system to incorporate partitioning in this environment, it necessitates modifying the data access logic. It can also involve migrating large quantities of existing data to distribute it across partitions, often while users expect to be able to continue using the system. + +In some cases, partitioning is not considered important because the initial dataset is small and can be easily handled by a single server. This might be true in a system that is not expected to scale beyond its initial size, but many commercial systems need to expand as the number of users increases. This expansion is typically accompanied by a growth in the volume of data. + +It's also important to understand that partitioning is not always a function of large data stores. For example, a small data store might be heavily accessed by hundreds of concurrent clients. Partitioning the data in this situation can help to reduce contention and improve throughput. + +Consider the following points when you design a data partitioning scheme: + +* **Where possible, keep data for the most common database operations together in each partition to minimize cross-partition data access operations**. 
Querying across partitions can be more time-consuming than querying only within a single partition, but optimizing partitions for one set of queries might adversely affect other sets of queries. When you can't avoid querying across partitions, minimize query time by running parallel queries and aggregating the results within the application. This approach might not be possible in some cases, such as when it's necessary to obtain a result from one query and use it in the next query. +* **If queries make use of relatively static reference data, such as postal code tables or product lists, consider replicating this data in all of the partitions to reduce the requirement for separate lookup operations in different partitions**. This approach can also reduce the likelihood of the reference data becoming a "hot" dataset that is subject to heavy traffic from across the entire system. However, there is an additional cost associated with synchronizing any changes that might occur to this reference data. +* **Where possible, minimize requirements for referential integrity across vertical and functional partitions**. In these schemes, the application itself is responsible for maintaining referential integrity across partitions when data is updated and consumed. Queries that must join data across multiple partitions run more slowly than queries that join data only within the same partition because the application typically needs to perform consecutive queries based on a key and then on a foreign key. Instead, consider replicating or de-normalizing the relevant data. To minimize the query time where cross-partition joins are necessary, run parallel queries over the partitions and join the data within the application. +* **Consider the effect that the partitioning scheme might have on the data consistency across partitions.** Evaluate whether strong consistency is actually a requirement. Instead, a common approach in the cloud is to implement eventual consistency. 
The data in each partition is updated separately, and the application logic ensures that the updates are all completed successfully. It also handles the inconsistencies that can arise from querying data while an eventually consistent operation is running. For more information about implementing eventual consistency, see the [Data consistency primer]. +* **Consider how queries locate the correct partition**. If a query must scan all partitions to locate the required data, there is a significant impact on performance, even when multiple parallel queries are running. Queries that are used with vertical and functional partitioning strategies can naturally specify the partitions. However, horizontal partitioning (sharding) can make locating an item difficult because every shard has the same schema. A typical solution for sharding is to maintain a map that can be used to look up the shard location for specific items of data. This map can be implemented in the sharding logic of the application, or maintained by the data store if it supports transparent sharding. +* **When using a horizontal partitioning strategy, consider periodically rebalancing the shards**. This helps distribute the data evenly by size and by workload to minimize hotspots, maximize query performance, and work around physical storage limitations. However, this is a complex task that often requires the use of a custom tool or process. +* **If you replicate each partition, it provides additional protection against failure**. If a single replica fails, queries can be directed towards a working copy. +* **If you reach the physical limits of a partitioning strategy, you might need to extend the scalability to a different level**. For example, if partitioning is at the database level, you might need to locate or replicate partitions in multiple databases. 
If partitioning is already at the database level, and physical limitations are an issue, it might mean that you need to locate or replicate partitions in multiple hosting accounts. +* **Avoid transactions that access data in multiple partitions**. Some data stores implement transactional consistency and integrity for operations that modify data, but only when the data is located in a single partition. If you need transactional support across multiple partitions, you will probably need to implement this as part of your application logic because most partitioning systems do not provide native support. + +All data stores require some operational management and monitoring activity. The tasks can range from loading data, backing up and restoring data, reorganizing data, and ensuring that the system is performing correctly and efficiently. + +Consider the following factors that affect operational management: + +* **How to implement appropriate management and operational tasks when the data is partitioned**. These tasks might include backup and restore, archiving data, monitoring the system, and other administrative tasks. For example, maintaining logical consistency during backup and restore operations can be a challenge. +* **How to load the data into multiple partitions and add new data that's arriving from other sources**. Some tools and utilities might not support sharded data operations such as loading data into the correct partition. This means that you might have to create or obtain new tools and utilities. +* **How to archive and delete the data on a regular basis**. To prevent the excessive growth of partitions, you need to archive and delete data on a regular basis (perhaps monthly). It might be necessary to transform the data to match a different archive schema. +* **How to locate data integrity issues**. Consider running a periodic process to locate any data integrity issues such as data in one partition that references missing information in another. 
The process can either attempt to fix these issues automatically or raise an alert to an operator to correct the problems manually. For example, in an e-commerce application, order information might be held in one partition but the line items that constitute each order might be held in another. The process of placing an order needs to add data to other partitions. If this process fails, there might be line items stored for which there is no corresponding order. + +Different data storage technologies typically provide their own features to support partitioning. The following sections summarize the options that are implemented by data stores commonly used by Azure applications. They also describe considerations for designing applications that can best take advantage of these features. + +## Partitioning strategies for Azure SQL Database +Azure SQL Database is a relational database-as-a-service that runs in the cloud. It is based on Microsoft SQL Server. A relational database divides information into tables, and each table holds information about entities as a series of rows. Each row contains columns that hold the data for the individual fields of an entity. The page [What is Azure SQL Database?] on the Microsoft website provides detailed documentation about creating and using SQL databases. + +## Horizontal partitioning with Elastic Database +A single SQL database has a limit to the volume of data that it can contain. Throughput is constrained by architectural factors and the number of concurrent connections that it supports. The Elastic Database feature of SQL Database supports horizontal scaling for a SQL database. Using Elastic Database, you can partition your data into shards that are spread across multiple SQL databases. You can also add or remove shards as the volume of data that you need to handle grows and shrinks. Using Elastic Database can also help reduce contention by distributing the load across databases. 
+ +> [!NOTE] +> Elastic Database is a replacement for the Federations feature of Azure SQL Database. Existing SQL Database Federation installations can be migrated to Elastic Database by using the Federations migration utility. Alternatively, you can implement your own sharding mechanism if your scenario does not lend itself naturally to the features that are provided by Elastic Database. +> +> + +Each shard is implemented as a SQL database. A shard can hold more than one dataset (referred to as a *shardlet*). Each database maintains metadata that describes the shardlets that it contains. A shardlet can be a single data item, or it can be a group of items that share the same shardlet key. For example, if you are sharding data in a multitenant application, the shardlet key can be the tenant ID, and all data for a given tenant can be held as part of the same shardlet. Data for other tenants would be held in different shardlets. + +It is the programmer's responsibility to associate a dataset with a shardlet key. A separate SQL database acts as a global shard map manager. This database contains a list of all the shards and shardlets in the system. A client application that accesses data connects first to the global shard map manager database to obtain a copy of the shard map (listing shards and shardlets), which it then caches locally. + +The application then uses this information to route data requests to the appropriate shard. This functionality is hidden behind a series of APIs that are contained in the Azure SQL Database Elastic Database Client Library, which is available as a NuGet package. The page [Elastic Database features overview] on the Microsoft website provides a more comprehensive introduction to Elastic Database. + +> [!NOTE] +> You can replicate the global shard map manager database to reduce latency and improve availability. 
If you implement the database by using one of the Premium pricing tiers, you can configure active geo-replication to continuously copy data to databases in different regions. Create a copy of the database in each region in which users are based. Then configure your application to connect to this copy to obtain the shard map. +> +> An alternative approach is to use Azure SQL Data Sync or an Azure Data Factory pipeline to replicate the shard map manager database across regions. This form of replication runs periodically and is more suitable if the shard map changes infrequently. Additionally, the shard map manager database does not have to be created by using a Premium pricing tier. +> +> + +Elastic Database provides two schemes for mapping data to shardlets and storing them in shards: + +* A **list shard map** describes an association between a single key and a shardlet. For example, in a multitenant system, the data for each tenant can be associated with a unique key and stored in its own shardlet. To guarantee privacy and isolation (that is, to prevent one tenant from exhausting the data storage resources available to others), each shardlet can be held within its own shard. + +![Using a list shard map to store tenant data in separate shards](./images/data-partitioning/PointShardlet.png) + +*Figure 4. Using a list shard map to store tenant data in separate shards* + +* A **range shard map** describes an association between a set of contiguous key values and a shardlet. In the multitenant example described previously, as an alternative to implementing dedicated shardlets, you can group the data for a set of tenants (each with their own key) within the same shardlet. This scheme is less expensive than the first (because tenants share data storage resources), but it also creates a risk of reduced data privacy and isolation. + +![Using a range shard map to store data for a range of tenants in a shard](./images/data-partitioning/RangeShardlet.png) + +*Figure 5. 
Using a range shard map to store data for a range of tenants in a shard* + +Note that a single shard can contain the data for several shardlets. For example, you can use list shardlets to store data for different non-contiguous tenants in the same shard. You can also mix range shardlets and list shardlets in the same shard, although they will be addressed through different maps in the global shard map manager database. (The global shard map manager database can contain multiple shard maps.) Figure 6 depicts this approach. + +![Implementing multiple shard maps](./images/data-partitioning/MultipleShardMaps.png) + +*Figure 6. Implementing multiple shard maps* + +The partitioning scheme that you implement can have a significant bearing on the performance of your system. It can also affect the rate at which shards have to be added or removed, or the rate at which data must be repartitioned across shards. Consider the following points when you use Elastic Database to partition data: + +* Group data that is used together in the same shard, and avoid operations that need to access data that's held in multiple shards. Keep in mind that with Elastic Database, a shard is a SQL database in its own right, and Azure SQL Database does not support cross-database joins (which have to be performed on the client side). Remember also that in Azure SQL Database, referential integrity constraints, triggers, and stored procedures in one database cannot reference objects in another. Therefore, don't design a system that has dependencies between shards. A SQL database can, however, contain tables that hold copies of reference data frequently used by queries and other operations. These tables do not have to belong to any specific shardlet. Replicating this data across shards can help remove the need to join data that spans databases. Ideally, such data should be static or slow-moving to minimize the replication effort and reduce the chances of it becoming stale. 
+ + > [!NOTE] + > Although SQL Database does not support cross-database joins, you can perform cross-shard queries with the Elastic Database API. These queries can transparently iterate through the data held in all the shardlets that are referenced by a shard map. The Elastic Database API breaks cross-shard queries down into a series of individual queries (one for each database) and then merges the results. For more information, see the page [Multi-shard querying] on the Microsoft website. + > + > +* The data stored in shardlets that belong to the same shard map should have the same schema. For example, don't create a list shard map that points to some shardlets containing tenant data and other shardlets containing product information. This rule is not enforced by Elastic Database, but data management and querying become very complex if each shardlet has a different schema. In the example just cited, a good solution is to create two list shard maps: one that references tenant data and another that points to product information. Remember that the data belonging to different shardlets can be stored in the same shard. + + > [!NOTE] + > The cross-shard query functionality of the Elastic Database API depends on each shardlet in the shard map containing the same schema. + > + > +* Transactional operations are only supported for data that's held within the same shard, and not across shards. Transactions can span shardlets as long as they are part of the same shard. Therefore, if your business logic needs to perform transactions, either store the affected data in the same shard or implement eventual consistency. For more information, see the [Data consistency primer]. +* Place shards close to the users that access the data in those shards (in other words, geo-locate the shards). This strategy helps reduce latency. +* Avoid having a mixture of highly active (hotspots) and relatively inactive shards. Try to spread the load evenly across shards.
This might require hashing the shardlet keys. +* If you are geo-locating shards, make sure that the hashed keys map to shardlets held in shards stored close to the users that access that data. +* Currently, only a limited set of SQL data types are supported as shardlet keys: *int, bigint, varbinary,* and *uniqueidentifier*. The SQL *int* and *bigint* types correspond to the *int* and *long* data types in C#, and have the same ranges. The SQL *varbinary* type can be handled by using a *Byte* array in C#, and the SQL *uniqueidentifier* type corresponds to the *Guid* class in the .NET Framework. + +As the name implies, Elastic Database makes it possible for a system to add and remove shards as the volume of data shrinks and grows. The APIs in the Azure SQL Database Elastic Database client library enable an application to create and delete shards dynamically (and transparently update the shard map manager). However, removing a shard is a destructive operation that also requires deleting all the data in that shard. + +If an application needs to split a shard into two separate shards or combine shards, Elastic Database provides a separate split-merge service. This service runs in a cloud-hosted service (which must be created by the developer) and migrates data safely between shards. For more information, see the topic [Scaling using the Elastic Database split-merge tool] on the Microsoft website. + +## Partitioning strategies for Azure Storage +Azure storage provides three abstractions for managing data: + +* Table storage, which implements scalable structured storage. A table contains a collection of entities, each of which can include a set of properties and values. +* Blob storage, which supplies storage for large objects and files. +* Storage queues, which support reliable asynchronous messaging between applications. + +Table storage and blob storage are essentially key-value stores that are optimized to hold structured and unstructured data respectively.
Storage queues provide a mechanism for building loosely coupled, scalable applications. Table storage, blob storage, and storage queues are created within the context of an Azure storage account. Storage accounts support three forms of redundancy: + +* **Locally redundant storage**, which maintains three copies of data within a single datacenter. This form of redundancy protects against hardware failure but not against a disaster that encompasses the entire datacenter. +* **Zone-redundant storage**, which maintains three copies of data spread across different datacenters within the same region (or across two geographically close regions). This form of redundancy can protect against disasters that occur within a single datacenter, but cannot protect against large-scale network disconnects that affect an entire region. Note that zone-redundant storage is currently available only for block blobs. +* **Geo-redundant storage**, which maintains six copies of data: three copies in one region (your local region), and another three copies in a remote region. This form of redundancy provides the highest level of disaster protection. + +Microsoft has published scalability targets for Azure Storage. For more information, see the page [Azure Storage scalability and performance targets] on the Microsoft website. Currently, the total storage account capacity cannot exceed 500 TB. (This includes the size of data that's held in table storage and blob storage, as well as outstanding messages that are held in storage queues.) + +The maximum request rate (assuming a 1-KB entity, blob, or message size) is 20 KBps. If your system is likely to exceed these limits, consider partitioning the load across multiple storage accounts. A single Azure subscription can create up to 100 storage accounts. However, note that these limits might change over time. + +## Partitioning Azure table storage +Azure table storage is a key-value store that's designed around partitioning.
All entities are stored in a partition, and partitions are managed internally by Azure table storage. Each entity that's stored in a table must provide a two-part key that includes: + +* **The partition key**. This is a string value that determines in which partition Azure table storage will place the entity. All entities with the same partition key will be stored in the same partition. +* **The row key**. This is another string value that identifies the entity within the partition. All entities within a partition are sorted lexically, in ascending order, by this key. The partition key/row key combination must be unique for each entity and cannot exceed 1 KB in length. + +The remainder of the data for an entity consists of application-defined fields. No particular schemas are enforced, and each row can contain a different set of application-defined fields. The only limitation is that the maximum size of an entity (including the partition and row keys) is currently 1 MB. The maximum size of a table is 200 TB, although these figures might change in the future. (Check the page [Azure Storage scalability and performance targets] on the Microsoft website for the most recent information about these limits.) + +If you are attempting to store entities that exceed this capacity, then consider splitting them into multiple tables. Use vertical partitioning to divide the fields into the groups that are most likely to be accessed together. + +Figure 7 shows the logical structure of an example storage account (Contoso Data) for a fictitious e-commerce application. The storage account contains three tables: Customer Info, Product Info, and Order Info. Each table has multiple partitions. + +In the Customer Info table, the data is partitioned according to the city in which the customer is located, and the row key contains the customer ID. In the Product Info table, the products are partitioned by product category, and the row key contains the product number. 
In the Order Info table, the orders are partitioned by the date on which they were placed, and the row key specifies the time the order was received. Note that all data is ordered by the row key in each partition. + +![The tables and partitions in an example storage account](./images/data-partitioning/TableStorage.png) + +*Figure 7. The tables and partitions in an example storage account* + +> [!NOTE] +> Azure table storage also adds a timestamp field to each entity. The timestamp field is maintained by table storage and is updated each time the entity is modified and written back to a partition. The table storage service uses this field to implement optimistic concurrency. (Each time an application writes an entity back to table storage, the table storage service compares the value of the timestamp in the entity that's being written with the value that's held in table storage. If the values are different, it means that another application must have modified the entity since it was last retrieved, and the write operation fails.) Don't modify this field in your own code, and don't specify a value for this field when you create a new entity. +> +> + +Azure table storage uses the partition key to determine how to store the data. If an entity is added to a table with a previously unused partition key, Azure table storage creates a new partition for this entity. Other entities with the same partition key will be stored in the same partition. + +This mechanism effectively implements an automatic scale-out strategy. Each partition is stored on a single server in an Azure datacenter to help ensure that queries that retrieve data from a single partition run quickly. However, different partitions can be distributed across multiple servers. Additionally, a single server can host multiple partitions if these partitions are limited in size.
+ +Consider the following points when you design your entities for Azure table storage: + +* The selection of partition key and row key values should be driven by the way in which the data is accessed. Choose a partition key/row key combination that supports the majority of your queries. The most efficient queries retrieve data by specifying the partition key and the row key. Queries that specify a partition key and a range of row keys can be completed by scanning a single partition. This is relatively fast because the data is held in row key order. If queries don't specify which partition to scan, the partition key might require Azure table storage to scan every partition for your data. + + > [!TIP] + > If an entity has one natural key, then use it as the partition key and specify an empty string as the row key. If an entity has a composite key comprising two properties, select the slowest changing property as the partition key and the other as the row key. If an entity has more than two key properties, use a concatenation of properties to provide the partition and row keys. + > + > +* If you regularly perform queries that look up data by using fields other than the partition and row keys, consider implementing the [index table pattern]. +* If you generate partition keys by using a monotonic increasing or decreasing sequence (such as "0001", "0002", "0003", and so on) and each partition only contains a limited amount of data, then Azure table storage can physically group these partitions together on the same server. This mechanism assumes that the application is most likely to perform queries across a contiguous range of partitions (range queries) and is optimized for this case. However, this approach can lead to hotspots focused on a single server because all insertions of new entities are likely to be concentrated at one end or the other of the contiguous ranges. It can also reduce scalability. 
To spread the load more evenly across servers, consider hashing the partition key to make the sequence more random. +* Azure table storage supports transactional operations for entities that belong to the same partition. This means that an application can perform multiple insert, update, delete, replace, or merge operations as an atomic unit (as long as the transaction doesn't include more than 100 entities and the payload of the request doesn't exceed 4 MB). Operations that span multiple partitions are not transactional, and might require you to implement eventual consistency as described by the [Data consistency primer]. For more information about table storage and transactions, go to the page [Performing entity group transactions] on the Microsoft website. +* Give careful attention to the granularity of the partition key because of the following reasons: + * Using the same partition key for every entity causes the table storage service to create a single large partition that's held on one server. This prevents it from scaling out and instead focuses the load on a single server. As a result, this approach is only suitable for systems that manage a small number of entities. However, this approach does ensure that all entities can participate in entity group transactions. + * Using a unique partition key for every entity causes the table storage service to create a separate partition for each entity, possibly resulting in a large number of small partitions (depending on the size of the entities). This approach is more scalable than using a single partition key, but entity group transactions are not possible. Also, queries that fetch more than one entity might involve reading from more than one server. However, if the application performs range queries, then using a monotonic sequence to generate the partition keys might help to optimize these queries. 
+ * Sharing the partition key across a subset of entities makes it possible for you to group related entities in the same partition. Operations that involve related entities can be performed by using entity group transactions, and queries that fetch a set of related entities can be satisfied by accessing a single server. + +For additional information about partitioning data in Azure table storage, see the article [Azure storage table design guide] on the Microsoft website. + +## Partitioning Azure blob storage +Azure blob storage makes it possible to hold large binary objects--currently up to 200 GB in size for block blobs or 1 TB for page blobs. (For the most recent information, go to the page [Azure Storage scalability and performance targets] on the Microsoft website.) Use block blobs in scenarios such as streaming where you need to upload or download large volumes of data quickly. Use page blobs for applications that require random rather than serial access to parts of the data. + +Each blob (either block or page) is held in a container in an Azure storage account. You can use containers to group related blobs that have the same security requirements, although this grouping is logical rather than physical. Inside a container, each blob has a unique name. + +Blob storage is automatically partitioned based on the blob name. Each blob is held in its own partition. Blobs in the same container do not share a partition. This architecture helps Azure blob storage to balance the load across servers transparently because different blobs in the same container can be distributed across different servers. + +The actions of writing a single block (block blob) or page (page blob) are atomic, but operations that span blocks, pages, or blobs are not. If you need to ensure consistency when performing write operations across blocks, pages, and blobs, take out a write lock by using a blob lease. 
+ +Azure blob storage supports transfer rates of up to 60 MB per second or 500 requests per second for each blob. If you anticipate surpassing these limits, and the blob data is relatively static, then consider replicating blobs by using the Azure Content Delivery Network. For more information, see the page [Using Delivery Content Network for Azure] on the Microsoft website. For additional guidance and considerations, see [Using Content Delivery Network for Azure]. + +## Partitioning Azure storage queues +Azure storage queues enable you to implement asynchronous messaging between processes. An Azure storage account can contain any number of queues, and each queue can contain any number of messages. The only limitation is the space that's available in the storage account. The maximum size of an individual message is 64 KB. If you require messages bigger than this, then consider using Azure Service Bus queues instead. + +Each storage queue has a unique name within the storage account that contains it. Azure partitions queues based on the name. All messages for the same queue are stored in the same partition, which is controlled by a single server. Different queues can be managed by different servers to help balance the load. The allocation of queues to servers is transparent to applications and users. + + In a large-scale application, don't use the same storage queue for all instances of the application because this approach might cause the server that's hosting the queue to become a hotspot. Instead, use different queues for different functional areas of the application. Azure storage queues do not support transactions, so directing messages to different queues should have little impact on messaging consistency. + +An Azure storage queue can handle up to 2,000 messages per second. If you need to process messages at a greater rate than this, consider creating multiple queues. 
For example, in a global application, create separate storage queues in separate storage accounts to handle application instances that are running in each region. + +## Partitioning strategies for Azure Service Bus +Azure Service Bus uses a message broker to handle messages that are sent to a Service Bus queue or topic. By default, all messages that are sent to a queue or topic are handled by the same message broker process. This architecture can place a limitation on the overall throughput of the message queue. However, you can also partition a queue or topic when it is created. You do this by setting the *EnablePartitioning* property of the queue or topic description to *true*. + +A partitioned queue or topic is divided into multiple fragments, each of which is backed by a separate message store and message broker. Service Bus takes responsibility for creating and managing these fragments. When an application posts a message to a partitioned queue or topic, Service Bus assigns the message to a fragment for that queue or topic. When an application receives a message from a queue or subscription, Service Bus checks each fragment for the next available message and then passes it to the application for processing. + +This structure helps distribute the load across message brokers and message stores, increasing scalability and improving availability. If the message broker or message store for one fragment is temporarily unavailable, Service Bus can retrieve messages from one of the remaining available fragments. + +Service Bus assigns a message to a fragment as follows: + +* If the message belongs to a session, all messages with the same value for the *SessionId* property are sent to the same fragment. +* If the message does not belong to a session, but the sender has specified a value for the *PartitionKey* property, then all messages with the same *PartitionKey* value are sent to the same fragment.
+ + > [!NOTE] + > If the *SessionId* and *PartitionKey* properties are both specified, then they must be set to the same value or the message will be rejected. + > + > +* If the *SessionId* and *PartitionKey* properties for a message are not specified, but duplicate detection is enabled, the *MessageId* property will be used. All messages with the same *MessageId* will be directed to the same fragment. +* If messages do not include a *SessionId, PartitionKey,* or *MessageId* property, then Service Bus assigns messages to fragments sequentially. If a fragment is unavailable, Service Bus will move on to the next. This means that a temporary fault in the messaging infrastructure does not cause the message-send operation to fail. + +Consider the following points when deciding if or how to partition a Service Bus message queue or topic: + +* Service Bus queues and topics are created within the scope of a Service Bus namespace. Service Bus currently allows up to 100 partitioned queues or topics per namespace. +* Each Service Bus namespace imposes quotas on the available resources, such as the number of subscriptions per topic, the number of concurrent send and receive requests per second, and the maximum number of concurrent connections that can be established. These quotas are documented on the Microsoft website on the page [Service Bus quotas]. If you expect to exceed these values, then create additional namespaces with their own queues and topics, and spread the work across these namespaces. For example, in a global application, create separate namespaces in each region and configure application instances to use the queues and topics in the nearest namespace. +* Messages that are sent as part of a transaction must specify a partition key. This can be a *SessionId*, *PartitionKey*, or *MessageId* property. All messages that are sent as part of the same transaction must specify the same partition key because they must be handled by the same message broker process. 
You cannot send messages to different queues or topics within the same transaction. +* Partitioned queues and topics can't be configured to be automatically deleted when they become idle. +* Partitioned queues and topics can't currently be used with the Advanced Message Queuing Protocol (AMQP) if you are building cross-platform or hybrid solutions. + +## Partitioning strategies for Azure DocumentDB databases +Azure DocumentDB is a NoSQL database that can store documents. A document in a DocumentDB database is a JSON-serialized representation of an object or other piece of data. No fixed schemas are enforced except that every document must contain a unique ID. + +Documents are organized into collections. You can group related documents together in a collection. For example, in a system that maintains blog postings, you can store the contents of each blog post as a document in a collection. You can also create collections for each subject type. Alternatively, in a multitenant application, such as a system where different authors control and manage their own blog posts, you can partition blogs by author and create separate collections for each author. The storage space that's allocated to collections is elastic and can shrink or grow as needed. + +Document collections provide a natural mechanism for partitioning data within a single database. Internally, a DocumentDB database can span several servers and might attempt to spread the load by distributing collections across servers. The simplest way to implement sharding is to create a collection for each shard. + +> [!NOTE] +> Each DocumentDB database has a *performance level* that determines the amount of resources it gets. A performance level is associated with a *request unit* (RU) rate limit. The RU rate limit specifies the volume of resources that's reserved and available for exclusive use by that collection. The cost of a collection depends on the performance level that's selected for that collection. 
The higher the performance level (and RU rate limit) the higher the charge. You can adjust the performance level of a collection by using the Azure portal. For more information, see the page [Performance levels in DocumentDB] on the Microsoft website. +> +> + +All databases are created in the context of a DocumentDB account. A single DocumentDB account can contain several databases, and it specifies in which region the databases are created. Each DocumentDB account also enforces its own access control. You can use DocumentDB accounts to geo-locate shards (collections within databases) close to the users who need to access them, and enforce restrictions so that only those users can connect to them. + +Each DocumentDB account has a quota that limits the number of databases and collections that it can contain and the amount of document storage that's available. These limits are subject to change, but are described on the page [DocumentDB limits and quotas] on the Microsoft website. It is theoretically possible that if you implement a system where all shards belong to the same database, you might reach the storage capacity limit of the account. + +In this case, you might need to create additional DocumentDB accounts and databases, and distribute the shards across these databases. However, even if you are unlikely to reach the storage capacity of a database, it's a good practice to use multiple databases. That's because each database has its own set of users and permissions, and you can use this mechanism to isolate access to collections on a per-database basis. + +Figure 8 illustrates the high-level structure of the DocumentDB architecture. + +![The structure of DocumentDB](./images/data-partitioning/DocumentDBStructure.png) + +*Figure 8. 
The structure of the DocumentDB architecture* + +It is the task of the client application to direct requests to the appropriate shard, usually by implementing its own mapping mechanism based on some attributes of the data that define the shard key. Figure 9 shows two DocumentDB databases, each containing two collections that are acting as shards. The data is sharded by a tenant ID and contains the data for a specific tenant. The databases are created in separate DocumentDB accounts. These accounts are located in the same region as the tenants for which they contain data. The routing logic in the client application uses the tenant ID as the shard key. + +![Implementing sharding using Azure DocumentDB](./images/data-partitioning/DocumentDBPartitions.png) + +*Figure 9. Implementing sharding using an Azure DocumentDB database* + +Consider the following points when deciding how to partition data with a DocumentDB database: + +* **The resources available to a DocumentDB database are subject to the quota limitations of the DocumentDB account**. Each database can hold a number of collections (again, there is a limit), and each collection is associated with a performance level that governs the RU rate limit (reserved throughput) for that collection. For more information, go to the page [DocumentDB limits and quotas] on the Microsoft website. +* **Each document must have an attribute that can be used to uniquely identify that document within the collection in which it is held**. This attribute is different from the shard key, which defines which collection holds the document. A collection can contain a large number of documents. In theory, it's limited only by the maximum length of the document ID. The document ID can be up to 255 characters. +* **All operations against a document are performed within the context of a transaction. 
Transactions in DocumentDB databases are scoped to the collection in which the document is contained.** If an operation fails, the work that it has performed is rolled back. While a document is subject to an operation, any changes that are made are subject to snapshot-level isolation. This mechanism guarantees that if, for example, a request to create a new document fails, another user who's querying the database simultaneously will not see a partial document that is then removed. +* **DocumentDB database queries are also scoped to the collection level**. A single query can retrieve data from only one collection. If you need to retrieve data from multiple collections, you must query each collection individually and merge the results in your application code. +* **DocumentDB databases support programmable items that can all be stored in a collection alongside documents**. These include stored procedures, user-defined functions, and triggers (written in JavaScript). These items can access any document within the same collection. Furthermore, these items run either inside the scope of the ambient transaction (in the case of a trigger that fires as the result of a create, delete, or replace operation performed against a document), or by starting a new transaction (in the case of a stored procedure that is run as the result of an explicit client request). If the code in a programmable item throws an exception, the transaction is rolled back. You can use stored procedures and triggers to maintain integrity and consistency between documents, but these documents must all be part of the same collection. +* **The collections that you intend to hold in the databases in a DocumentDB account should be unlikely to exceed the throughput limits defined by the performance levels of the collections**. These limits are described on the page [Manage DocumentDB capacity needs] on the Microsoft website.
If you anticipate reaching these limits, consider splitting collections across databases in different DocumentDB accounts to reduce the load per collection. + +## Partitioning strategies for Azure Search +The ability to search for data is often the primary method of navigation and exploration that's provided by many web applications. It helps users find resources quickly (for example, products in an e-commerce application) based on combinations of search criteria. The Azure Search service provides full-text search capabilities over web content, and includes features such as type-ahead, suggested queries based on near matches, and faceted navigation. A full description of these capabilities is available on the page [What is Azure Search?] on the Microsoft website. + +Azure Search stores searchable content as JSON documents in a database. You define indexes that specify the searchable fields in these documents and provide these definitions to Azure Search. When a user submits a search request, Azure Search uses the appropriate indexes to find matching items. + +To reduce contention, the storage that's used by Azure Search can be divided into 1, 2, 3, 4, 6, or 12 partitions, and each partition can be replicated up to 6 times. The product of the number of partitions multiplied by the number of replicas is called the *search unit* (SU). A single instance of Azure Search can contain a maximum of 36 SUs (a database with 12 partitions only supports a maximum of 3 replicas). + +You are billed for each SU that is allocated to your service. As the volume of searchable content increases or the rate of search requests grows, you can add SUs to an existing instance of Azure Search to handle the extra load. Azure Search itself distributes the documents evenly across the partitions. No manual partitioning strategies are currently supported. + +Each partition can contain a maximum of 15 million documents or occupy 300 GB of storage space (whichever is smaller). 
You can create up to 50 indexes. The performance of the service varies and depends on the complexity of the documents, the available indexes, and the effects of network latency. On average, a single replica (1 SU) should be able to handle 15 queries per second (QPS), although we recommend performing benchmarking with your own data to obtain a more precise measure of throughput. For more information, see the page [Service limits in Azure Search] on the Microsoft website. + +> [!NOTE] +> You can store a limited set of data types in searchable documents, including strings, Booleans, numeric data, datetime data, and some geographical data. For more details, see the page [Supported data types (Azure Search)] on the Microsoft website. +> +> + +You have limited control over how Azure Search partitions data for each instance of the service. However, in a global environment you might be able to improve performance and reduce latency and contention further by partitioning the service itself using either of the following strategies: + +* Create an instance of Azure Search in each geographic region, and ensure that client applications are directed towards the nearest available instance. This strategy requires that any updates to searchable content are replicated in a timely manner across all instances of the service. +* Create two tiers of Azure Search: + + * A local service in each region that contains the data that's most frequently accessed by users in that region. Users can direct requests here for fast but limited results. + * A global service that encompasses all the data. Users can direct requests here for slower but more complete results. + +This approach is most suitable when there is a significant regional variation in the data that's being searched. + +## Partitioning strategies for Azure Redis Cache +Azure Redis Cache provides a shared caching service in the cloud that's based on the Redis key-value data store. 
As its name implies, Azure Redis Cache is intended as a caching solution. Use it only for holding transient data and not as a permanent data store. Applications that utilize Azure Redis Cache should be able to continue functioning if the cache is unavailable. Azure Redis Cache supports primary/secondary replication to provide high availability, but currently limits the maximum cache size to 53 GB. If you need more space than this, you must create additional caches. For more information, go to the page [Azure Redis Cache] on the Microsoft website. + +Partitioning a Redis data store involves splitting the data across instances of the Redis service. Each instance constitutes a single partition. Azure Redis Cache abstracts the Redis services behind a façade and does not expose them directly. The simplest way to implement partitioning is to create multiple Azure Redis Cache instances and spread the data across them. + +You can associate each data item with an identifier (a partition key) that specifies which cache stores the data item. The client application logic can then use this identifier to route requests to the appropriate partition. This scheme is very simple, but if the partitioning scheme changes (for example, if additional Azure Redis Cache instances are created), client applications might need to be reconfigured. + +Native Redis (not Azure Redis Cache) supports server-side partitioning based on Redis clustering. In this approach, you can divide the data evenly across servers by using a hashing mechanism. Each Redis server stores metadata that describes the range of hash keys that the partition holds, and also contains information about which hash keys are located in the partitions on other servers. + +Client applications simply send requests to any of the participating Redis servers (probably the closest one). The Redis server examines the client request. If it can be resolved locally, it performs the requested operation. 
Otherwise it forwards the request on to the appropriate server. + +This model is implemented by using Redis clustering, and is described in more detail on the [Redis cluster tutorial] page on the Redis website. Redis clustering is transparent to client applications. Additional Redis servers can be added to the cluster (and the data can be re-partitioned) without requiring that you reconfigure the clients. + +> [!IMPORTANT] +> Azure Redis Cache does not currently support Redis clustering. If you want to implement this approach with Azure, then you must implement your own Redis servers by installing Redis on a set of Azure virtual machines and configuring them manually. The page [Running Redis on a CentOS Linux VM in Azure] on the Microsoft website walks through an example that shows you how to build and configure a Redis node running as an Azure VM. +> +> + +The page [Partitioning: how to split data among multiple Redis instances] on the Redis website provides more information about implementing partitioning with Redis. The remainder of this section assumes that you are implementing client-side or proxy-assisted partitioning. + +Consider the following points when deciding how to partition data with Azure Redis Cache: + +* Azure Redis Cache is not intended to act as a permanent data store, so whatever partitioning scheme you implement, your application code must be able to retrieve data from a location that's not the cache. +* Data that is frequently accessed together should be kept in the same partition. Redis is a powerful key-value store that provides several highly optimized mechanisms for structuring data. 
These mechanisms can be one of the following: + + * Simple strings (binary data up to 512 MB in length) + * Aggregate types such as lists (which can act as queues and stacks) + * Sets (ordered and unordered) + * Hashes (which can group related fields together, such as the items that represent the fields in an object) +* The aggregate types enable you to associate many related values with the same key. A Redis key identifies a list, set, or hash rather than the data items that it contains. These types are all available with Azure Redis Cache and are described by the [Data types] page on the Redis website. For example, in part of an e-commerce system that tracks the orders that are placed by customers, the details of each customer can be stored in a Redis hash that is keyed by using the customer ID. Each hash can hold a collection of order IDs for the customer. A separate Redis set can hold the orders, again structured as hashes, and keyed by using the order ID. Figure 10 shows this structure. Note that Redis does not implement any form of referential integrity, so it is the developer's responsibility to maintain the relationships between customers and orders. + +![Suggested structure in Redis storage for recording customer orders and their details](./images/data-partitioning/RedisCustomersandOrders.png) + +*Figure 10. Suggested structure in Redis storage for recording customer orders and their details* + +> [!NOTE] +> In Redis, all keys are binary data values (like Redis strings) and can contain up to 512 MB of data. In theory, a key can contain almost any information. However, we recommend adopting a consistent naming convention for keys that is descriptive of the type of data and that identifies the entity, but is not excessively long. A common approach is to use keys of the form "entity_type:ID". For example, you can use "customer:99" to indicate the key for a customer with the ID 99. 
+> +> + +* You can implement vertical partitioning by storing related information in different aggregations in the same database. For example, in an e-commerce application, you can store commonly accessed information about products in one Redis hash and less frequently used detailed information in another. +  Both hashes can use the same product ID as part of the key. For example, you can use "product: *nn*" (where *nn* is the product ID) for the product information and "product_details: *nn*" for the detailed data. This strategy can help reduce the volume of data that most queries are likely to retrieve. +* You can repartition a Redis data store, but keep in mind that it's a complex and time-consuming task. Redis clustering can repartition data automatically, but this capability is not available with Azure Redis Cache. Therefore, when you design your partitioning scheme, try to leave sufficient free space in each partition to allow for expected data growth over time. However, remember that Azure Redis Cache is intended to cache data temporarily, and that data held in the cache can have a limited lifetime specified as a time-to-live (TTL) value. For relatively volatile data, the TTL can be short, but for static data the TTL can be a lot longer. Avoid storing large amounts of long-lived data in the cache if the volume of this data is likely to fill the cache. You can specify an eviction policy that causes Azure Redis Cache to remove data if space is at a premium. + +  > [!NOTE] +  > When you use Azure Redis Cache, you specify the maximum size of the cache (from 250 MB to 53 GB) by selecting the appropriate pricing tier. However, after an Azure Redis Cache has been created, you cannot increase (or decrease) its size. +  > +  > +* Redis batches and transactions cannot span multiple connections, so all data that is affected by a batch or transaction should be held in the same database (shard).
+ + > [!NOTE] + > A sequence of operations in a Redis transaction is not necessarily atomic. The commands that compose a transaction are verified and queued before they run. If an error occurs during this phase, the entire queue is discarded. However, after the transaction has been successfully submitted, the queued commands run in sequence. If any command fails, only that command stops running. All previous and subsequent commands in the queue are performed. For more information, go to the [Transactions] page on the Redis website. + > + > +* Redis supports a limited number of atomic operations. The only operations of this type that support multiple keys and values are MGET and MSET operations. MGET operations return a collection of values for a specified list of keys, and MSET operations store a collection of values for a specified list of keys. If you need to use these operations, the key-value pairs that are referenced by the MSET and MGET commands must be stored within the same database. + +## Rebalancing partitions +As a system matures and you understand the usage patterns better, you might have to adjust the partitioning scheme. For example, individual partitions might start attracting a disproportionate volume of traffic and become hot, leading to excessive contention. Additionally, you might have underestimated the volume of data in some partitions, causing you to approach the limits of the storage capacity in these partitions. Whatever the cause, it is sometimes necessary to rebalance partitions to spread the load more evenly. + +In some cases, data storage systems that don't publicly expose how data is allocated to servers can automatically rebalance partitions within the limits of the resources available. In other situations, rebalancing is an administrative task that consists of two stages: + +1. Determining the new partitioning strategy to ascertain: + * Which partitions might need to be split (or possibly combined). 
+ * How to allocate data to these new partitions by designing new partition keys. +2. Migrating the affected data from the old partitioning scheme to the new set of partitions. + +> [!NOTE] +> The mapping of DocumentDB database collections to servers is transparent, but you can still reach the storage capacity and throughput limits of a DocumentDB account. If this happens, you might need to redesign your partitioning scheme and migrate the data. +> +> + +Depending on the data storage technology and the design of your data storage system, you might be able to migrate data between partitions while they are in use (online migration). If this isn't possible, you might need to make the affected partitions temporarily unavailable while the data is relocated (offline migration). + +## Offline migration +Offline migration is arguably the simplest approach because it reduces the chances of contention occurring. Don't make any changes to the data while it is being moved and restructured. + +Conceptually, this process includes the following steps: + +1. Mark the shard offline. +2. Split-merge and move the data to the new shards. +3. Verify the data. +4. Bring the new shards online. +5. Remove the old shard. + +To retain some availability, you can mark the original shard as read-only in step 1 rather than making it unavailable. This allows applications to read the data while it is being moved but not to change it. + +## Online migration +Online migration is more complex to perform but less disruptive to users because data remains available during the entire procedure. The process is similar to that used by offline migration, except that the original shard is not marked offline (step 1). Depending on the granularity of the migration process (for example, whether it's done item by item or shard by shard), the data access code in the client applications might have to handle reading and writing data that's held in two locations (the original shard and the new shard). 
+ +For an example of a solution that supports online migration, see the article [Scaling using the Elastic Database split-merge tool] on the Microsoft website. + +## Related patterns and guidance +When considering strategies for implementing data partitioning, the following patterns might also be relevant to your scenario: + +* The [Data consistency primer] page on the Microsoft website describes strategies for maintaining consistency in a distributed environment such as the cloud. +* The [Data partitioning guidance] page on the Microsoft website provides a general overview of how to design partitions to meet various criteria in a distributed solution. +* The [sharding pattern] as described on the Microsoft website summarizes some common strategies for sharding data. +* The [index table pattern] as described on the Microsoft website illustrates how to create secondary indexes over data. An application can quickly retrieve data with this approach, by using queries that do not reference the primary key of a collection. +* The [materialized view pattern] as described on the Microsoft website describes how to generate pre-populated views that summarize data to support fast query operations. This approach can be useful in a partitioned data store if the partitions that contain the data being summarized are distributed across multiple sites. +* The [Using Azure Content Delivery Network] article on the Microsoft website provides additional guidance on configuring and using Content Delivery Network with Azure. + +## More information +* The page [What is Azure SQL Database?] on the Microsoft website provides detailed documentation that describes how to create and use SQL databases. +* The page [Elastic Database features overview] on the Microsoft website provides a comprehensive introduction to Elastic Database.
+* The page [Scaling using the Elastic Database split-merge tool] on the Microsoft website contains information about using the split-merge service to manage Elastic Database shards. +* The page [Azure storage scalability and performance targets](https://msdn.microsoft.com/library/azure/dn249410.aspx) on the Microsoft website documents the current sizing and throughput limits of Azure Storage. +* The page [Performing entity group transactions] on the Microsoft website provides detailed information about implementing transactional operations over entities that are stored in Azure table storage. +* The article [Azure Storage table design guide] on the Microsoft website contains detailed information about partitioning data in Azure table storage. +* The page [Using Azure Content Delivery Network] on the Microsoft website describes how to replicate data that's held in Azure blob storage by using the Azure Content Delivery Network. +* The page [Manage DocumentDB capacity needs] on the Microsoft website contains information about how Azure DocumentDB databases allocate resources. +* The page [What is Azure Search?] on the Microsoft website provides a full description of the capabilities that are available in Azure Search. +* The page [Service limits in Azure Search] on the Microsoft website contains information about the capacity of each instance of Azure Search. +* The page [Supported data types (Azure Search)] on the Microsoft website summarizes the data types that you can use in searchable documents and indexes. +* The page [Azure Redis Cache] on the Microsoft website provides an introduction to Azure Redis Cache. +* The [Partitioning: how to split data among multiple Redis instances] page on the Redis website provides information about how to implement partitioning with Redis. +* The page [Running Redis on a CentOS Linux VM in Azure] on the Microsoft website walks through an example that shows you how to build and configure a Redis node running as an Azure VM. 
+* The [Data types] page on the Redis website describes the data types that are available with Redis and Azure Redis Cache. + +[Azure Redis Cache]: http://azure.microsoft.com/services/cache/ +[Azure Storage Scalability and Performance Targets]: /azure/storage/storage-scalability-targets +[Azure Storage Table Design Guide]: /azure/storage/storage-table-design-guide +[Building a Polyglot Solution]: https://msdn.microsoft.com/library/dn313279.aspx +[Data Access for Highly-Scalable Solutions: Using SQL, NoSQL, and Polyglot Persistence]: https://msdn.microsoft.com/library/dn271399.aspx +[Data consistency primer]: http://aka.ms/Data-Consistency-Primer +[Data Partitioning Guidance]: https://msdn.microsoft.com/library/dn589795.aspx +[Data Types]: http://redis.io/topics/data-types +[DocumentDB limits and quotas]: /azure/documentdb/documentdb-limits +[Elastic Database features overview]: /azure/sql-database/sql-database-elastic-scale-introduction +[Federations Migration Utility]: https://code.msdn.microsoft.com/vstudio/Federations-Migration-ce61e9c1 +[Index Table Pattern]: http://aka.ms/Index-Table-Pattern +[Manage DocumentDB capacity needs]: /azure/documentdb/documentdb-manage +[Materialized View Pattern]: http://aka.ms/Materialized-View-Pattern +[Multi-shard querying]: /azure/sql-database/sql-database-elastic-scale-multishard-querying +[Partitioning: how to split data among multiple Redis instances]: http://redis.io/topics/partitioning +[Performance levels in DocumentDB]: /azure/documentdb/documentdb-performance-levels +[Performing Entity Group Transactions]: https://msdn.microsoft.com/library/azure/dd894038.aspx +[Redis cluster tutorial]: http://redis.io/topics/cluster-tutorial +[Running Redis on a CentOS Linux VM in Azure]: http://blogs.msdn.com/b/tconte/archive/2012/06/08/running-redis-on-a-centos-linux-vm-in-windows-azure.aspx +[Scaling using the Elastic Database split-merge tool]: /azure/sql-database/sql-database-elastic-scale-overview-split-and-merge +[Using Azure 
Content Delivery Network]: /azure/cdn/cdn-create-new-endpoint +[Service Bus quotas]: /azure/service-bus-messaging/service-bus-quotas +[Service limits in Azure Search]: /azure/search/search-limits-quotas-capacity +[Sharding pattern]: http://aka.ms/Sharding-Pattern +[Supported Data Types (Azure Search)]: https://msdn.microsoft.com/library/azure/dn798938.aspx +[Transactions]: http://redis.io/topics/transactions +[What is Azure Search?]: /azure/search/search-what-is-azure-search +[What is Azure SQL Database?]: /azure/sql-database/sql-database-technical-overview diff --git a/docs/best-practices/images/caching/Caching3.png b/docs/best-practices/images/caching/Caching3.png new file mode 100644 index 00000000000..5eae3c79817 Binary files /dev/null and b/docs/best-practices/images/caching/Caching3.png differ diff --git a/docs/best-practices/images/caching/Figure1.png b/docs/best-practices/images/caching/Figure1.png new file mode 100644 index 00000000000..fd74585e029 Binary files /dev/null and b/docs/best-practices/images/caching/Figure1.png differ diff --git a/docs/best-practices/images/caching/Figure2.png b/docs/best-practices/images/caching/Figure2.png new file mode 100644 index 00000000000..0436f84784c Binary files /dev/null and b/docs/best-practices/images/caching/Figure2.png differ diff --git a/docs/best-practices/images/caching/Figure3.png b/docs/best-practices/images/caching/Figure3.png new file mode 100644 index 00000000000..4ea94bd7375 Binary files /dev/null and b/docs/best-practices/images/caching/Figure3.png differ diff --git a/docs/best-practices/images/cdn/CDN.png b/docs/best-practices/images/cdn/CDN.png new file mode 100644 index 00000000000..591bf9b671a Binary files /dev/null and b/docs/best-practices/images/cdn/CDN.png differ diff --git a/docs/best-practices/images/data-partitioning/DataPartitioning01.png b/docs/best-practices/images/data-partitioning/DataPartitioning01.png new file mode 100644 index 00000000000..5b484a9afaa Binary files /dev/null and 
b/docs/best-practices/images/data-partitioning/DataPartitioning01.png differ diff --git a/docs/best-practices/images/data-partitioning/DataPartitioning02.png b/docs/best-practices/images/data-partitioning/DataPartitioning02.png new file mode 100644 index 00000000000..a706443dd0b Binary files /dev/null and b/docs/best-practices/images/data-partitioning/DataPartitioning02.png differ diff --git a/docs/best-practices/images/data-partitioning/DataPartitioning03.png b/docs/best-practices/images/data-partitioning/DataPartitioning03.png new file mode 100644 index 00000000000..fff83491ad5 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/DataPartitioning03.png differ diff --git a/docs/best-practices/images/data-partitioning/DocumentDBPartitions.png b/docs/best-practices/images/data-partitioning/DocumentDBPartitions.png new file mode 100644 index 00000000000..59dad0dca64 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/DocumentDBPartitions.png differ diff --git a/docs/best-practices/images/data-partitioning/DocumentDBStructure.png b/docs/best-practices/images/data-partitioning/DocumentDBStructure.png new file mode 100644 index 00000000000..e9d16898abd Binary files /dev/null and b/docs/best-practices/images/data-partitioning/DocumentDBStructure.png differ diff --git a/docs/best-practices/images/data-partitioning/MultipleShardMaps.png b/docs/best-practices/images/data-partitioning/MultipleShardMaps.png new file mode 100644 index 00000000000..659a8bd188e Binary files /dev/null and b/docs/best-practices/images/data-partitioning/MultipleShardMaps.png differ diff --git a/docs/best-practices/images/data-partitioning/PointShardlet.png b/docs/best-practices/images/data-partitioning/PointShardlet.png new file mode 100644 index 00000000000..75648e9d188 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/PointShardlet.png differ diff --git a/docs/best-practices/images/data-partitioning/RangeShardlet.png 
b/docs/best-practices/images/data-partitioning/RangeShardlet.png new file mode 100644 index 00000000000..ec30aa61659 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/RangeShardlet.png differ diff --git a/docs/best-practices/images/data-partitioning/RedisCustomersandOrders.png b/docs/best-practices/images/data-partitioning/RedisCustomersandOrders.png new file mode 100644 index 00000000000..5f452e4e823 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/RedisCustomersandOrders.png differ diff --git a/docs/best-practices/images/data-partitioning/TableStorage.png b/docs/best-practices/images/data-partitioning/TableStorage.png new file mode 100644 index 00000000000..20e414108f5 Binary files /dev/null and b/docs/best-practices/images/data-partitioning/TableStorage.png differ diff --git a/docs/best-practices/images/monitoring/BufferedQueue.png b/docs/best-practices/images/monitoring/BufferedQueue.png new file mode 100644 index 00000000000..cfd861b5756 Binary files /dev/null and b/docs/best-practices/images/monitoring/BufferedQueue.png differ diff --git a/docs/best-practices/images/monitoring/Consolidation.png b/docs/best-practices/images/monitoring/Consolidation.png new file mode 100644 index 00000000000..67f7c04d3bd Binary files /dev/null and b/docs/best-practices/images/monitoring/Consolidation.png differ diff --git a/docs/best-practices/images/monitoring/DataStorage.png b/docs/best-practices/images/monitoring/DataStorage.png new file mode 100644 index 00000000000..4a69f009cfc Binary files /dev/null and b/docs/best-practices/images/monitoring/DataStorage.png differ diff --git a/docs/best-practices/images/monitoring/Pipeline.png b/docs/best-practices/images/monitoring/Pipeline.png new file mode 100644 index 00000000000..695c61d2eed Binary files /dev/null and b/docs/best-practices/images/monitoring/Pipeline.png differ diff --git a/docs/best-practices/images/monitoring/PullModel.png 
b/docs/best-practices/images/monitoring/PullModel.png new file mode 100644 index 00000000000..b06f3f75644 Binary files /dev/null and b/docs/best-practices/images/monitoring/PullModel.png differ diff --git a/docs/best-practices/images/monitoring/TelemetryService.png b/docs/best-practices/images/monitoring/TelemetryService.png new file mode 100644 index 00000000000..f70b3c940d5 Binary files /dev/null and b/docs/best-practices/images/monitoring/TelemetryService.png differ diff --git a/docs/best-practices/images/naming-conventions/guidance-naming-convention-example.png b/docs/best-practices/images/naming-conventions/guidance-naming-convention-example.png new file mode 100644 index 00000000000..cf2aa29acff Binary files /dev/null and b/docs/best-practices/images/naming-conventions/guidance-naming-convention-example.png differ diff --git a/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable4.png b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable4.png new file mode 100644 index 00000000000..fed88c71382 Binary files /dev/null and b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable4.png differ diff --git a/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable7.png b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable7.png new file mode 100644 index 00000000000..6619aa69177 Binary files /dev/null and b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable7.png differ diff --git a/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable8.png b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable8.png new file mode 100644 index 00000000000..3ad1df9526a Binary files /dev/null and b/docs/best-practices/images/retry-service-specific/RetryServiceSpecificGuidanceTable8.png differ diff --git a/docs/best-practices/index.md 
b/docs/best-practices/index.md new file mode 100644 index 00000000000..3c2b5ec0c75 --- /dev/null +++ b/docs/best-practices/index.md @@ -0,0 +1,3 @@ +--- +redirect_url: /azure/architecture/best-practices/api-design +--- diff --git a/docs/best-practices/monitoring.md b/docs/best-practices/monitoring.md new file mode 100644 index 00000000000..58425865a22 --- /dev/null +++ b/docs/best-practices/monitoring.md @@ -0,0 +1,713 @@ +--- +title: Monitoring and diagnostics guidance +description: Best practices for monitoring distributed applications in the cloud. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 2d2a8497-73d0-4a46-aac6-6d504003de2b +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Monitoring and diagnostics +[!INCLUDE [header](../_includes/header.md)] + +Distributed applications and services running in the cloud are, by their nature, complex pieces of software that comprise many moving parts. In a production environment, it's important to be able to track the way in which users utilize your system, trace resource utilization, and generally monitor the health and performance of your system. You can use this information as a diagnostic aid to detect and correct issues, and also to help spot potential problems and prevent them from occurring. + +## Monitoring and diagnostics scenarios +You can use monitoring to gain an insight into how well a system is functioning. Monitoring is a crucial part of maintaining quality-of-service targets. Common scenarios for collecting monitoring data include: + +* Ensuring that the system remains healthy. +* Tracking the availability of the system and its component elements. +* Maintaining performance to ensure that the throughput of the system does not degrade unexpectedly as the volume of work increases. 
+* Guaranteeing that the system meets any service-level agreements (SLAs) established with customers. +* Protecting the privacy and security of the system, users, and their data. +* Tracking the operations that are performed for auditing or regulatory purposes. +* Monitoring the day-to-day usage of the system and spotting trends that might lead to problems if they're not addressed. +* Tracking issues that occur, from initial report through to analysis of possible causes, rectification, consequent software updates, and deployment. +* Tracing operations and debugging software releases. + +> [!NOTE] +> This list is not intended to be comprehensive. This document focuses on these scenarios as the most common situations for performing monitoring. There might be others that are less common or are specific to your environment. +> +> + +The following sections describe these scenarios in more detail. The information for each scenario is discussed in the following format: + +1. A brief overview of the scenario +2. The typical requirements of this scenario +3. The raw instrumentation data that's required to support the scenario, and possible sources of this information +4. How this raw data can be analyzed and combined to generate meaningful diagnostic information + +## Health monitoring +A system is healthy if it is running and capable of processing requests. The purpose of health monitoring is to generate a snapshot of the current health of the system so that you can verify that all components of the system are functioning as expected. + +### Requirements for health monitoring +An operator should be alerted quickly (within a matter of seconds) if any part of the system is deemed to be unhealthy. The operator should be able to ascertain which parts of the system are functioning normally, and which parts are experiencing problems. 
System health can be highlighted through a traffic-light system: + +* Red for unhealthy (the system has stopped) +* Yellow for partially healthy (the system is running with reduced functionality) +* Green for completely healthy + +A comprehensive health-monitoring system enables an operator to drill down through the system to view the health status of subsystems and components. For example, if the overall system is depicted as partially healthy, the operator should be able to zoom in and determine which functionality is currently unavailable. + +### Data sources, instrumentation, and data-collection requirements +The raw data that's required to support health monitoring can be generated as a result of: + +* Tracing execution of user requests. This information can be used to determine which requests have succeeded, which have failed, and how long each request takes. +* Synthetic user monitoring. This process simulates the steps performed by a user and follows a predefined series of steps. The results of each step should be captured. +* Logging exceptions, faults, and warnings. This information can be captured as a result of trace statements embedded into the application code, as well as retrieving information from the event logs of any services that the system references. +* Monitoring the health of any third-party services that the system uses. This monitoring might require retrieving and parsing health data that these services supply. This information might take a variety of formats. +* Endpoint monitoring. This mechanism is described in more detail in the "Availability monitoring" section. +* Collecting ambient performance information, such as background CPU utilization or I/O (including network) activity. + +### Analyzing health data +The primary focus of health monitoring is to quickly indicate whether the system is running. Hot analysis of the immediate data can trigger an alert if a critical component is detected as unhealthy. 
(It fails to respond to a consecutive series of pings, for example.) The operator can then take the appropriate corrective action. + +A more advanced system might include a predictive element that performs a cold analysis over recent and current workloads. A cold analysis can spot trends and determine whether the system is likely to remain healthy or whether the system will need additional resources. This predictive element should be based on critical performance metrics, such as: + +* The rate of requests directed at each service or subsystem. +* The response times of these requests. +* The volume of data flowing into and out of each service. + +If the value of any metric exceeds a defined threshold, the system can raise an alert to enable an operator or autoscaling (if available) to take the preventative actions necessary to maintain system health. These actions might involve adding resources, restarting one or more services that are failing, or applying throttling to lower-priority requests. + +## Availability monitoring +A truly healthy system requires that the components and subsystems that compose the system are available. Availability monitoring is closely related to health monitoring. But whereas health monitoring provides an immediate view of the current health of the system, availability monitoring is concerned with tracking the availability of the system and its components to generate statistics about the uptime of the system. + +In many systems, some components (such as a database) are configured with built-in redundancy to permit rapid failover in the event of a serious fault or loss of connectivity. Ideally, users should not be aware that such a failure has occurred. But from an availability monitoring perspective, it's necessary to gather as much information as possible about such failures to determine the cause and take corrective actions to prevent them from recurring. 
+ +The data that's required to track availability might depend on a number of lower-level factors. Many of these factors might be specific to the application, system, and environment. An effective monitoring system captures the availability data that corresponds to these low-level factors and then aggregates them to give an overall picture of the system. For example, in an e-commerce system, the business functionality that enables a customer to place orders might depend on the repository where order details are stored and the payment system that handles the monetary transactions for paying for these orders. The availability of the order-placement part of the system is therefore a function of the availability of the repository and the payment subsystem. + +### Requirements for availability monitoring +An operator should also be able to view the historical availability of each system and subsystem, and use this information to spot any trends that might cause one or more subsystems to periodically fail. (Do services start to fail at a particular time of day that corresponds to peak processing hours?) + +A monitoring solution should provide an immediate and historical view of the availability or unavailability of each subsystem. It should also be capable of quickly alerting an operator when one or more services fail or when users can't connect to services. This is a matter of not only monitoring each service, but also examining the actions that each user performs if these actions fail when they attempt to communicate with a service. To some extent, a degree of connectivity failure is normal and might be due to transient errors. But it might be useful to allow the system to raise an alert for the number of connectivity failures to a specified subsystem that occur during a specific period. 
+ +### Data sources, instrumentation, and data-collection requirements +As with health monitoring, the raw data that's required to support availability monitoring can be generated as a result of synthetic user monitoring and logging any exceptions, faults, and warnings that might occur. In addition, availability data can be obtained from performing endpoint monitoring. The application can expose one or more health endpoints, each testing access to a functional area within the system. The monitoring system can ping each endpoint by following a defined schedule and collect the results (success or fail). + +All timeouts, network connectivity failures, and connection retry attempts must be recorded. All data should be time-stamped. + + + +### Analyzing availability data +The instrumentation data must be aggregated and correlated to support the following types of analysis: + +* The immediate availability of the system and subsystems. +* The availability failure rates of the system and subsystems. Ideally, an operator should be able to correlate failures with specific activities: what was happening when the system failed? +* A historical view of failure rates of the system or any subsystems across any specified period, and the load on the system (number of user requests, for example) when a failure occurred. +* The reasons for unavailability of the system or any subsystems. For example, the reasons might be service not running, connectivity lost, connected but timing out, and connected but returning errors. + +You can calculate the percentage availability of a service over a period of time by using the following formula: + +``` +%Availability = ((Total Time – Total Downtime) / Total Time ) * 100 +``` + +This is useful for SLA purposes. ([SLA monitoring](#sla-monitoring) is described in more detail later in this guidance.) The definition of *downtime* depends on the service.
For example, Visual Studio Team Services Build Service defines downtime as the period (total accumulated minutes) during which Build Service is unavailable. A minute is considered unavailable if all continuous HTTP requests to Build Service to perform customer-initiated operations throughout the minute either result in an error code or do not return a response. + +## Performance monitoring +As the system is placed under more and more stress (by increasing the volume of users), the size of the datasets that these users access grows and the failure of one or more components becomes more likely. Frequently, component failure is preceded by a decrease in performance. If you're able to detect such a decrease, you can take proactive steps to remedy the situation. + +System performance depends on a number of factors. Each factor is typically measured through key performance indicators (KPIs), such as the number of database transactions per second or the volume of network requests that are successfully serviced in a specified time frame. Some of these KPIs might be available as specific performance measures, whereas others might be derived from a combination of metrics. + +> [!NOTE] +> Determining poor or good performance requires that you understand the level of performance at which the system should be capable of running. This requires observing the system while it's functioning under a typical load and capturing the data for each KPI over a period of time. This might involve running the system under a simulated load in a test environment and gathering the appropriate data before deploying the system to a production environment. +> +> You should also ensure that monitoring for performance purposes does not become a burden on the system. You might be able to dynamically adjust the level of detail for the data that the performance monitoring process gathers.
+> +> + +### Requirements for performance monitoring +To examine system performance, an operator typically needs to see information that includes: + +* The response rates for user requests. +* The number of concurrent user requests. +* The volume of network traffic. +* The rates at which business transactions are being completed. +* The average processing time for requests. + +It can also be helpful to provide tools that enable an operator to help spot correlations, such as: + +* The number of concurrent users versus request latency times (how long it takes to start processing a request after the user has sent it). +* The number of concurrent users versus the average response time (how long it takes to complete a request after it has started processing). +* The volume of requests versus the number of processing errors. + +Along with this high-level functional information, an operator should be able to obtain a detailed view of the performance for each component in the system. This data is typically provided through low-level performance counters that track information such as: + +* Memory utilization. +* Number of threads. +* CPU processing time. +* Request queue length. +* Disk or network I/O rates and errors. +* Number of bytes written or read. +* Middleware indicators, such as queue length. + +All visualizations should allow an operator to specify a time period. The displayed data might be a snapshot of the current situation and/or a historical view of the performance. + +An operator should be able to raise an alert based on any performance measure for any specified value during any specified time interval. + +### Data sources, instrumentation, and data-collection requirements +You can gather high-level performance data (throughput, number of concurrent users, number of business transactions, error rates, and so on) by monitoring the progress of users' requests as they arrive and pass through the system. 
This involves incorporating tracing statements at key points in the application code, together with timing information. All faults, exceptions, and warnings should be captured with sufficient data for correlating them with the requests that caused them. The Internet Information Services (IIS) log is another useful source. + +If possible, you should also capture performance data for any external systems that the application uses. These external systems might provide their own performance counters or other features for requesting performance data. If this is not possible, record information such as the start time and end time of each request made to an external system, together with the status (success, fail, or warning) of the operation. For example, you can use a stopwatch approach to time requests: start a timer when the request starts and then stop the timer when the request finishes. + +Low-level performance data for individual components in a system might be available through features and services such as Windows performance counters and Azure Diagnostics. + +### Analyzing performance data +Much of the analysis work consists of aggregating performance data by user request type and/or the subsystem or service to which each request is sent. An example of a user request is adding an item to a shopping cart or performing the checkout process in an e-commerce system. + +Another common requirement is summarizing performance data in selected percentiles. For example, an operator might determine the response times for 99 percent of requests, 95 percent of requests, and 70 percent of requests. There might be SLA targets or other goals set for each percentile. The ongoing results should be reported in near real time to help detect immediate issues. The results should also be aggregated over the longer time for statistical purposes. 
+ +In the case of latency issues affecting performance, an operator should be able to quickly identify the cause of the bottleneck by examining the latency of each step that each request performs. The performance data must therefore provide a means of correlating performance measures for each step to tie them to a specific request. + +Depending on the visualization requirements, it might be useful to generate and store a data cube that contains views of the raw data. This data cube can allow complex ad hoc querying and analysis of the performance information. + +## Security monitoring +All commercial systems that include sensitive data must implement a security structure. The complexity of the security mechanism is usually a function of the sensitivity of the data. In a system that requires users to be authenticated, you should record: + +* All sign-in attempts, whether they fail or succeed. +* All operations performed by--and the details of all resources accessed by--an authenticated user. +* When a user ends a session and signs out. + +Monitoring might be able to help detect attacks on the system. For example, a large number of failed sign-in attempts might indicate a brute-force attack. An unexpected surge in requests might be the result of a distributed denial-of-service (DDoS) attack. You must be prepared to monitor all requests to all resources regardless of the source of these requests. A system that has a sign-in vulnerability might accidentally expose resources to the outside world without requiring a user to actually sign in. + +### Requirements for security monitoring +The most critical aspects of security monitoring should enable an operator to quickly: + +* Detect attempted intrusions by an unauthenticated entity. +* Identify attempts by entities to perform operations on data for which they have not been granted access. +* Determine whether the system, or some part of the system, is under attack from outside or inside. 
(For example, a malicious authenticated user might be attempting to bring the system down.) + +To support these requirements, an operator should be notified: + +* If one account makes repeated failed sign-in attempts within a specified period. +* If one authenticated account repeatedly tries to access a prohibited resource during a specified period. +* If a large number of unauthenticated or unauthorized requests occur during a specified period. + +The information that's provided to an operator should include the host address of the source for each request. If security violations regularly arise from a particular range of addresses, these hosts might be blocked. + +A key part in maintaining the security of a system is being able to quickly detect actions that deviate from the usual pattern. Information such as the number of failed and/or successful sign-in requests can be displayed visually to help detect whether there is a spike in activity at an unusual time. (An example of this activity is users signing in at 3:00 AM and performing a large number of operations when their working day starts at 9:00 AM). This information can also be used to help configure time-based autoscaling. For example, if an operator observes that a large number of users regularly sign in at a particular time of day, the operator can arrange to start additional authentication services to handle the volume of work, and then shut down these additional services when the peak has passed. + +### Data sources, instrumentation, and data-collection requirements +Security is an all-encompassing aspect of most distributed systems. The pertinent data is likely to be generated at multiple points throughout a system. You should consider adopting a Security Information and Event Management (SIEM) approach to gather the security-related information that results from events raised by the application, network equipment, servers, firewalls, antivirus software, and other intrusion-prevention elements. 
+ +Security monitoring can incorporate data from tools that are not part of your application. These tools can include utilities that identify port-scanning activities by external agencies, or network filters that detect attempts to gain unauthenticated access to your application and data. + +In all cases, the gathered data must enable an administrator to determine the nature of any attack and take the appropriate countermeasures. + +### Analyzing security data +A feature of security monitoring is the variety of sources from which the data arises. The different formats and level of detail often require complex analysis of the captured data to tie it together into a coherent thread of information. Apart from the simplest of cases (such as detecting a large number of failed sign-ins, or repeated attempts to gain unauthorized access to critical resources), it might not be possible to perform any complex automated processing of security data. Instead, it might be preferable to write this data, time-stamped but otherwise in its original form, to a secure repository to allow for expert manual analysis. + + + +## SLA monitoring +Many commercial systems that support paying customers make guarantees about the performance of the system in the form of SLAs. Essentially, SLAs state that the system can handle a defined volume of work within an agreed time frame and without losing critical information. SLA monitoring is concerned with ensuring that the system can meet measurable SLAs. + +> [!NOTE] +> SLA monitoring is closely related to performance monitoring. But whereas performance monitoring is concerned with ensuring that the system functions *optimally*, SLA monitoring is governed by a contractual obligation that defines what *optimally* actually means. +> +> + +SLAs are often defined in terms of: + +* Overall system availability. For example, an organization might guarantee that the system will be available for 99.9 percent of the time. 
This equates to no more than 9 hours of downtime per year, or approximately 10 minutes a week. +* Operational throughput. This aspect is often expressed as one or more high-water marks, such as guaranteeing that the system can support up to 100,000 concurrent user requests or handle 10,000 concurrent business transactions. +* Operational response time. The system might also make guarantees for the rate at which requests are processed. An example is that 99 percent of all business transactions will finish within 2 seconds, and no single transaction will take longer than 10 seconds. + +> [!NOTE] +> Some contracts for commercial systems might also include SLAs for customer support. An example is that all help-desk requests will elicit a response within 5 minutes, and that 99 percent of all problems will be fully addressed within 1 working day. Effective [issue tracking](#issue-tracking) (described later in this section) is key to meeting SLAs such as these. +> +> + +### Requirements for SLA monitoring +At the highest level, an operator should be able to determine at a glance whether the system is meeting the agreed SLAs or not. And if not, the operator should be able to drill down and examine the underlying factors to determine the reasons for substandard performance. + +Typical high-level indicators that can be depicted visually include: + +* The percentage of service uptime. +* The application throughput (measured in terms of successful transactions and/or operations per second). +* The number of successful/failing application requests. +* The number of application and system faults, exceptions, and warnings. + +All of these indicators should be capable of being filtered by a specified period of time. + +A cloud application will likely comprise a number of subsystems and components. An operator should be able to select a high-level indicator and see how it's composed from the health of the underlying elements. 
For example, if the uptime of the overall system falls below an acceptable value, an operator should be able to zoom in and determine which elements are contributing to this failure. + +> [!NOTE] +> System uptime needs to be defined carefully. In a system that uses redundancy to ensure maximum availability, individual instances of elements might fail, but the system can remain functional. System uptime as presented by health monitoring should indicate the aggregate uptime of each element and not necessarily whether the system has actually halted. Additionally, failures might be isolated. So even if a specific system is unavailable, the remainder of the system might remain available, although with decreased functionality. (In an e-commerce system, a failure in the system might prevent a customer from placing orders, but the customer might still be able to browse the product catalog.) +> +> + +For alerting purposes, the system should be able to raise an event if any of the high-level indicators exceed a specified threshold. The lower-level details of the various factors that compose the high-level indicator should be available as contextual data to the alerting system. + +### Data sources, instrumentation, and data-collection requirements +The raw data that's required to support SLA monitoring is similar to the raw data that's required for performance monitoring, together with some aspects of health and availability monitoring. (See those sections for more details.) You can capture this data by: + +* Performing endpoint monitoring. +* Logging exceptions, faults, and warnings. +* Tracing the execution of user requests. +* Monitoring the availability of any third-party services that the system uses. +* Using performance metrics and counters. + +All data must be timed and time-stamped. + +### Analyzing SLA data +The instrumentation data must be aggregated to generate a picture of the overall performance of the system. 
Aggregated data must also support drill-down to enable examination of the performance of the underlying subsystems. For example, you should be able to: + +* Calculate the total number of user requests during a specified period and determine the success and failure rate of these requests. +* Combine the response times of user requests to generate an overall view of system response times. +* Analyze the progress of user requests to break down the overall response time of a request into the response times of the individual work items in that request. +* Determine the overall availability of the system as a percentage of uptime for any specific period. +* Analyze the percentage time availability of the individual components and services in the system. This might involve parsing logs that third-party services have generated. + +Many commercial systems are required to report real performance figures against agreed SLAs for a specified period, typically a month. This information can be used to calculate credits or other forms of repayments for customers if the SLAs are not met during that period. You can calculate availability for a service by using the technique described in the section [Analyzing availability data](#analyzing-availability-data). + +For internal purposes, an organization might also track the number and nature of incidents that caused services to fail. Learning how to resolve these issues quickly, or eliminate them completely, will help to reduce downtime and meet SLAs. + +## Auditing +Depending on the nature of the application, there might be statutory or other legal regulations that specify requirements for auditing users' operations and recording all data access. Auditing can provide evidence that links customers to specific requests. Non-repudiation is an important factor in many e-business systems to help maintain trust between a customer and the organization that's responsible for the application or service.
+ +### Requirements for auditing +An analyst must be able to trace the sequence of business operations that users are performing so that you can reconstruct users' actions. This might be necessary simply as a matter of record, or as part of a forensic investigation. + +Audit information is highly sensitive. It will likely include data that identifies the users of the system, together with the tasks that they're performing. For this reason, audit information will most likely take the form of reports that are available only to trusted analysts rather than as an interactive system that supports drill-down of graphical operations. An analyst should be able to generate a range of reports. For example, reports might list all users' activities occurring during a specified time frame, detail the chronology of activity for a single user, or list the sequence of operations performed against one or more resources. + +### Data sources, instrumentation, and data-collection requirements +The primary sources of information for auditing can include: + +* The security system that manages user authentication. +* Trace logs that record user activity. +* Security logs that track all identifiable and unidentifiable network requests. + +The format of the audit data and the way in which it's stored might be driven by regulatory requirements. For example, it might not be possible to clean the data in any way. (It must be recorded in its original format.) Access to the repository where it's held must be protected to prevent tampering. + +### Analyzing audit data +An analyst must be able to access the raw data in its entirety, in its original form. Aside from the requirement to generate common audit reports, the tools for analyzing this data are likely to be specialized and kept external to the system. + +## Usage monitoring +Usage monitoring tracks how the features and components of an application are used. 
An operator can use the gathered data to: + +* Determine which features are heavily used and determine any potential hotspots in the system. High-traffic elements might benefit from functional partitioning or even replication to spread the load more evenly. An operator can also use this information to ascertain which features are infrequently used and are possible candidates for retirement or replacement in a future version of the system. +* Obtain information about the operational events of the system under normal use. For example, in an e-commerce site, you can record the statistical information about the number of transactions and the volume of customers that are responsible for them. This information can be used for capacity planning as the number of customers grows. +* Detect (possibly indirectly) user satisfaction with the performance or functionality of the system. For example, if a large number of customers in an e-commerce system regularly abandon their shopping carts, this might be due to a problem with the checkout functionality. +* Generate billing information. A commercial application or multitenant service might charge customers for the resources that they use. +* Enforce quotas. If a user in a multitenant system exceeds their paid quota of processing time or resource usage during a specified period, their access can be limited or processing can be throttled. + +### Requirements for usage monitoring +To examine system usage, an operator typically needs to see information that includes: + +* The number of requests that are processed by each subsystem and directed to each resource. +* The work that each user is performing. +* The volume of data storage that each user occupies. +* The resources that each user is accessing. + +An operator should also be able to generate graphs. For example, a graph might display the most resource-hungry users, or the most frequently accessed resources or system features. 
+ +### Data sources, instrumentation, and data-collection requirements +Usage tracking can be performed at a relatively high level. It can note the start and end times of each request and the nature of the request (read, write, and so on, depending on the resource in question). You can obtain this information by: + +* Tracing user activity. +* Capturing performance counters that measure the utilization for each resource. +* Monitoring the resource consumption by each user. + +For metering purposes, you also need to be able to identify which users are responsible for performing which operations, and the resources that these operations utilize. The gathered information should be detailed enough to enable accurate billing. + + + +## Issue tracking +Customers and other users might report issues if unexpected events or behavior occurs in the system. Issue tracking is concerned with managing these issues, associating them with efforts to resolve any underlying problems in the system, and informing customers of possible resolutions. + +### Requirements for issue tracking +Operators often perform issue tracking by using a separate system that enables them to record and report the details of problems that users report. These details can include the tasks that the user was trying to perform, symptoms of the problem, the sequence of events, and any error or warning messages that were issued. + +### Data sources, instrumentation, and data-collection requirements +The initial data source for issue-tracking data is the user who reported the issue in the first place. The user might be able to provide additional data such as: + +* A crash dump (if the application includes a component that runs on the user's desktop). +* A screen snapshot. +* The date and time when the error occurred, together with any other environmental information such as the user's location. 
+ +This information can be used to help the debugging effort and help construct a backlog for future releases of the software. + +### Analyzing issue-tracking data +Different users might report the same problem. The issue-tracking system should associate common reports. + +The progress of the debugging effort should be recorded against each issue report. When the problem is resolved, the customer can be informed of the solution. + +If a user reports an issue that has a known solution in the issue-tracking system, the operator should be able to inform the user of the solution immediately. + +## Tracing operations and debugging software releases +When a user reports an issue, the user is often only aware of the immediate impact that it has on their operations. The user can only report the results of their own experience back to an operator who is responsible for maintaining the system. These experiences are usually just a visible symptom of one or more fundamental problems. In many cases, an analyst will need to dig through the chronology of the underlying operations to establish the root cause of the problem. This process is called *root cause analysis*. + +> [!NOTE] +> Root cause analysis might uncover inefficiencies in the design of an application. In these situations, it might be possible to rework the affected elements and deploy them as part of a subsequent release. This process requires careful control, and the updated components should be monitored closely. +> +> + +### Requirements for tracing and debugging +For tracing unexpected events and other problems, it's vital that the monitoring data provides enough information to enable an analyst to trace back to the origins of these issues and reconstruct the sequence of events that occurred. This information must be sufficient to enable an analyst to diagnose the root cause of any problems. A developer can then make the necessary modifications to prevent them from recurring. 
+ +### Data sources, instrumentation, and data-collection requirements +Troubleshooting can involve tracing all the methods (and their parameters) invoked as part of an operation to build up a tree that depicts the logical flow through the system when a customer makes a specific request. Exceptions and warnings that the system generates as a result of this flow need to be captured and logged. + +To support debugging, the system can provide hooks that enable an operator to capture state information at crucial points in the system. Or, the system can deliver detailed step-by-step information as selected operations progress. Capturing data at this level of detail can impose an additional load on the system and should be a temporary process. An operator uses this process mainly when a highly unusual series of events occurs and is difficult to replicate, or when a new release of one or more elements into a system requires careful monitoring to ensure that the elements function as expected. + +## The monitoring and diagnostics pipeline +Monitoring a large-scale distributed system poses a significant challenge. Each of the scenarios described in the previous section should not necessarily be considered in isolation. There is likely to be a significant overlap in the monitoring and diagnostic data that's required for each situation, although this data might need to be processed and presented in different ways. For these reasons, you should take a holistic view of monitoring and diagnostics. + +You can envisage the entire monitoring and diagnostics process as a pipeline that comprises the stages shown in Figure 1. + +![Stages in the monitoring and diagnostics pipeline](./images/monitoring/Pipeline.png) + +*Figure 1. +The stages in the monitoring and diagnostics pipeline* + +Figure 1 highlights how the data for monitoring and diagnostics can come from a variety of data sources. 
The instrumentation and collection stages are concerned with identifying the sources from where the data needs to be captured, determining which data to capture, how to capture it, and how to format this data so that it can be easily examined. The analysis/diagnosis stage takes the raw data and uses it to generate meaningful information that an operator can use to determine the state of the system. The operator can use this information to make decisions about possible actions to take, and then feed the results back into the instrumentation and collection stages. The visualization/alerting stage presents a consumable view of the system state. It can display information in near real time by using a series of dashboards. And it can generate reports, graphs, and charts to provide a historical view of the data that can help identify long-term trends. If information indicates that a KPI is likely to exceed acceptable bounds, this stage can also trigger an alert to an operator. In some cases, an alert can also be used to trigger an automated process that attempts to take corrective actions, such as autoscaling. + +Note that these steps constitute a continuous-flow process where the stages are happening in parallel. Ideally, all the phases should be dynamically configurable. At some points, especially when a system has been newly deployed or is experiencing problems, it might be necessary to gather extended data on a more frequent basis. At other times, it should be possible to revert to capturing a base level of essential information to verify that the system is functioning properly. + +Additionally, the entire monitoring process should be considered a live, ongoing solution that's subject to fine-tuning and improvements as a result of feedback. For example, you might start with measuring many factors to determine system health.
Analysis over time might lead to a refinement as you discard measures that aren't relevant, enabling you to more precisely focus on the data that you need while minimizing background noise. + +## Sources of monitoring and diagnostic data +The information that the monitoring process uses can come from several sources, as illustrated in Figure 1. At the application level, information comes from trace logs incorporated into the code of the system. Developers should follow a standard approach for tracking the flow of control through their code. For example, an entry to a method can emit a trace message that specifies the name of the method, the current time, the value of each parameter, and any other pertinent information. Recording the entry and exit times can also prove useful. + +You should log all exceptions and warnings, and ensure that you retain a full trace of any nested exceptions and warnings. Ideally, you should also capture information that identifies the user who is running the code, together with activity correlation information (to track requests as they pass through the system). And you should log attempts to access all resources such as message queues, databases, files, and other dependent services. This information can be used for metering and auditing purposes. + +Many applications make use of libraries and frameworks to perform common tasks such as accessing a data store or communicating over a network. These frameworks might be configurable to provide their own trace messages and raw diagnostic information, such as transaction rates and data transmission successes and failures. + +> [!NOTE] +> Many modern frameworks automatically publish performance and trace events. Capturing this information is simply a matter of providing a means to retrieve and store it where it can be processed and analyzed. 
+> +> + +The operating system where the application is running can be a source of low-level system-wide information, such as performance counters that indicate I/O rates, memory utilization, and CPU usage. Operating system errors (such as the failure to open a file correctly) might also be reported. + +You should also consider the underlying infrastructure and components on which your system runs. Virtual machines, virtual networks, and storage services can all be sources of important infrastructure-level performance counters and other diagnostic data. + +If your application uses other external services, such as a web server or database management system, these services might publish their own trace information, logs, and performance counters. Examples include SQL Server Dynamic Management Views for tracking operations performed against a SQL Server database, and IIS trace logs for recording requests made to a web server. + +As the components of a system are modified and new versions are deployed, it's important to be able to attribute issues, events, and metrics to each version. This information should be tied back to the release pipeline so that problems with a specific version of a component can be tracked quickly and rectified. + +Security issues might occur at any point in the system. For example, a user might attempt to sign in with an invalid user ID or password. An authenticated user might try to obtain unauthorized access to a resource. Or a user might provide an invalid or outdated key to access encrypted information. Security-related information for successful and failing requests should always be logged. + +The section [Instrumenting an application](#instrumenting-an-application) contains more guidance on the information that you should capture. But you can use a variety of strategies to gather this information: + +* **Application/system monitoring**. 
This strategy uses internal sources within the application, application frameworks, operating system, and infrastructure. The application code can generate its own monitoring data at notable points during the lifecycle of a client request. The application can include tracing statements that might be selectively enabled or disabled as circumstances dictate. It might also be possible to inject diagnostics dynamically by using a diagnostics framework. These frameworks typically provide plug-ins that can attach to various instrumentation points in your code and capture trace data at these points. + + Additionally, your code and/or the underlying infrastructure might raise events at critical points. Monitoring agents that are configured to listen for these events can record the event information. +* **Real user monitoring**. This approach records the interactions between a user and the application and observes the flow of each request and response. This information can have a two-fold purpose: it can be used for metering usage by each user, and it can be used to determine whether users are receiving a suitable quality of service (for example, fast response times, low latency, and minimal errors). You can use the captured data to identify areas of concern where failures occur most often. You can also use the data to identify elements where the system slows down, possibly due to hotspots in the application or some other form of bottleneck. If you implement this approach carefully, it might be possible to reconstruct users' flows through the application for debugging and testing purposes. + + > [!IMPORTANT] + > You should consider the data that's captured by monitoring real users to be highly sensitive because it might include confidential material. If you save captured data, store it securely. If you want to use the data for performance monitoring or debugging purposes, strip out all personally identifiable information first. + > + > +* **Synthetic user monitoring**. 
In this approach, you write your own test client that simulates a user and performs a configurable but typical series of operations. You can track the performance of the test client to help determine the state of the system. You can also use multiple instances of the test client as part of a load-testing operation to establish how the system responds under stress, and what sort of monitoring output is generated under these conditions. + + > [!NOTE] + > You can implement real and synthetic user monitoring by including code that traces and times the execution of method calls and other critical parts of an application. + > + > +* **Profiling**. This approach is primarily targeted at monitoring and improving application performance. Rather than operating at the functional level of real and synthetic user monitoring, it captures lower-level information as the application runs. You can implement profiling by using periodic sampling of the execution state of an application (determining which piece of code the application is running at a given point in time). You can also use instrumentation that inserts probes into the code at important junctures (such as the start and end of a method call) and records which methods were invoked, at what time, and how long each call took. You can then analyze this data to determine which parts of the application might cause performance problems. +* **Endpoint monitoring**. This technique uses one or more diagnostic endpoints that the application exposes specifically to enable monitoring. An endpoint provides a pathway into the application code and can return information about the health of the system. Different endpoints can focus on various aspects of the functionality. You can write your own diagnostics client that sends periodic requests to these endpoints and assimilates the responses. This approach is described more in [Health Endpoint Monitoring Pattern](https://msdn.microsoft.com/library/dn589789.aspx) on the Microsoft website.
+ +For maximum coverage, you should use a combination of these techniques. + + + +## Instrumenting an application +Instrumentation is a critical part of the monitoring process. You can make meaningful decisions about the performance and health of a system only if you first capture the data that enables you to make these decisions. The information that you gather by using instrumentation should be sufficient to enable you to assess performance, diagnose problems, and make decisions without requiring you to sign in to a remote production server to perform tracing (and debugging) manually. Instrumentation data typically comprises metrics and information that's written to trace logs. + +The contents of a trace log can be the result of textual data that's written by the application or binary data that's created as the result of a trace event (if the application is using Event Tracing for Windows--ETW). They can also be generated from system logs that record events arising from parts of the infrastructure, such as a web server. Textual log messages are often designed to be human-readable, but they should also be written in a format that enables an automated system to parse them easily. + +You should also categorize logs. Don't write all trace data to a single log, but use separate logs to record the trace output from different operational aspects of the system. You can then quickly filter log messages by reading from the appropriate log rather than having to process a single lengthy file. Never write information that has different security requirements (such as audit information and debugging data) to the same log. + +> [!NOTE] +> A log might be implemented as a file on the file system, or it might be held in some other format, such as a blob in blob storage. Log information might also be held in more structured storage, such as rows in a table. 
+> +> + +Metrics will generally be a measure or count of some aspect or resource in the system at a specific time, with one or more associated tags or dimensions (sometimes called a *sample*). A single instance of a metric is usually not useful in isolation. Instead, metrics have to be captured over time. The key issue to consider is which metrics you should record and how frequently. Generating data for metrics too often can impose a significant additional load on the system, whereas capturing metrics infrequently might cause you to miss the circumstances that lead to a significant event. The considerations will vary from metric to metric. For example, CPU utilization on a server might vary significantly from second to second, but high utilization becomes an issue only if it's long-lived over a number of minutes. + + + +### Information for correlating data +You can easily monitor individual system-level performance counters, capture metrics for resources, and obtain application trace information from various log files. But some forms of monitoring require the analysis and diagnostics stage in the monitoring pipeline to correlate the data that's retrieved from several sources. This data might take several forms in the raw data, and the analysis process must be provided with sufficient instrumentation data to be able to map these different forms. For example, at the application framework level, a task might be identified by a thread ID. Within an application, the same work might be associated with the user ID for the user who is performing that task. + +Also, there's unlikely to be a 1:1 mapping between threads and user requests, because asynchronous operations might reuse the same threads to perform operations on behalf of more than one user. To complicate matters further, a single request might be handled by more than one thread as execution flows through the system. 
If possible, associate each request with a unique activity ID that's propagated through the system as part of the request context. (The technique for generating and including activity IDs in trace information depends on the technology that's used to capture the trace data.) + +All monitoring data should be time-stamped in the same way. For consistency, record all dates and times by using Coordinated Universal Time. This will help you more easily trace sequences of events. + +> [!NOTE] +> Computers operating in different time zones and networks might not be synchronized. Don't depend on using time stamps alone for correlating instrumentation data that spans multiple machines. +> +> + +### Information to include in the instrumentation data +Consider the following points when you're deciding which instrumentation data you need to collect: + +* Make sure that information captured by trace events is machine and human readable. Adopt well-defined schemas for this information to facilitate automated processing of log data across systems, and to provide consistency to operations and engineering staff reading the logs. Include environmental information, such as the deployment environment, the machine on which the process is running, the details of the process, and the call stack. +* Enable profiling only when necessary because it can impose a significant overhead on the system. Profiling by using instrumentation records an event (such as a method call) every time it occurs, whereas sampling records only selected events. The selection can be time-based (once every *n* seconds), or frequency-based (once every *n* requests). If events occur very frequently, profiling by instrumentation might cause too much of a burden and itself affect overall performance. In this case, the sampling approach might be preferable. However, if the frequency of events is low, sampling might miss them. In this case, instrumentation might be the better approach. 
+* Provide sufficient context to enable a developer or administrator to determine the source of each request. This might include some form of activity ID that identifies a specific instance of a request. It might also include information that can be used to correlate this activity with the computational work performed and the resources used. Note that this work might cross process and machine boundaries. For metering, the context should also include (either directly or indirectly via other correlated information) a reference to the customer who caused the request to be made. This context provides valuable information about the application state at the time that the monitoring data was captured. +* Record all requests, and the locations or regions from which these requests are made. This information can assist in determining whether there are any location-specific hotspots. This information can also be useful in determining whether to repartition an application or the data that it uses. +* Record and capture the details of exceptions carefully. Often, critical debug information is lost as a result of poor exception handling. Capture the full details of exceptions that the application throws, including any inner exceptions and other context information. Include the call stack if possible. +* Be consistent in the data that the different elements of your application capture, because this can assist in analyzing events and correlating them with user requests. Consider using a comprehensive and configurable logging package to gather information, rather than depending on developers to adopt the same approach as they implement different parts of the system. Gather data from key performance counters, such as the volume of I/O being performed, network utilization, number of requests, memory use, and CPU utilization. 
Some infrastructure services might provide their own specific performance counters, such as the number of connections to a database, the rate at which transactions are being performed, and the number of transactions that succeed or fail. Applications might also define their own specific performance counters. +* Log all calls made to external services, such as database systems, web services, or other system-level services that are part of the infrastructure. Record information about the time taken to perform each call, and the success or failure of the call. If possible, capture information about all retry attempts and failures for any transient errors that occur. + +### Ensuring compatibility with telemetry systems +In many cases, the information that instrumentation produces is generated as a series of events and passed to a separate telemetry system for processing and analysis. A telemetry system is typically independent of any specific application or technology, but it expects information to follow a specific format that's usually defined by a schema. The schema effectively specifies a contract that defines the data fields and types that the telemetry system can ingest. The schema should be generalized to allow for data arriving from a range of platforms and devices. + +A common schema should include fields that are common to all instrumentation events, such as the event name, the event time, the IP address of the sender, and the details that are required for correlating with other events (such as a user ID, a device ID, and an application ID). Remember that any number of devices might raise events, so the schema should not depend on the device type. Additionally, various devices might raise events for the same application; the application might support roaming or some other form of cross-device distribution. + +The schema might also include domain fields that are relevant to a particular scenario that's common across different applications. 
This might be information about exceptions, application start and end events, and success and/or failure of web service API calls. All applications that use the same set of domain fields should emit the same set of events, enabling a set of common reports and analytics to be built. + +Finally, a schema might contain custom fields for capturing the details of application-specific events. + +### Best practices for instrumenting applications +The following list summarizes best practices for instrumenting a distributed application running in the cloud. + +* Make logs easy to read and easy to parse. Use structured logging where possible. Be concise and descriptive in log messages. +* In all logs, identify the source and provide context and timing information as each log record is written. +* Use the same time zone and format for all time stamps. This will help to correlate events for operations that span hardware and services running in different geographic regions. +* Categorize logs and write messages to the appropriate log file. +* Do not disclose sensitive information about the system or personal information about users. Scrub this information before it's logged, but ensure that the relevant details are retained. For example, remove the ID and password from any database connection strings, but write the remaining information to the log so that an analyst can determine that the system is accessing the correct database. Log all critical exceptions, but enable the administrator to turn logging on and off for lower levels of exceptions and warnings. Also, capture and log all retry logic information. This data can be useful in monitoring the transient health of the system. +* Trace out-of-process calls, such as requests to external web services or databases. +* Don't mix log messages with different security requirements in the same log file. For example, don't write debug and audit information to the same log.
+* With the exception of auditing events, make sure that all logging calls are fire-and-forget operations that do not block the progress of business operations. Auditing events are exceptional because they are critical to the business and can be classified as a fundamental part of business operations. +* Make sure that logging is extensible and does not have any direct dependencies on a concrete target. For example, rather than writing information by using *System.Diagnostics.Trace*, define an abstract interface (such as *ILogger*) that exposes logging methods and that can be implemented through any appropriate means. +* Make sure that all logging is fail-safe and never triggers any cascading errors. Logging must not throw any exceptions. +* Treat instrumentation as an ongoing iterative process and review logs regularly, not just when there is a problem. + +## Collecting and storing data +The collection stage of the monitoring process is concerned with retrieving the information that instrumentation generates, formatting this data to make it easier for the analysis/diagnosis stage to consume, and saving the transformed data in reliable storage. The instrumentation data that you gather from different parts of a distributed system can be held in a variety of locations and with varying formats. For example, your application code might generate trace log files and generate application event log data, whereas performance counters that monitor key aspects of the infrastructure that your application uses can be captured through other technologies. Any third-party components and services that your application uses might provide instrumentation information in different formats, by using separate trace files, blob storage, or even a custom data store. + +Data collection is often performed through a collection service that can run autonomously from the application that generates the instrumentation data. 
Figure 2 illustrates an example of this architecture, highlighting the instrumentation data-collection subsystem. + +![Example of collecting instrumentation data](./images/monitoring/TelemetryService.png) + +*Figure 2. +Collecting instrumentation data* + +Note that this is a simplified view. The collection service is not necessarily a single process and might comprise many constituent parts running on different machines, as described in the following sections. Additionally, if the analysis of some telemetry data must be performed quickly (hot analysis, as described in the section [Supporting hot, warm, and cold analysis](#supporting-hot-warm-and-cold-analysis) later in this document), local components that operate outside the collection service might perform the analysis tasks immediately. Figure 2 depicts this situation for selected events. After analytical processing, the results can be sent directly to the visualization and alerting subsystem. Data that's subjected to warm or cold analysis is held in storage while it awaits processing. + +For Azure applications and services, Azure Diagnostics provides one possible solution for capturing data. Azure Diagnostics gathers data from the following sources for each compute node, aggregates it, and then uploads it to Azure Storage: + +* IIS logs +* IIS Failed Request logs +* Windows event logs +* Performance counters +* Crash dumps +* Azure Diagnostics infrastructure logs +* Custom error logs +* .NET EventSource +* Manifest-based ETW + +For more information, see the article [Azure: Telemetry Basics and Troubleshooting](http://social.technet.microsoft.com/wiki/contents/articles/18146.windows-azure-telemetry-basics-and-troubleshooting.aspx). + +### Strategies for collecting instrumentation data +Considering the elastic nature of the cloud, and to avoid the necessity of manually retrieving telemetry data from every node in the system, you should arrange for the data to be transferred to a central location and consolidated. 
In a system that spans multiple datacenters, it might be useful to first collect, consolidate, and store data on a region-by-region basis, and then aggregate the regional data into a single central system. + +To optimize the use of bandwidth, you can elect to transfer less urgent data in chunks, as batches. However, the data must not be delayed indefinitely, especially if it contains time-sensitive information. + +#### *Pulling and pushing instrumentation data* +The instrumentation data-collection subsystem can actively retrieve instrumentation data from the various logs and other sources for each instance of the application (the *pull model*). Or, it can act as a passive receiver that waits for the data to be sent from the components that constitute each instance of the application (the *push model*). + +One approach to implementing the pull model is to use monitoring agents that run locally with each instance of the application. A monitoring agent is a separate process that periodically retrieves (pulls) telemetry data collected at the local node and writes this information directly to centralized storage that all instances of the application share. This is the mechanism that Azure Diagnostics implements. Each instance of an Azure web or worker role can be configured to capture diagnostic and other trace information that's stored locally. The monitoring agent that runs alongside each instance copies the specified data to Azure Storage. The article [Enabling Diagnostics in Azure Cloud Services and Virtual Machines](/azure/cloud-services/cloud-services-dotnet-diagnostics) provides more details on this process. Some elements, such as IIS logs, crash dumps, and custom error logs, are written to blob storage. Data from the Windows event log, ETW events, and performance counters is recorded in table storage. Figure 3 illustrates this mechanism. 
+ +![Illustration of using a monitoring agent to pull information and write to shared storage](./images/monitoring/PullModel.png) + +*Figure 3. +Using a monitoring agent to pull information and write to shared storage* + +> [!NOTE] +> Using a monitoring agent is ideally suited to capturing instrumentation data that's naturally pulled from a data source. An example is information from SQL Server Dynamic Management Views or the length of an Azure Service Bus queue. +> +> + +It's feasible to use the approach just described to store telemetry data for a small-scale application running on a limited number of nodes in a single location. However, a complex, highly scalable, global cloud application might generate huge volumes of data from hundreds of web and worker roles, database shards, and other services. This flood of data can easily overwhelm the I/O bandwidth available with a single, central location. Therefore, your telemetry solution must be scalable to prevent it from acting as a bottleneck as the system expands. Ideally, your solution should incorporate a degree of redundancy to reduce the risks of losing important monitoring information (such as auditing or billing data) if part of the system fails. + +To address these issues, you can implement queuing, as shown in Figure 4. In this architecture, the local monitoring agent (if it can be configured appropriately) or custom data-collection service (if not) posts data to a queue. A separate process running asynchronously (the storage writing service in Figure 4) takes the data in this queue and writes it to shared storage. A message queue is suitable for this scenario because it provides "at least once" semantics that help ensure that queued data will not be lost after it's posted. You can implement the storage writing service by using a separate worker role. + +![Illustration of using a queue to buffer instrumentation data](./images/monitoring/BufferedQueue.png) + +*Figure 4. 
+Using a queue to buffer instrumentation data* + +The local data-collection service can add data to a queue immediately after it's received. The queue acts as a buffer, and the storage writing service can retrieve and write the data at its own pace. By default, a queue operates on a first-in, first-out basis. But you can prioritize messages to accelerate them through the queue if they contain data that must be handled more quickly. For more information, see the [Priority Queue](https://msdn.microsoft.com/library/dn589794.aspx) pattern. Alternatively, you can use different channels (such as Service Bus topics) to direct data to different destinations depending on the form of analytical processing that's required. + +For scalability, you can run multiple instances of the storage writing service. If there is a high volume of events, you can use an event hub to dispatch the data to different compute resources for processing and storage. + + + +#### *Consolidating instrumentation data* +The instrumentation data that the data-collection service retrieves from a single instance of an application gives a localized view of the health and performance of that instance. To assess the overall health of the system, it's necessary to consolidate some aspects of the data in the local views. You can perform this after the data has been stored, but in some cases, you can also achieve it as the data is collected. Rather than being written directly to shared storage, the instrumentation data can pass through a separate data consolidation service that combines data and acts as a filter and cleanup process. For example, instrumentation data that includes the same correlation information such as an activity ID can be amalgamated. (It's possible that a user starts performing a business operation on one node and then gets transferred to another node in the event of node failure, or depending on how load balancing is configured.) 
This process can also detect and remove any duplicated data (always a possibility if the telemetry service uses message queues to push instrumentation data out to storage). Figure 5 illustrates an example of this structure. + +![Example of using a service to consolidate instrumentation data](./images/monitoring/Consolidation.png) + +*Figure 5. +Using a separate service to consolidate and clean up instrumentation data* + +### Storing instrumentation data +The previous discussions have depicted a rather simplistic view of the way in which instrumentation data is stored. In reality, it can make sense to store the different types of information by using technologies that are most appropriate to the way in which each type is likely to be used. + +For example, Azure blob and table storage have some similarities in the way in which they're accessed. But they have limitations in the operations that you can perform by using them, and the granularity of the data that they hold is quite different. If you need to perform more analytical operations or require full-text search capabilities on the data, it might be more appropriate to use data storage that provides capabilities that are optimized for specific types of queries and data access. For example: + +* Performance counter data can be stored in a SQL database to enable ad hoc analysis. +* Trace logs might be better stored in Azure DocumentDB. +* Security information can be written to HDFS. +* Information that requires full-text search can be stored through Elasticsearch (which can also speed searches by using rich indexing). + +You can implement an additional service that periodically retrieves the data from shared storage, partitions and filters the data according to its purpose, and then writes it to an appropriate set of data stores as shown in Figure 6. 
An alternative approach is to include this functionality in the consolidation and cleanup process and write the data directly to these stores as it's retrieved rather than saving it in an intermediate shared storage area. Each approach has its advantages and disadvantages. Implementing a separate partitioning service lessens the load on the consolidation and cleanup service, and it enables at least some of the partitioned data to be regenerated if necessary (depending on how much data is retained in shared storage). However, it consumes additional resources. Also, there might be a delay between the receipt of instrumentation data from each application instance and the conversion of this data into actionable information. + +![Partitioning and storage of data](./images/monitoring/DataStorage.png) + +*Figure 6. +Partitioning data according to analytical and storage requirements* + +The same instrumentation data might be required for more than one purpose. For example, performance counters can be used to provide a historical view of system performance over time. This information might be combined with other usage data to generate customer billing information. In these situations, the same data might be sent to more than one destination, such as a document database that can act as a long-term store for holding billing information, and a multidimensional store for handling complex performance analytics. + +You should also consider how urgently the data is required. Data that provides information for alerting must be accessed quickly, so it should be held in fast data storage and indexed or structured to optimize the queries that the alerting system performs. In some cases, it might be necessary for the telemetry service that gathers the data on each node to format and save data locally so that a local instance of the alerting system can quickly notify you of any issues. 
The same data can be dispatched to the storage writing service shown in the previous diagrams and stored centrally if it's also required for other purposes. + +Information that's used for more considered analysis, for reporting, and for spotting historical trends is less urgent and can be stored in a manner that supports data mining and ad hoc queries. For more information, see the section [Supporting hot, warm, and cold analysis](#supporting-hot-warm-and-cold-analysis) later in this document. + +#### *Log rotation and data retention* +Instrumentation can generate considerable volumes of data. This data can be held in several places, starting with the raw log files, trace files, and other information captured at each node to the consolidated, cleaned, and partitioned view of this data held in shared storage. In some cases, after the data has been processed and transferred, the original raw source data can be removed from each node. In other cases, it might be necessary or simply useful to save the raw information. For example, data that's generated for debugging purposes might be best left available in its raw form but can then be discarded quickly after any bugs have been rectified. + +Performance data often has a longer life so that it can be used for spotting performance trends and for capacity planning. The consolidated view of this data is usually kept online for a finite period to enable fast access. After that, it can be archived or discarded. Data gathered for metering and billing customers might need to be saved indefinitely. Additionally, regulatory requirements might dictate that information collected for auditing and security purposes also needs to be archived and saved. This data is also sensitive and might need to be encrypted or otherwise protected to prevent tampering. You should never record users' passwords or other information that might be used to commit identity fraud. Such details should be scrubbed from the data before it's stored. 
+ +#### *Down-sampling* +It's useful to store historical data so you can spot long-term trends. Rather than saving old data in its entirety, it might be possible to down-sample the data to reduce its resolution and save storage costs. As an example, rather than saving minute-by-minute performance indicators, you can consolidate data that's more than a month old to form an hour-by-hour view. + +### Best practices for collecting and storing logging information +The following list summarizes best practices for capturing and storing logging information: + +* The monitoring agent or data-collection service should run as an out-of-process service and should be simple to deploy. +* All output from the monitoring agent or data-collection service should be in an agnostic format that's independent of the machine, operating system, or network protocol. For example, emit information in a self-describing format such as JSON, MessagePack, or Protobuf rather than ETL/ETW. Using a standard format enables the system to construct processing pipelines; components that read, transform, and send data in the agreed format can be easily integrated. +* The monitoring and data-collection process must be fail-safe and must not trigger any cascading error conditions. +* In the event of a transient failure in sending information to a data sink, the monitoring agent or data-collection service should be prepared to reorder telemetry data so that the newest information is sent first. (The monitoring agent/data-collection service might elect to drop the older data, or save it locally and transmit it later to catch up, at its own discretion.) + +## Analyzing data and diagnosing issues +An important part of the monitoring and diagnostics process is analyzing the gathered data to obtain a picture of the overall well-being of the system. 
You should have defined your own KPIs and performance metrics, and it's important to understand how you can structure the data that has been gathered to meet your analysis requirements. It's also important to understand how the data that's captured in different metrics and log files is correlated, because this information can be key to tracking a sequence of events and help diagnose problems that arise. + +As described in the section [Consolidating instrumentation data](#consolidating-instrumentation-data), the data for each part of the system is typically captured locally, but it generally needs to be combined with data generated at other sites that participate in the system. This information requires careful correlation to ensure that data is combined accurately. For example, the usage data for an operation might span a node that hosts a website to which a user connects, a node that runs a separate service accessed as part of this operation, and data storage held on another node. This information needs to be tied together to provide an overall view of the resource and processing usage for the operation. Some pre-processing and filtering of data might occur on the node on which the data is captured, whereas aggregation and formatting are more likely to occur on a central node. + + + +### Supporting hot, warm, and cold analysis +Analyzing and reformatting data for visualization, reporting, and alerting purposes can be a complex process that consumes its own set of resources. Some forms of monitoring are time-critical and require immediate analysis of data to be effective. This is known as *hot analysis*. Examples include the analyses that are required for alerting and some aspects of security monitoring (such as detecting an attack on the system). Data that's required for these purposes must be quickly available and structured for efficient processing. 
In some cases, it might be necessary to move the analysis processing to the individual nodes where the data is held. + +Other forms of analysis are less time-critical and might require some computation and aggregation after the raw data has been received. This is called *warm analysis*. Performance analysis often falls into this category. In this case, an isolated, single performance event is unlikely to be statistically significant. (It might be caused by a sudden spike or glitch.) The data from a series of events should provide a more reliable picture of system performance. + +Warm analysis can also be used to help diagnose health issues. A health event is typically processed through hot analysis and can raise an alert immediately. An operator should be able to drill into the reasons for the health event by examining the data from the warm path. This data should contain information about the events leading up to the issue that caused the health event. + +Some types of monitoring generate more long-term data. This analysis can be performed at a later date, possibly according to a predefined schedule. In some cases, the analysis might need to perform complex filtering of large volumes of data captured over a period of time. This is called *cold analysis*. The key requirement is that the data is stored safely after it has been captured. For example, usage monitoring and auditing require an accurate picture of the state of the system at regular points in time, but this state information does not have to be available for processing immediately after it has been gathered. + +An operator can also use cold analysis to provide the data for predictive health analysis. The operator can gather historical information over a specified period and use it in conjunction with the current health data (retrieved from the hot path) to spot trends that might soon cause health issues. In these cases, it might be necessary to raise an alert so that corrective action can be taken. 
+ +### Correlating data +The data that instrumentation captures can provide a snapshot of the system state, but the purpose of analysis is to make this data actionable. For example: + +* What has caused an intense I/O loading at the system level at a specific time? +* Is it the result of a large number of database operations? +* Is this reflected in the database response times, the number of transactions per second, and application response times at the same juncture? + +If so, one remedial action that might reduce the load might be to shard the data over more servers. In addition, exceptions can arise as a result of a fault in any level of the system. An exception in one level often triggers another fault in the level above. + +For these reasons, you need to be able to correlate the different types of monitoring data at each level to produce an overall view of the state of the system and the applications that are running on it. You can then use this information to make decisions about whether the system is functioning acceptably or not, and determine what can be done to improve the quality of the system. + +As described in the section [Information for correlating data](#information-for-correlating-data), you must ensure that the raw instrumentation data includes sufficient context and activity ID information to support the required aggregations for correlating events. Additionally, this data might be held in different formats, and it might be necessary to parse this information to convert it into a standardized format for analysis. + +### Troubleshooting and diagnosing issues +Diagnosis requires the ability to determine the cause of faults or unexpected behavior, including performing root cause analysis. The information that's required typically includes: + +* Detailed information from event logs and traces, either for the entire system or for a specified subsystem during a specified time window. 
+* Complete stack traces resulting from exceptions and faults of any specified level that occur within the system or a specified subsystem during a specified period. +* Crash dumps for any failed processes either anywhere in the system or for a specified subsystem during a specified time window. +* Activity logs recording the operations that are performed either by all users or for selected users during a specified period. + +Analyzing data for troubleshooting purposes often requires a deep technical understanding of the system architecture and the various components that compose the solution. As a result, a large degree of manual intervention is often required to interpret the data, establish the cause of problems, and recommend an appropriate strategy to correct them. It might be appropriate simply to store a copy of this information in its original format and make it available for cold analysis by an expert. + +## Visualizing data and raising alerts +An important aspect of any monitoring system is the ability to present the data in such a way that an operator can quickly spot any trends or problems. Also important is the ability to quickly inform an operator if a significant event has occurred that might require attention. + +Data presentation can take several forms, including visualization by using dashboards, alerting, and reporting. + +### Visualization by using dashboards +The most common way to visualize data is to use dashboards that can display information as a series of charts, graphs, or some other illustration. These items can be parameterized, and an analyst should be able to select the important parameters (such as the time period) for any specific situation. + +Dashboards can be organized hierarchically. Top-level dashboards can give an overall view of each aspect of the system but enable an operator to drill down to the details. 
For example, a dashboard that depicts the overall disk I/O for the system should allow an analyst to view the I/O rates for each individual disk to ascertain whether one or more specific devices account for a disproportionate volume of traffic. Ideally, the dashboard should also display related information, such as the source of each request (the user or activity) that's generating this I/O. This information can then be used to determine whether (and how) to spread the load more evenly across devices, and whether the system would perform better if more devices were added. + +A dashboard might also use color-coding or some other visual cues to indicate values that appear anomalous or that are outside an expected range. Using the previous example: + +* A disk with an I/O rate that's approaching its maximum capacity over an extended period (a hot disk) can be highlighted in red. +* A disk with an I/O rate that periodically runs at its maximum limit over short periods (a warm disk) can be highlighted in yellow. +* A disk that's exhibiting normal usage can be displayed in green. + +Note that for a dashboard system to work effectively, it must have the raw data to work with. If you are building your own dashboard system, or using a dashboard developed by another organization, you must understand which instrumentation data you need to collect, at what levels of granularity, and how it should be formatted for the dashboard to consume. + +A good dashboard does not only display information, it also enables an analyst to pose ad hoc questions about that information. Some systems provide management tools that an operator can use to perform these tasks and explore the underlying data. Alternatively, depending on the repository that's used to hold this information, it might be possible to query this data directly, or import it into tools such as Microsoft Excel for further analysis and reporting. 
+ +> [!NOTE] +> You should restrict access to dashboards to authorized personnel, because this information might be commercially sensitive. You should also protect the underlying data for dashboards to prevent users from changing it. +> +> + +### Raising alerts +Alerting is the process of analyzing the monitoring and instrumentation data and generating a notification if a significant event is detected. + +Alerting helps ensure that the system remains healthy, responsive, and secure. It's an important part of any system that makes performance, availability, and privacy guarantees to the users where the data might need to be acted on immediately. An operator might need to be notified of the event that triggered the alert. Alerting can also be used to invoke system functions such as autoscaling. + +Alerting usually depends on the following instrumentation data: + +* Security events. If the event logs indicate that repeated authentication and/or authorization failures are occurring, the system might be under attack and an operator should be informed. +* Performance metrics. The system must quickly respond if a particular performance metric exceeds a specified threshold. +* Availability information. If a fault is detected, it might be necessary to quickly restart one or more subsystems, or fail over to a backup resource. Repeated faults in a subsystem might indicate more serious concerns. + +Operators might receive alert information by using many delivery channels such as email, a pager device, or an SMS text message. An alert might also include an indication of how critical a situation is. Many alerting systems support subscriber groups, and all operators who are members of the same group can receive the same set of alerts. + +An alerting system should be customizable, and the appropriate values from the underlying instrumentation data can be provided as parameters. 
This approach enables an operator to filter data and focus on those thresholds or combinations of values that are of interest. Note that in some cases, the raw instrumentation data can be provided to the alerting system. In other situations, it might be more appropriate to supply aggregated data. (For example, an alert can be triggered if the CPU utilization for a node has exceeded 90 percent over the last 10 minutes). The details provided to the alerting system should also include any appropriate summary and context information. This data can help reduce the possibility that false-positive events will trip an alert. + +### Reporting +Reporting is used to generate an overall view of the system. It might incorporate historical data in addition to current information. Reporting requirements themselves fall into two broad categories: operational reporting and security reporting. + +Operational reporting typically includes the following aspects: + +* Aggregating statistics that you can use to understand resource utilization of the overall system or specified subsystems during a specified time window +* Identifying trends in resource usage for the overall system or specified subsystems during a specified period +* Monitoring the exceptions that have occurred throughout the system or in specified subsystems during a specified period +* Determining the efficiency of the application in terms of the deployed resources, and understanding whether the volume of resources (and their associated cost) can be reduced without affecting performance unnecessarily + +Security reporting is concerned with tracking customers' use of the system. It can include: + +* Auditing user operations. This requires recording the individual requests that each user performs, together with dates and times. The data should be structured to enable an administrator to quickly reconstruct the sequence of operations that a user performs over a specified period. +* Tracking resource use by user. 
This requires recording how each request for a user accesses the various resources that compose the system, and for how long. An administrator must be able to use this data to generate a utilization report by user over a specified period, possibly for billing purposes. + +In many cases, batch processes can generate reports according to a defined schedule. (Latency is not normally an issue.) But they should also be available for generation on an ad hoc basis if needed. As an example, if you are storing data in a relational database such as Azure SQL Database, you can use a tool such as SQL Server Reporting Services to extract and format data and present it as a set of reports. + +## Related patterns and guidance +* [Autoscaling guidance](../best-practices/auto-scaling.md) describes how to decrease management overhead by reducing the need for an operator to continually monitor the performance of a system and make decisions about adding or removing resources. +* [Health Endpoint Monitoring Pattern](https://msdn.microsoft.com/library/dn589789.aspx) describes how to implement functional checks within an application that external tools can access through exposed endpoints at regular intervals. +* [Priority Queue Pattern](https://msdn.microsoft.com/library/dn589794.aspx) shows how to prioritize queued messages so that urgent requests are received and can be processed before less urgent messages. 
+ +## More information +* [Monitor, diagnose, and troubleshoot Microsoft Azure Storage](/azure/storage/storage-monitoring-diagnosing-troubleshooting) +* [Azure: Telemetry Basics and Troubleshooting](http://social.technet.microsoft.com/wiki/contents/articles/18146.windows-azure-telemetry-basics-and-troubleshooting.aspx) +* [Enabling Diagnostics in Azure Cloud Services and Virtual Machines](/azure/cloud-services/cloud-services-dotnet-diagnostics) +* [Azure Redis Cache](https://azure.microsoft.com/services/cache/), [Azure DocumentDB](https://azure.microsoft.com/services/documentdb/), and [HDInsight](https://azure.microsoft.com/services/hdinsight/) +* [How to use Service Bus queues](/azure/service-bus-messaging/service-bus-dotnet-get-started-with-queues) +* [SQL Server business intelligence in Azure Virtual Machines](/azure/virtual-machines/windows/sqlclassic/virtual-machines-windows-classic-ps-sql-bi) +* [Receive alert notifications](/azure/monitoring-and-diagnostics/insights-receive-alert-notifications) and [Track service health](/azure/monitoring-and-diagnostics/insights-service-health) +* [Application Insights](/azure/application-insights/app-insights-overview) + diff --git a/docs/best-practices/naming-conventions.md b/docs/best-practices/naming-conventions.md new file mode 100644 index 00000000000..f9297a691b6 --- /dev/null +++ b/docs/best-practices/naming-conventions.md @@ -0,0 +1,228 @@ +--- +title: Naming conventions for Azure resources +description: Naming conventions for Azure resources. 
How to name virtual machines, storage accounts, networks, virtual networks, subnets and other Azure entities +services: '' +documentationcenter: na +author: telmosampaio +manager: bennage +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: f5cbc794-6e51-4b36-b0cb-428c874c6950 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 10/31/2016 +ms.author: telmos + +--- +# Naming conventions + +[!INCLUDE [header](../_includes/header.md)] + +This article is a summary of the naming rules and restrictions for Azure resources +and a baseline set of recommendations for naming conventions. You can use these recommendations +as a starting point for your own conventions specific to your needs. + +The choice of a name for any resource in Microsoft Azure is important because: + +* It is difficult to change a name later. +* Names must meet the requirements of their specific resource type. + +Consistent naming conventions make resources easier to locate. They can also indicate the role of +a resource in a solution. + +The key to success with naming conventions is establishing and following them across your applications and organizations. + +## Naming subscriptions +When naming Azure subscriptions, verbose names make understanding the context +and purpose of each subscription clear. When working in an environment with many subscriptions, following a shared naming convention can improve clarity. + +A recommended pattern for naming subscriptions is: + +`<Company> <Department (optional)> <Product Line (optional)> <Environment>` + +* Company would usually be the same for each subscription. However, some companies may have + child companies within the organizational structure. These companies may be managed by a central IT + group. In these cases, they could be differentiated by having both the parent company name (*Contoso*) + and child company name (*North Wind*). +* Department is a name within the organization where a group of individuals work. 
This item within + the namespace is optional. +* Product line is a specific name for a product or function that is performed from within the department. + This is generally optional for internal-facing services and applications. However, it is highly recommended to use it + for public-facing services that require easy separation and identification (such as for clear + separation of billing records). +* Environment is the name that describes the deployment lifecycle of the applications or services, + such as Dev, QA, or Prod. + +| Company | Department | Product Line or Service | Environment | Full Name | +| --- | --- | --- | --- | --- | +| Contoso |SocialGaming |AwesomeService |Production |Contoso SocialGaming AwesomeService Production | +| Contoso |SocialGaming |AwesomeService |Dev |Contoso SocialGaming AwesomeService Dev | +| Contoso |IT |InternalApps |Production |Contoso IT InternalApps Production | +| Contoso |IT |InternalApps |Dev |Contoso IT InternalApps Dev | + + + +## Use affixes to avoid ambiguity +When naming resources in Azure, it is recommended to use common prefixes or suffixes to identify the type and +context of the resource. While all the information about type, metadata, and context is available programmatically, +applying common affixes simplifies visual identification. When incorporating affixes into your naming convention, +it is important to clearly specify whether the affix is at the beginning of the name +(prefix) or at the end (suffix). + +For instance, here are two possible names for a service hosting a calculation engine: + +* SvcCalculationEngine (prefix) +* CalculationEngineSvc (suffix) + +Affixes can refer to different aspects that describe the particular resources. The following table +shows some examples typically used. 
+ +| Aspect | Example | Notes | +| --- | --- | --- | +| Environment |dev, prod, QA |Identifies the environment for the resource | +| Location |uw (US West), ue (US East) |Identifies the region into which the resource is deployed | +| Instance |01, 02 |For resources that have more than one named instance (web servers, etc.). | +| Product or Service |service |Identifies the product, application, or service that the resource supports | +| Role |sql, web, messaging |Identifies the role of the associated resource | + +When developing a specific naming convention for your company or projects, it is important to +choose a common set of affixes and their position (suffix or prefix). + +## Naming Rules and Restrictions +Each resource or service type in Azure enforces a set of naming restrictions and scope; any naming convention +or pattern must adhere to the requisite naming rules and scope. For example, while the name of a VM maps to a DNS +name (and is thus required to be unique across all of Azure), the name of a VNET is scoped to the Resource Group that +it is created within. + +In general, avoid having any special characters (`-` or `_`) as the first or last character in any name. These characters will cause most validation rules to fail.
+ +| Category | Service or Entity | Scope | Length | Casing | Valid Characters | Suggested Pattern | Example | +| --- | --- | --- | --- | --- | --- | --- | --- | +| Resource Group |Resource Group |Global |1-64 |Case insensitive |Alphanumeric, underscore, and hyphen |`--rg` |`profx-prod-rg` | +| Resource Group |Availability Set |Resource Group |1-80 |Case insensitive |Alphanumeric, underscore, and hyphen |`--as` |`profx-sql-as` | +| General |Tag |Associated Entity |512 (name), 256 (value) |Case insensitive |Alphanumeric |`"key" : "value"` |`"department" : "Central IT"` | +| Compute |Virtual Machine |Resource Group |1-15 |Case insensitive |Alphanumeric, underscore, and hyphen |`--vm` |`profx-sql-vm1` | +| Storage |Storage account name (data) |Global |3-24 |Lower case |Alphanumeric |`` (use a function to calculate a unique guid for naming storage accounts) |`profxdata001` | +| Storage |Storage account name (disks) |Global |3-24 |Lower case |Alphanumeric |`st` |`profxsql001st0` | +| Storage |Container name |Storage account |3-63 |Lower case |Alphanumeric and dash |`` |`logs` | +| Storage |Blob name |Container |1-1024 |Case sensitive |Any URL char |`` |`` | +| Storage |Queue name |Storage account |3-63 |Lower case |Alphanumeric and dash |`--` |`awesomeservice-messages-001` | +| Storage |Table name |Storage account |3-63 |Case insensitive |Alphanumeric |`-` |`awesomeservice-logs` | +| Storage |File name |Storage account |3-63 |Lower case |Alphanumeric |`` |`` | +| Networking |Virtual Network (VNet) |Resource Group |2-64 |Case-insensitive |Alphanumeric, dash, underscore, and period |`-[section]-vnet` |`profx-vnet` | +| Networking |Subnet |Parent VNet |2-80 |Case-insensitive |Alphanumeric, underscore, dash, and period |`` |`web` | +| Networking |Network Interface |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`-nic` |`profx-sql1-nic1` | +| Networking |Network Security Group |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, 
underscore, and period |`--nsg` |`profx-app-nsg` | +| Networking |Network Security Group Rule |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`` |`sql-allow` | +| Networking |Public IP Address |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`-pip` |`profx-sql1-pip` | +| Networking |Load Balancer |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`-lb` |`profx-lb` | +| Networking |Load Balanced Rules Config |Load Balancer |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`` |`http` | +| Networking |Azure Application Gateway |Resource Group |1-80 |Case-insensitive |Alphanumeric, dash, underscore, and period |`-aag` |`profx-agw` | +| Networking |Traffic Manager Profile |Resource Group |1-63 |Case-insensitive |Alphanumeric, dash, and period |`` |`app1` | + + +## Organizing resources with tags +The Azure Resource Manager supports tagging entities with arbitrary +text strings to identify context and streamline automation. For example, the tag `"sqlVersion" : "sql2014ee"` could identify VMs in a deployment running SQL Server 2014 Enterprise Edition for running an automated script against them. Tags should be used to augment and enhance context alongside the naming conventions chosen. + +> [!TIP] +> One other advantage of tags is that tags span resource groups, allowing you to link and correlate entities across +> disparate deployments. +> +> + +Each resource or resource group can have a maximum of **15** tags. The tag name is limited to 512 characters, and the tag +value is limited to 256 characters. + +For more information on resource tagging, refer to [Using tags to organize your Azure resources](/azure/azure-resource-manager/resource-group-using-tags/). + +Some of the common tagging use cases are: + +* **Billing**; Grouping resources and associating them with billing or charge back codes.
+* **Service Context Identification**; Identify groups of resources across Resource Groups for common operations and grouping +* **Access Control and Security Context**; Administrative role identification based on portfolio, system, service, app, instance, etc. + +> [!TIP] +> Tag early - tag often. Better to have a baseline tagging scheme in place and adjust over time rather than having +> to retrofit after the fact. +> +> + +An example of some common tagging approaches: + +| Tag Name | Key | Example | Comment | +| --- | --- | --- | --- | +| Bill To / Internal Chargeback ID |billTo |`IT-Chargeback-1234` |An internal I/O or billing code | +| Operator or Directly Responsible Individual (DRI) |managedBy |`joe@contoso.com` |Alias or email address | +| Project Name |project-name |`myproject` |Name of the project or product line | +| Project Version |project-version |`3.4` |Version of the project or product line | +| Environment |environment |`` |Environmental identifier | +| Tier |tier |`Front End, Back End, Data` |Tier or role/context identification | +| Data Profile |dataProfile |`Public, Confidential, Restricted, Internal` |Sensitivity of data stored in the resource | + +## Tips and tricks +Some types of resources may require additional care on naming and conventions. + +### Virtual machines +Especially in larger topologies, carefully naming virtual machines streamlines identifying the +role and purpose of each machine, and enabling more predictable scripting. + +> [!WARNING] +> Every virtual machine in Azure has both an Azure resource name, and an operating +> system host name. +> If the resource name and host name are different, managing the VMs may be challenging and should be avoided. +> For example, if a virtual machine is created from a .vhd that already contains a +> configured operating system with a hostname. 
+> +> + +* [Microsoft NetBIOS Computer Naming Conventions](https://support.microsoft.com/en-us/help/188997/microsoft-netbios-computer-naming-conventions) + +### Storage accounts and storage entities +There are two primary use cases for storage accounts - backing disks for VMs, and storing +data in blobs, queues and tables. Storage accounts used for VM disks should follow the naming +convention of associating them with the parent VM name (and with the potential need for multiple +storage accounts for high-end VM SKUs, also apply a number suffix). + +> [!TIP] +> Storage accounts - whether for data or disks - should follow a naming convention that +> allows for multiple storage accounts to be leveraged (i.e. always using a numeric suffix). +> +> + +It is possible to configure a custom domain name for accessing blob data in your Azure Storage account. +The default endpoint for the Blob service is `https://mystorage.blob.core.windows.net`. + +But if you map a custom domain (such as www.contoso.com) to the blob endpoint for your storage account, +you can also access blob data in your storage account by using that domain. For example, with a custom +domain name, `http://mystorage.blob.core.windows.net/mycontainer/myblob` could be accessed as +`http://www.contoso.com/mycontainer/myblob`. + +For more information about configuring this feature, refer to [Configure a custom domain name for your Blob storage endpoint](/azure/storage/storage-custom-domain-name/). + +For more information on naming blobs, containers and tables: + +* [Naming and Referencing Containers, Blobs, and Metadata](https://msdn.microsoft.com/library/dd135715.aspx) +* [Naming Queues and Metadata](https://msdn.microsoft.com/library/dd179349.aspx) +* [Naming Tables](https://msdn.microsoft.com/library/azure/dd179338.aspx) + +A blob name can contain any combination of characters, but reserved URL characters must be properly +escaped.
Avoid blob names that end with a period (.), a forward slash (/), or a sequence or combination +of the two. By convention, the forward slash is the **virtual** directory separator. Do not use a backward +slash (\) in a blob name. The client APIs may allow it, but then fail to hash properly, and the +signatures will not match. + +It is not possible to modify the name of a storage account or container after it has been created. +If you want to use a new name, you must delete it and create a new one. + +> [!TIP] +> We recommend that you establish a naming convention for all storage accounts and types +> before embarking on the development of a new service or application. +> +> \ No newline at end of file diff --git a/docs/best-practices/retry-service-specific.md b/docs/best-practices/retry-service-specific.md new file mode 100644 index 00000000000..9a881137f35 --- /dev/null +++ b/docs/best-practices/retry-service-specific.md @@ -0,0 +1,1092 @@ +--- +title: Retry service specific guidance +description: Service specific guidance for setting the retry mechanism. +services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 159d6a0b-b929-4e7f-b297-f89b4af8a940 +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Retry guidance for specific services +[!INCLUDE [header](../_includes/header.md)] + +Most Azure services and client SDKs include a retry mechanism. However, these differ because each service has different characteristics and requirements, and so each retry mechanism is tuned to a specific service. This guide summarizes the retry mechanism features for the majority of Azure services, and includes information to help you use, adapt, or extend the retry mechanism for that service. 
+ +For general guidance on handling transient faults, and retrying connections and operations against services and resources, see [Retry guidance](./transient-faults.md). + +The following table summarizes the retry features for the Azure services described in this guidance. + +| **Service** | **Retry capabilities** | **Policy configuration** | **Scope** | **Telemetry features** | +| --- | --- | --- | --- | --- | +| **[Azure Storage](#azure-storage-retry-guidelines)** |Native in client |Programmatic |Client and individual operations |TraceSource | +| **[SQL Database with Entity Framework](#sql-database-using-entity-framework-6-retry-guidelines)** |Native in client |Programmatic |Global per AppDomain |None | +| **[SQL Database with ADO.NET](#azure-storage-retry-guidelines)** |Topaz* |Declarative and programmatic |Single statements or blocks of code |Custom | +| **[Service Bus](#service-bus-retry-guidelines)** |Native in client |Programmatic |Namespace Manager, Messaging Factory, and Client |ETW | +| **[Azure Redis Cache](#azure-redis-cache-retry-guidelines)** |Native in client |Programmatic |Client |TextWriter | +| **[DocumentDB](#documentdb-retry-guidelines)** |Native in service |Non-configurable |Global |TraceSource | +| **[Azure Search](#azure-storage-retry-guidelines)** |Native in client |Programmatic |Client |ETW or Custom | +| **[Active Directory](#azure-active-directory-retry-guidelines)** |Topaz* (with custom detection strategy) |Declarative and programmatic |Blocks of code |Custom | + +*Topaz is the friendly name for the Transient Fault Handling Application Block that is included in [Enterprise Library 6.0][entlib]. You can use a custom detection strategy with Topaz for most types of services, as described in this guidance. Default strategies for Topaz are shown in the section [Transient Fault Handling Application Block (Topaz) strategies](#transient-fault-handling-application-block-topaz-strategies) at the end of this guidance.
Note that the block is now an open-sourced framework and is not directly supported by Microsoft. + +> [!NOTE] +> For most of the Azure built-in retry mechanisms, there is currently no way to apply a different retry policy for different types of error or exception beyond the functionality included in the retry policy. Therefore, the best guidance available at the time of writing is to configure a policy that provides the optimum average performance and availability. One way to fine-tune the policy is to analyze log files to determine the type of transient faults that are occurring. For example, if the majority of errors are related to network connectivity issues, you might attempt an immediate retry rather than wait a long time for the first retry. +> +> + +## Azure Storage retry guidelines +Azure storage services include table and blob storage, files, and storage queues. + +### Retry mechanism +Retries occur at the individual REST operation level and are an integral part of the client API implementation. The client storage SDK uses classes that implement the [IExtendedRetryPolicy Interface](http://msdn.microsoft.com/library/microsoft.windowsazure.storage.retrypolicies.iextendedretrypolicy.aspx). + +There are different implementations of the interface. Storage clients can choose from policies specifically designed for accessing tables, blobs, and queues. Each implementation uses a different retry strategy that essentially defines the retry interval and other details. + +The built-in classes provide support for linear (constant delay) and exponential with randomization retry intervals. There is also a no retry policy for use when another process is handling retries at a higher level. However, you can implement your own retry classes if you have specific requirements not provided by the built-in classes.
+ +Alternate retries switch between primary and secondary storage service location if you are using read access geo-redundant storage (RA-GRS) and the result of the request is a retryable error. See [Azure Storage Redundancy Options](http://msdn.microsoft.com/library/azure/dn727290.aspx) for more information. + +### Policy configuration +Retry policies are configured programmatically. A typical procedure is to create and populate a **TableRequestOptions**, **BlobRequestOptions**, **FileRequestOptions**, or **QueueRequestOptions** instance. + +```csharp +TableRequestOptions interactiveRequestOption = new TableRequestOptions() +{ + RetryPolicy = new LinearRetry(TimeSpan.FromMilliseconds(500), 3), + // For Read-access geo-redundant storage, use PrimaryThenSecondary. + // Otherwise set this to PrimaryOnly. + LocationMode = LocationMode.PrimaryThenSecondary, + // Maximum execution time based on the business use case. Maximum value up to 10 seconds. + MaximumExecutionTime = TimeSpan.FromSeconds(2) +}; +``` + +The request options instance can then be set on the client, and all operations with the client will use the specified request options. + +```csharp +client.DefaultRequestOptions = interactiveRequestOption; +var stats = await client.GetServiceStatsAsync(); +``` + +You can override the client request options by passing a populated instance of the request options class as a parameter to operation methods. + +```csharp +var stats = await client.GetServiceStatsAsync(interactiveRequestOption, operationContext: null); +``` + +You use an **OperationContext** instance to specify the code to execute when a retry occurs and when an operation has completed. This code can collect information about the operation for use in logs and telemetry. 
+ + // Set up notifications for an operation + var context = new OperationContext(); + context.ClientRequestID = "some request id"; + context.Retrying += (sender, args) => + { + /* Collect retry information */ + }; + context.RequestCompleted += (sender, args) => + { + /* Collect operation completion information */ + }; + var stats = await client.GetServiceStatsAsync(null, context); + +In addition to indicating whether a failure is suitable for retry, the extended retry policies return a **RetryContext** object that indicates the number of retries, the results of the last request, whether the next retry will happen in the primary or secondary location (see table below for details). The properties of the **RetryContext** object can be used to decide if and when to attempt a retry. For more details, see [IExtendedRetryPolicy.Evaluate Method](http://msdn.microsoft.com/library/microsoft.windowsazure.storage.retrypolicies.iextendedretrypolicy.evaluate.aspx). + +The following table shows the default settings for the built-in retry policies. + +| **Context** | **Setting** | **Default value** | **Meaning** | +| --- | --- | --- | --- | +| Table / Blob / File
    QueueRequestOptions |MaximumExecutionTime

    ServerTimeout




    LocationMode






    RetryPolicy |120 seconds

    None











    ExponentialPolicy |Maximum execution time for the request, including all potential retry attempts.
    Server timeout interval for the request (value is rounded to seconds). If not specified, it will use the default value for all requests to the server. Usually, the best option is to omit this setting so that the server default is used.
    If the storage account is created with the Read access geo-redundant storage (RA-GRS) replication option, you can use the location mode to indicate which location should receive the request. For example, if **PrimaryThenSecondary** is specified, requests are always sent to the primary location first. If a request fails, it is sent to the secondary location.
    See below for details of each option. | +| Exponential policy |maxAttempt
    deltaBackoff


    MinBackoff

    MaxBackoff |3
    4 seconds


    3 seconds

    30 seconds |Number of retry attempts.
    Back-off interval between retries. Multiples of this timespan, including a random element, will be used for subsequent retry attempts.
    Added to all retry intervals computed from deltaBackoff. This value cannot be changed.
    MaxBackoff is used if the computed retry interval is greater than MaxBackoff. This value cannot be changed. | +| Linear policy |maxAttempt
    deltaBackoff |3
    30 seconds |Number of retry attempts.
    Back-off interval between retries. | + +### Retry usage guidance +Consider the following guidelines when accessing Azure storage services using the storage client API: + +* Use the built-in retry policies from the Microsoft.WindowsAzure.Storage.RetryPolicies namespace where they are appropriate for your requirements. In most cases, these policies will be sufficient. +* Use the **ExponentialRetry** policy in batch operations, background tasks, or non-interactive scenarios. In these scenarios, you can typically allow more time for the service to recover—with a consequently increased chance of the operation eventually succeeding. +* Consider specifying the **MaximumExecutionTime** property of the **RequestOptions** parameter to limit the total execution time, but take into account the type and size of the operation when choosing a timeout value. +* If you need to implement a custom retry, avoid creating wrappers around the storage client classes. Instead, use the capabilities to extend the existing policies through the **IExtendedRetryPolicy** interface. +* If you are using read access geo-redundant storage (RA-GRS) you can use the **LocationMode** to specify that retry attempts will access the secondary read-only copy of the store should the primary access fail. However, when using this option you must ensure that your application can work successfully with data that may be stale if the replication from the primary store has not yet completed. + +Consider starting with the following settings for retrying operations. These are general purpose settings, and you should monitor the operations and fine tune the values to suit your own scenario. + +| **Context** | **Sample target E2E
    max latency** | **Retry policy** | **Settings** | **Values** | **How it works** | +| --- | --- | --- | --- | --- | --- | +| Interactive, UI,
    or foreground |2 seconds |Linear |maxAttempt
    deltaBackoff |3
    500 ms |Attempt 1 - delay 500 ms
    Attempt 2 - delay 500 ms
    Attempt 3 - delay 500 ms | +| Background
    or batch |30 seconds |Exponential |maxAttempt
    deltaBackoff |5
    4 seconds |Attempt 1 - delay ~3 sec
    Attempt 2 - delay ~7 sec
    Attempt 3 - delay ~15 sec | + +### Telemetry +Retry attempts are logged to a **TraceSource**. You must configure a **TraceListener** to capture the events and write them to a suitable destination log. You can use the **TextWriterTraceListener** or **XmlWriterTraceListener** to write the data to a log file, the **EventLogTraceListener** to write to the Windows Event Log, or the **EventProviderTraceListener** to write trace data to the ETW subsystem. You can also configure auto-flushing of the buffer, and the verbosity of events that will be logged (for example, Error, Warning, Informational, and Verbose). For more information, see [Client-side Logging with the .NET Storage Client Library](http://msdn.microsoft.com/library/azure/dn782839.aspx). + +Operations can receive an **OperationContext** instance, which exposes a **Retrying** event that can be used to attach custom telemetry logic. For more information, see [OperationContext.Retrying Event](http://msdn.microsoft.com/library/microsoft.windowsazure.storage.operationcontext.retrying.aspx). + +### Examples +The following code example shows how to create two **TableRequestOptions** instances with different retry settings; one for interactive requests and one for background requests. The example then sets these two retry policies on the client so that they apply for all requests, and also sets the interactive strategy on a specific request so that it overrides the default settings applied to the client. 
+ +```csharp +using System; +using System.Threading.Tasks; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.RetryPolicies; +using Microsoft.WindowsAzure.Storage.Table; + +namespace RetryCodeSamples +{ + class AzureStorageCodeSamples + { + private const string connectionString = "UseDevelopmentStorage=true"; + + public async static Task Samples() + { + var storageAccount = CloudStorageAccount.Parse(connectionString); + + TableRequestOptions interactiveRequestOption = new TableRequestOptions() + { + RetryPolicy = new LinearRetry(TimeSpan.FromMilliseconds(500), 3), + // For Read-access geo-redundant storage, use PrimaryThenSecondary. + // Otherwise set this to PrimaryOnly. + LocationMode = LocationMode.PrimaryThenSecondary, + // Maximum execution time based on the business use case. Maximum value up to 10 seconds. + MaximumExecutionTime = TimeSpan.FromSeconds(2) + }; + + TableRequestOptions backgroundRequestOption = new TableRequestOptions() + { + // Client has a default exponential retry policy with 4 sec delay and 3 retry attempts + // Retry delays will be approximately 3 sec, 7 sec, and 15 sec + MaximumExecutionTime = TimeSpan.FromSeconds(30), + // PrimaryThenSecondary in case of Read-access geo-redundant storage, else set this to PrimaryOnly + LocationMode = LocationMode.PrimaryThenSecondary + }; + + var client = storageAccount.CreateCloudTableClient(); + // Client has a default exponential retry policy with 4 sec delay and 3 retry attempts + // Retry delays will be approximately 3 sec, 7 sec, and 15 sec + // ServerTimeout and MaximumExecutionTime are not set + + { + // Set properties for the client (used on all requests unless overridden) + // Different exponential policy parameters for background scenarios + client.DefaultRequestOptions = backgroundRequestOption; + // Linear policy for interactive scenarios + client.DefaultRequestOptions = interactiveRequestOption; + } + + { + // set properties for a specific request + var stats = 
await client.GetServiceStatsAsync(interactiveRequestOption, operationContext: null); + } + + { + // Set up notifications for an operation + var context = new OperationContext(); + context.ClientRequestID = "some request id"; + context.Retrying += (sender, args) => + { + /* Collect retry information */ + }; + context.RequestCompleted += (sender, args) => + { + /* Collect operation completion information */ + }; + var stats = await client.GetServiceStatsAsync(null, context); + } + } + } +} +``` + +### More information +* [Azure Storage Client Library Retry Policy Recommendations](https://azure.microsoft.com/blog/2014/05/22/azure-storage-client-library-retry-policy-recommendations/) +* [Storage Client Library 2.0 – Implementing Retry Policies](http://gauravmantri.com/2012/12/30/storage-client-library-2-0-implementing-retry-policies/) + +## SQL Database using Entity Framework 6 retry guidelines +SQL Database is a hosted SQL database available in a range of sizes and as both a standard (shared) and premium (non-shared) service. Entity Framework is an object-relational mapper that enables .NET developers to work with relational data using domain-specific objects. It eliminates the need for most of the data-access code that developers usually need to write. + +### Retry mechanism +Retry support is provided when accessing SQL Database using Entity Framework 6.0 and higher through a mechanism called [Connection Resiliency / Retry Logic](http://msdn.microsoft.com/data/dn456835.aspx). A full specification is available in the [.NET Entity Framework wiki](https://entityframework.codeplex.com/wikipage?title=Connection%20Resiliency%20Spec) on Codeplex. The main features of the retry mechanism are: + +* The primary abstraction is the **IDbExecutionStrategy** interface. This interface: + * Defines synchronous and asynchronous **Execute*** methods. 
+ * Defines classes that can be used directly or can be configured on a database context as a default strategy, mapped to provider name, or mapped to a provider name and server name. When configured on a context, retries occur at the level of individual database operations, of which there might be several for a given context operation. + * Defines when to retry a failed connection, and how. +* It includes several built-in implementations of the **IDbExecutionStrategy** interface: + * Default - no retrying. + * Default for SQL Database (automatic) - no retrying, but inspects exceptions and wraps them with suggestion to use the SQL Database strategy. + * Default for SQL Database - exponential (inherited from base class) plus SQL Database detection logic. +* It implements an exponential back-off strategy that includes randomization. +* The built-in retry classes are stateful and are not thread safe. However, they can be reused after the current operation is completed. +* If the specified retry count is exceeded, the results are wrapped in a new exception. It does not bubble up the current exception. + +### Policy configuration +Retry support is provided when accessing SQL Database using Entity Framework 6.0 and higher. Retry policies are configured programmatically. The configuration cannot be changed on a per-operation basis. + +When configuring a strategy on the context as the default, you specify a function that creates a new strategy on demand. The following code shows how you can create a retry configuration class that extends the **DbConfiguration** base class. 
+ +```csharp +public class BloggingContextConfiguration : DbConfiguration +{ + public BlogConfiguration() + { + // Set up the execution strategy for SQL Database (exponential) with 5 retries and 4 sec delay + this.SetExecutionStrategy( + "System.Data.SqlClient", () => new SqlAzureExecutionStrategy(5, TimeSpan.FromSeconds(4))); + } +} +``` + +You can then specify this as the default retry strategy for all operations using the **SetConfiguration** method of the **DbConfiguration** instance when the application starts. By default, EF will automatically discover and use the configuration class. + + DbConfiguration.SetConfiguration(new BloggingContextConfiguration()); + +You can specify the retry configuration class for a context by annotating the context class with a **DbConfigurationType** attribute. However, if you have only one configuration class, EF will use it without the need to annotate the context. + + [DbConfigurationType(typeof(BloggingContextConfiguration))] + public class BloggingContext : DbContext + { ... + +If you need to use different retry strategies for specific operations, or disable retries for specific operations, you can create a configuration class that allows you to suspend or swap strategies by setting a flag in the **CallContext**. The configuration class can use this flag to switch strategies, or disable the strategy you provide and use a default strategy. For more information, see [Suspend Execution Strategy](http://msdn.microsoft.com/dn307226#transactions_workarounds) in the page Limitations with Retrying Execution Strategies (EF6 onwards). + +Another technique for using specific retry strategies for individual operations is to create an instance of the required strategy class and supply the desired settings through parameters. You then invoke its **ExecuteAsync** method. 
+ + var executionStrategy = new SqlAzureExecutionStrategy(5, TimeSpan.FromSeconds(4)); + var blogs = await executionStrategy.ExecuteAsync( + async () => + { + using (var db = new BloggingContext("Blogs")) + { + // Acquire some values asynchronously and return them + } + }, + new CancellationToken() + ); + +The simplest way to use a **DbConfiguration** class is to locate it in the same assembly as the **DbContext** class. However, this is not appropriate when the same context is required in different scenarios, such as different interactive and background retry strategies. If the different contexts execute in separate AppDomains, you can use the built-in support for specifying configuration classes in the configuration file or set it explicitly using code. If the different contexts must execute in the same AppDomain, a custom solution will be required. + +For more information, see [Code-Based Configuration (EF6 onwards)](http://msdn.microsoft.com/data/jj680699.aspx). + +The following table shows the default settings for the built-in retry policy when using EF6. + +![Retry guidance table](./images/retry-service-specific/RetryServiceSpecificGuidanceTable4.png) + +### Retry usage guidance +Consider the following guidelines when accessing SQL Database using EF6: + +* Choose the appropriate service option (shared or premium). A shared instance may suffer longer than usual connection delays and throttling due to the usage by other tenants of the shared server. If predictable performance and reliable low latency operations are required, consider choosing the premium option. +* A fixed interval strategy is not recommended for use with Azure SQL Database. Instead, use an exponential back-off strategy because the service may be overloaded, and longer delays allow more time for it to recover. +* Choose a suitable value for the connection and command timeouts when defining connections. Base the timeout on both your business logic design and through testing. 
You may need to modify this value over time as the volumes of data or the business processes change. Too short a timeout may result in premature failures of connections when the database is busy. Too long a timeout may prevent the retry logic working correctly by waiting too long before detecting a failed connection. The value of the timeout is a component of the end-to-end latency, although you cannot easily determine how many commands will execute when saving the context. You can change the default timeout by setting the **CommandTimeout** property of the **DbContext** instance. +* Entity Framework supports retry configurations defined in configuration files. However, for maximum flexibility on Azure you should consider creating the configuration programmatically within the application. The specific parameters for the retry policies, such as the number of retries and the retry intervals, can be stored in the service configuration file and used at runtime to create the appropriate policies. This allows the settings to be changed without requiring the application to be restarted. + +Consider starting with the following settings for retrying operations. You cannot specify the delay between retry attempts (it is fixed and generated as an exponential sequence). You can specify only the maximum values, as shown here; unless you create a custom retry strategy. These are general purpose settings, and you should monitor the operations and fine tune the values to suit your own scenario. + +| **Context** | **Sample target E2E
    max latency** | **Retry policy** | **Settings** | **Values** | **How it works** | +| --- | --- | --- | --- | --- | --- | +| Interactive, UI,
    or foreground |2 seconds |Exponential |MaxRetryCount
    MaxDelay |3
    750 ms |Attempt 1 - delay 0 sec
    Attempt 2 - delay 750 ms
    Attempt 3 – delay 750 ms | +| Background
    or batch |30 seconds |Exponential |MaxRetryCount
    MaxDelay |5
    12 seconds |Attempt 1 - delay 0 sec
    Attempt 2 - delay ~1 sec
    Attempt 3 - delay ~3 sec
    Attempt 4 - delay ~7 sec
    Attempt 5 - delay 12 sec | + +> [!NOTE] +> The end-to-end latency targets assume the default timeout for connections to the service. If you specify longer connection timeouts, the end-to-end latency will be extended by this additional time for every retry attempt. +> +> + +### Examples +The following code example defines a simple data access solution that uses Entity Framework. It sets a specific retry strategy by defining an instance of a class named **BlogConfiguration** that extends **DbConfiguration**. + +```csharp +using System; +using System.Collections.Generic; +using System.Data.Entity; +using System.Data.Entity.SqlServer; +using System.Threading.Tasks; + +namespace RetryCodeSamples +{ + public class BlogConfiguration : DbConfiguration + { + public BlogConfiguration() + { + // Set up the execution strategy for SQL Database (exponential) with 5 retries and 12 sec delay. + // These values could be loaded from configuration rather than being hard-coded. + this.SetExecutionStrategy( + "System.Data.SqlClient", () => new SqlAzureExecutionStrategy(5, TimeSpan.FromSeconds(12))); + } + } + + // Specify the configuration type if more than one has been defined. + // [DbConfigurationType(typeof(BlogConfiguration))] + public class BloggingContext : DbContext + { + // Definition of content goes here. + } + + class EF6CodeSamples + { + public async static Task Samples() + { + // Execution strategy configured by DbConfiguration subclass, discovered automatically or + // or explicitly indicated through configuration or with an attribute. Default is no retries. + using (var db = new BloggingContext("Blogs")) + { + // Add, edit, delete blog items here, then: + await db.SaveChangesAsync(); + } + } + } +} +``` + +More examples of using the Entity Framework retry mechanism can be found in [Connection Resiliency / Retry Logic](http://msdn.microsoft.com/data/dn456835.aspx). 
+ +### More information +* [Azure SQL Database Performance and Elasticity Guide](http://social.technet.microsoft.com/wiki/contents/articles/3507.windows-azure-sql-database-performance-and-elasticity-guide.aspx) + +## SQL Database using ADO.NET retry guidelines +SQL Database is a hosted SQL database available in a range of sizes and as both a standard (shared) and premium (non-shared) service. + +### Retry mechanism +SQL Database has no built-in support for retries when accessed using ADO.NET. However, the return codes from requests can be used to determine why a request failed. The page [Azure SQL Database Throttling](http://msdn.microsoft.com/library/dn338079.aspx) explains how throttling can prevent connections, the return codes for specific situations, and how you can handle these and retry operations. + +You can use the Transient Fault Handling Application Block (Topaz) with the Nuget package EnterpriseLibrary.TransientFaultHandling.Data (class **SqlAzureTransientErrorDetectionStrategy**) to implement a retry mechanism for SQL Database. + +The block also provides the **ReliableSqlConnection** class, which implements the old ADO.NET 1.0 API (**IDbConnection** instead of **DbConnection**) and performs retries and connection management internally. While convenient, this requires you to use a different set of methods for invoking operations with retries, and so is not a simple direct replacement. It does not support asynchronous execution, which is recommended when implementing and using Azure services. In addition, because this class uses ADO.NET 1.0, it does not benefit from the recent improvements and updates to ADO.NET. + +### Policy configuration (SQL Database using ADO.NET) +The Transient Fault Handling Application Block supports both file-based and programmatic configuration. In general, you should use programmatic configuration for maximum flexibility (see the notes in the following section for more information). 
The following code, which would be executed once at application startup, creates and populates a **RetryManager** with a list of four retry strategies suitable for use with Azure SQL Database. It also sets the default strategies for the **RetryManager**. These are the strategies that will be used for connections and commands if an alternative is not specified when creating a connection or command. + +```csharp +RetryManager.SetDefault(new RetryManager( + new List<RetryStrategy> { new ExponentialBackoff(name: "default", retryCount: 3, + minBackoff: TimeSpan.FromMilliseconds(100), + maxBackoff: TimeSpan.FromSeconds(30), + deltaBackoff: TimeSpan.FromSeconds(1), + firstFastRetry: true), + new ExponentialBackoff(name: "default sql connection", retryCount: 3, + minBackoff: TimeSpan.FromMilliseconds(100), + maxBackoff: TimeSpan.FromSeconds(30), + deltaBackoff: TimeSpan.FromSeconds(1), + firstFastRetry: true), + new ExponentialBackoff(name: "default sql command", retryCount: 3, + minBackoff: TimeSpan.FromMilliseconds(100), + maxBackoff: TimeSpan.FromSeconds(30), + deltaBackoff: TimeSpan.FromSeconds(1), + firstFastRetry: true), + new ExponentialBackoff(name: "alt sql", retryCount: 5, + minBackoff: TimeSpan.FromMilliseconds(100), + maxBackoff: TimeSpan.FromSeconds(30), + deltaBackoff: TimeSpan.FromSeconds(1), + firstFastRetry: true), }, + "default", + new Dictionary<string, string> { + { + RetryManagerSqlExtensions.DefaultStrategyConnectionTechnologyName, "default sql connection" + }, + { + RetryManagerSqlExtensions.DefaultStrategyCommandTechnologyName, "default sql command"} + })); +``` + +For information about how you can use the retry policies you have configured when you access Azure SQL Database, see the [Examples](#examples) section below. + +Default strategies for the Transient Fault Handling Application Block are shown in the section [Transient Fault Handling Application Block (Topaz) strategies](#transient-fault-handling-application-block-topaz-strategies) at the end of this guidance. 
+ +### Retry usage guidance +Consider the following guidelines when accessing SQL Database using ADO.NET: + +* Choose the appropriate service option (shared or premium). A shared instance may suffer longer than usual connection delays and throttling due to the usage by other tenants of the shared server. If more predictable performance and reliable low latency operations are required, consider choosing the premium option. +* Ensure that you perform retries at the appropriate level or scope to avoid non-idempotent operations causing inconsistency in the data. Ideally, all operations should be idempotent so that they can be repeated without causing inconsistency. Where this is not the case, the retry should be performed at a level or scope that allows all related changes to be undone if one operation fails; for example, from within a transactional scope. For more information, see [Cloud Service Fundamentals Data Access Layer – Transient Fault Handling](http://social.technet.microsoft.com/wiki/contents/articles/18665.cloud-service-fundamentals-data-access-layer-transient-fault-handling.aspx#Idempotent_Guarantee). +* A fixed interval strategy is not recommended for use with Azure SQL Database except for interactive scenarios where there are only a few retries at very short intervals. Instead, consider using an exponential back-off strategy for the majority of scenarios. +* Choose a suitable value for the connection and command timeouts when defining connections. Too short a timeout may result in premature failures of connections when the database is busy. Too long a timeout may prevent the retry logic working correctly by waiting too long before detecting a failed connection. The value of the timeout is a component of the end-to-end latency; it is effectively added to the retry delay specified in the retry policy for every retry attempt. 
+* Close the connection after a certain number of retries, even when using an exponential back-off retry logic, and retry the operation on a new connection. Retrying the same operation multiple times on the same connection can be a factor that contributes to connection problems. For an example of this technique, see [Cloud Service Fundamentals Data Access Layer – Transient Fault Handling](http://social.technet.microsoft.com/wiki/contents/articles/18665.cloud-service-fundamentals-data-access-layer-transient-fault-handling.aspx). +* When connection pooling is in use (the default) there is a chance that the same connection will be chosen from the pool, even after closing and reopening a connection. If this is the case, a technique to resolve it is to call the **ClearPool** method of the **SqlConnection** class to mark the connection as not reusable. However, you should do this only after several connection attempts have failed, and only when encountering the specific class of transient failures such as SQL timeouts (error code -2) related to faulty connections. +* If the data access code uses transactions initiated as **TransactionScope** instances, the retry logic should reopen the connection and initiate a new transaction scope. For this reason, the retryable code block should encompass the entire scope of the transaction. +* The Transient Fault Handling Application Block supports retry configurations entirely defined in configuration files. However, for maximum flexibility on Azure you should consider creating the configuration programmatically within the application. The specific parameters for the retry policies, such as the number of retries and the retry intervals, can be stored in the service configuration file and used at runtime to create the appropriate policies. This allows the settings to be changed without requiring the application to be restarted. + +Consider starting with the following settings for retrying operations. 
These are general purpose settings, and you should monitor the operations and fine tune the values to suit your own scenario. + +| **Context** | **Sample target E2E
    max latency** | **Retry strategy** | **Settings** | **Values** | **How it works** | +| --- | --- | --- | --- | --- | --- | +| Interactive, UI,
    or foreground |2 sec |FixedInterval |Retry count
    Retry interval
    First fast retry |3
    500 ms
    true |Attempt 1 - delay 0 sec
    Attempt 2 - delay 500 ms
    Attempt 3 - delay 500 ms | +| Background
    or batch |30 sec |ExponentialBackoff |Retry count
    Min back-off
    Max back-off
    Delta back-off
    First fast retry |5
    0 sec
    60 sec
    2 sec
    false |Attempt 1 - delay 0 sec
    Attempt 2 - delay ~2 sec
    Attempt 3 - delay ~6 sec
    Attempt 4 - delay ~14 sec
    Attempt 5 - delay ~30 sec | + +> [!NOTE] +> The end-to-end latency targets assume the default timeout for connections to the service. If you specify longer connection timeouts, the end-to-end latency will be extended by this additional time for every retry attempt. +> +> + +### Examples +This section describes how you can use the Transient Fault Handling Application Block to access Azure SQL Database using a set of retry policies you have configured in the **RetryManager** (as shown in the previous section [Policy configuration](#policy-configuration)). The simplest approach to using the block is through the **ReliableSqlConnection** class, or by calling the extension methods such as **OpenWithRetry** on a connection (see [The Transient Fault Handling Application Block](http://msdn.microsoft.com/library/hh680934.aspx) for more information). + +However, in the current version of the Transient Fault Handling Application Block these approaches do not natively support asynchronous operations against SQL Database. Good practice demands that you use only asynchronous techniques to access Azure services such as SQL Database, and so you should consider the following techniques to use the Transient Fault Handling Application Block with SQL Database. + +You can use the simplified asynchronous support in version 5 of the C# language to create asynchronous versions of the methods provided by the block. For example, the following code shows how you might create an asynchronous version of the **ExecuteReaderWithRetry** extension method. The changes and additions to the original code are highlighted. The source code for Topaz is available on Codeplex at [Transient Fault Handling Application Block ("Topaz")](http://topaz.codeplex.com/SourceControl/latest). 
+ +```csharp +public async static Task ExecuteReaderWithRetryAsync(this SqlCommand command, RetryPolicy cmdRetryPolicy, +RetryPolicy conRetryPolicy) +{ + GuardConnectionIsNotNull(command); + + // Check if retry policy was specified, if not, use the default retry policy. + return await (cmdRetryPolicy ?? RetryPolicy.NoRetry).ExecuteAsync(async () => + { + var hasOpenConnection = await EnsureValidConnectionAsync(command, conRetryPolicy).ConfigureAwait(false); + + try + { + return await command.ExecuteReaderAsync().ConfigureAwait(false); + } + catch (Exception) + { + if (hasOpenConnection && command.Connection != null && command.Connection.State == ConnectionState.Open) + { + command.Connection.Close(); + } + + throw; + } + }).ConfigureAwait(false); +} +``` + +This new asynchronous extension method can be used in the same way as the synchronous versions included in the block. + +```csharp +var sqlCommand = sqlConnection.CreateCommand(); +sqlCommand.CommandText = "[some query]"; + +var retryPolicy = + RetryManager.Instance.GetRetryPolicy("alt sql"); +using (var reader = await sqlCommand.ExecuteReaderWithRetryAsync(retryPolicy)) +{ + // Do something with the values +} +``` + +However, this approach deals only with individual operations or commands, and not with blocks of statements where there can be properly defined transactional boundaries. In addition, it does not address the situation of removing faulty connections from the connection pool so that they are not selected for subsequent attempts. A synchronous example of resolving these issues can be found in [Cloud Service Fundamentals Data Access Layer – Transient Fault Handling](http://social.technet.microsoft.com/wiki/contents/articles/18665.cloud-service-fundamentals-data-access-layer-transient-fault-handling.aspx#Timeouts_amp_Connection_Management). In addition to retrying arbitrary sequences of database instructions, it clears the connection pool to remove invalid connections, and instruments the entire process. 
While the code shown in this example is synchronous, it is relatively easy to convert it to asynchronous code. + +### More information +For detailed information about using the Transient Fault Handling Application Block, see: + +* [Using the Transient Fault Handling Application Block with SQL Azure](http://msdn.microsoft.com/library/hh680899.aspx) +* [Perseverance, Secret of All Triumphs: Using the Transient Fault Handling Application Block](http://msdn.microsoft.com/library/dn440719.aspx) +* [Cloud Service Fundamentals Data Access Layer – Transient Fault Handling](http://social.technet.microsoft.com/wiki/contents/articles/18665.cloud-service-fundamentals-data-access-layer-transient-fault-handling.aspx) + +For general guidance on getting the most from SQL Database, see [Azure SQL Database Performance and Elasticity Guide](http://social.technet.microsoft.com/wiki/contents/articles/3507.windows-azure-sql-database-performance-and-elasticity-guide.aspx). + + +## Service Bus retry guidelines +Service Bus is a cloud messaging platform that provides loosely coupled message exchange with improved scale and resiliency for components of an application, whether hosted in the cloud or on-premises. + +### Retry mechanism +Service Bus implements retries using implementations of the [RetryPolicy](http://msdn.microsoft.com/library/microsoft.servicebus.retrypolicy.aspx) base class. All of the Service Bus clients expose a **RetryPolicy** property that can be set to one of the implementations of the **RetryPolicy** base class. The built-in implementations are: + +* The [RetryExponential Class](http://msdn.microsoft.com/library/microsoft.servicebus.retryexponential.aspx). This exposes properties that control the back-off interval, the retry count, and the **TerminationTimeBuffer** property that is used to limit the total time for the operation to complete. +* The [NoRetry Class](http://msdn.microsoft.com/library/microsoft.servicebus.noretry.aspx). 
This is used when retries at the Service Bus API level are not required, such as when retries are managed by another process as part of a batch or multiple step operation. + +Service Bus actions can return a range of exceptions, as listed in [Appendix: Messaging Exceptions](http://msdn.microsoft.com/library/hh418082.aspx). The list provides information about which of these indicate that retrying the operation is appropriate. For example, a [ServerBusyException](http://msdn.microsoft.com/library/microsoft.servicebus.messaging.serverbusyexception.aspx) indicates that the client should wait for a period of time, then retry the operation. The occurrence of a **ServerBusyException** also causes Service Bus to switch to a different mode, in which an extra 10-second delay is added to the computed retry delays. This mode is reset after a short period. + +The exceptions returned from Service Bus expose the **IsTransient** property that indicates if the client should retry the operation. The built-in **RetryExponential** policy relies on the **IsTransient** property in the **MessagingException** class, which is the base class for all Service Bus exceptions. If you create custom implementations of the **RetryPolicy** base class you could use a combination of the exception type and the **IsTransient** property to provide more fine-grained control over retry actions. For example, you could detect a **QuotaExceededException** and take action to drain the queue before retrying sending a message to it. + +### Policy configuration +Retry policies are set programmatically, and can be set as a default policy for a **NamespaceManager** and for a **MessagingFactory**, or individually for each messaging client. To set the default retry policy for a messaging session you set the **RetryPolicy** of the **NamespaceManager**. 
+ + namespaceManager.Settings.RetryPolicy = new RetryExponential(minBackoff: TimeSpan.FromSeconds(0.1), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); + +Note that this code uses named parameters for clarity. Alternatively you can omit the names because none of the parameters is optional. + + namespaceManager.Settings.RetryPolicy = new RetryExponential(TimeSpan.FromSeconds(0.1), + TimeSpan.FromSeconds(30), TimeSpan.FromSeconds(2), TimeSpan.FromSeconds(5), 3); + +To set the default retry policy for all clients created from a messaging factory, you set the **RetryPolicy** of the **MessagingFactory**. + + messagingFactory.RetryPolicy = new RetryExponential(minBackoff: TimeSpan.FromSeconds(0.1), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); + +To set the retry policy for a messaging client, or to override its default policy, you set its **RetryPolicy** property using an instance of the required policy class: + +```csharp +client.RetryPolicy = new RetryExponential(minBackoff: TimeSpan.FromSeconds(0.1), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); +``` + +The retry policy cannot be set at the individual operation level. It applies to all operations for the messaging client. +The following table shows the default settings for the built-in retry policy. + +![Retry guidance table](./images/retry-service-specific/RetryServiceSpecificGuidanceTable7.png) + +### Retry usage guidance +Consider the following guidelines when using Service Bus: + +* When using the built-in **RetryExponential** implementation, do not implement a fallback operation as the policy reacts to Server Busy exceptions and automatically switches to an appropriate retry mode. +* Service Bus supports a feature called Paired Namespaces, which implements automatic failover to a backup queue in a separate namespace if the queue in the primary namespace fails. Messages from the secondary queue can be sent back to the primary queue when it recovers. 
This feature helps to address transient failures. For more information, see [Asynchronous Messaging Patterns and High Availability](http://msdn.microsoft.com/library/azure/dn292562.aspx). + +Consider starting with following settings for retrying operations. These are general purpose settings, and you should monitor the operations and fine tune the values to suit your own scenario. + +![Retry guidance table](./images/retry-service-specific/RetryServiceSpecificGuidanceTable8.png) + +### Telemetry +Service Bus logs retries as ETW events using an **EventSource**. You must attach an **EventListener** to the event source to capture the events and view them in Performance Viewer, or write them to a suitable destination log. You could use the [Semantic Logging Application Block](http://msdn.microsoft.com/library/dn775006.aspx) to do this. The retry events are of the following form: + +```text +Microsoft-ServiceBus-Client/RetryPolicyIteration +ThreadID="14,500" +FormattedMessage="[TrackingId:] RetryExponential: Operation Get:https://retry-tests.servicebus.windows.net/TestQueue/?api-version=2014-05 at iteration 0 is retrying after 00:00:00.1000000 sleep because of Microsoft.ServiceBus.Messaging.MessagingCommunicationException: The remote name could not be resolved: 'retry-tests.servicebus.windows.net'.TrackingId:6a26f99c-dc6d-422e-8565-f89fdd0d4fe3, TimeStamp:9/5/2014 10:00:13 PM." +trackingId="" +policyType="RetryExponential" +operation="Get:https://retry-tests.servicebus.windows.net/TestQueue/?api-version=2014-05" +iteration="0" +iterationSleep="00:00:00.1000000" +lastExceptionType="Microsoft.ServiceBus.Messaging.MessagingCommunicationException" +exceptionMessage="The remote name could not be resolved: 'retry-tests.servicebus.windows.net'.TrackingId:6a26f99c-dc6d-422e-8565-f89fdd0d4fe3,TimeStamp:9/5/2014 10:00:13 PM" +``` + +### Examples +The following code example shows how to set the retry policy for: + +* A namespace manager. 
The policy applies to all operations on that manager, and cannot be overridden for individual operations. +* A messaging factory. The policy applies to all clients created from that factory, and cannot be overridden when creating individual clients. +* An individual messaging client. After a client has been created, you can set the retry policy for that client. The policy applies to all operations on that client. + +```csharp +using System; +using System.Threading.Tasks; +using Microsoft.ServiceBus; +using Microsoft.ServiceBus.Messaging; + +namespace RetryCodeSamples +{ + class ServiceBusCodeSamples + { + private const string connectionString = + @"Endpoint=sb://[my-namespace].servicebus.windows.net/; + SharedAccessKeyName=RootManageSharedAccessKey; + SharedAccessKey=C99..........Mk="; + + public async static Task Samples() + { + const string QueueName = "TestQueue"; + + ServiceBusEnvironment.SystemConnectivity.Mode = ConnectivityMode.Http; + + var namespaceManager = NamespaceManager.CreateFromConnectionString(connectionString); + + // The namespace manager will have a default exponential policy with 10 retry attempts + // and a 3 second delay delta. + // Retry delays will be approximately 0 sec, 3 sec, 9 sec, 25 sec and the fixed 30 sec, + // with an extra 10 sec added when receiving a ServerBusyException. + + { + // Set different values for the retry policy, used for all operations on the namespace manager. + namespaceManager.Settings.RetryPolicy = + new RetryExponential( + minBackoff: TimeSpan.FromSeconds(0), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); + + // Policies cannot be specified on a per-operation basis. 
+ if (!await namespaceManager.QueueExistsAsync(QueueName)) + { + await namespaceManager.CreateQueueAsync(QueueName); + } + } + + + var messagingFactory = MessagingFactory.Create( + namespaceManager.Address, namespaceManager.Settings.TokenProvider); + // The messaging factory will have a default exponential policy with 10 retry attempts + // and a 3 second delay delta. + // Retry delays will be approximately 0 sec, 3 sec, 9 sec, 25 sec and the fixed 30 sec, + // with an extra 10 sec added when receiving a ServerBusyException. + + { + // Set different values for the retry policy, used for clients created from it. + messagingFactory.RetryPolicy = + new RetryExponential( + minBackoff: TimeSpan.FromSeconds(1), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); + + + // Policies cannot be specified on a per-operation basis. + var session = await messagingFactory.AcceptMessageSessionAsync(); + } + + + { + var client = messagingFactory.CreateQueueClient(QueueName); + // The client inherits the policy from the factory that created it. + + + // Set different values for the retry policy on the client. + client.RetryPolicy = + new RetryExponential( + minBackoff: TimeSpan.FromSeconds(0.1), + maxBackoff: TimeSpan.FromSeconds(30), + maxRetryCount: 3); + + + // Policies cannot be specified on a per-operation basis. + var session = await client.AcceptMessageSessionAsync(); + } + } + } +} +``` + +### More information +* [Asynchronous Messaging Patterns and High Availability](http://msdn.microsoft.com/library/azure/dn292562.aspx) + +## Azure Redis Cache retry guidelines +Azure Redis Cache is a fast data access and low latency cache service based on the popular open source Redis Cache. It is secure, managed by Microsoft, and is accessible from any application in Azure. + +The guidance in this section is based on using the StackExchange.Redis client to access the cache. 
A list of other suitable clients can be found on the [Redis website](http://redis.io/clients), and these may have different retry mechanisms. + +Note that the StackExchange.Redis client uses multiplexing through a single connection. The recommended usage is to create an instance of the client at application startup and use this instance for all operations against the cache. For this reason, the connection to the cache is made only once, and so all of the guidance in this section is related to the retry policy for this initial connection—and not for each operation that accesses the cache. + +### Retry mechanism +The StackExchange.Redis client uses a connection manager class that is configured through a set of options. These options include a **ConnectRetry** property that specifies the number of times a failed connection to the cache will be retried. However, the retry policy is used only for the initial connect action, and it does not wait between retries. + +### Policy configuration +Retry policies are configured programmatically by setting the options for the client before connecting to the cache. This can be done by creating an instance of the **ConfigurationOptions** class, populating its properties, and passing it to the **Connect** method. + +```csharp +var options = new ConfigurationOptions { EndPoints = { "localhost" }, + ConnectRetry = 3, + ConnectTimeout = 2000 }; +ConnectionMultiplexer redis = ConnectionMultiplexer.Connect(options, writer); +``` + +Note that the **ConnectTimeout** property specifies the maximum waiting time (in milliseconds), not the delay between retries. + +Alternatively, you can specify the options as a string, and pass this to the **Connect** method. + +```csharp + var options = "localhost,connectRetry=3,connectTimeout=2000"; + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect(options, writer); +``` + +It is also possible to specify options directly when you connect to the cache. 
+ +```csharp +var conn = ConnectionMultiplexer.Connect("redis0:6380,redis1:6380,connectRetry=3"); +``` + +The following table shows the default settings for the built-in retry policy. + +| **Context** | **Setting** | **Default value**
    (v 1.0.331) | **Meaning** | +| --- | --- | --- | --- | +| ConfigurationOptions |ConnectRetry

    ConnectTimeout

    SyncTimeout |3

    Maximum 5000 ms plus SyncTimeout
    1000 |The number of times to repeat connect attempts during the initial connection operation.
    Timeout (ms) for connect operations. Not a delay between retry attempts.
    Time (ms) to allow for synchronous operations. | + +> [!NOTE] +> SyncTimeout contributes to the end-to-end latency of an operation. However, in general, using synchronous operations is not recommended. For more information see [Pipelines and Multiplexers](http://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/PipelinesMultiplexers.md). +> +> + +### Retry usage guidance +Consider the following guidelines when using Azure Redis Cache: + +* The StackExchange Redis client manages its own retries, but only when establishing a connection to the cache when the application first starts. You can configure the connection timeout and the number of retry attempts to establish this connection, but the retry policy does not apply to operations against the cache. +* The retry mechanism has no delay between retry attempts. It simply retries a failed connection after the specified connection timeout expires, and for the specified number of times. +* Instead of using a large number of retry attempts, consider falling back by accessing the original data source instead. + +### Telemetry +You can collect information about connections (but not other operations) using a **TextWriter**. + +```csharp +var writer = new StringWriter(); +... +ConnectionMultiplexer redis = ConnectionMultiplexer.Connect(options, writer); +``` + +An example of the output this generates is shown below. + +```text +localhost:6379,connectTimeout=2000,connectRetry=3 +1 unique nodes specified +Requesting tie-break from localhost:6379 > __Booksleeve_TieBreak... +Allowing endpoints 00:00:02 to respond... 
+localhost:6379 faulted: SocketFailure on PING +localhost:6379 failed to nominate (Faulted) +> UnableToResolvePhysicalConnection on GET +No masters detected +localhost:6379: Standalone v2.0.0, master; keep-alive: 00:01:00; int: Connecting; sub: Connecting; not in use: DidNotRespond +localhost:6379: int ops=0, qu=0, qs=0, qc=1, wr=0, sync=1, socks=2; sub ops=0, qu=0, qs=0, qc=0, wr=0, socks=2 +Circular op-count snapshot; int: 0 (0.00 ops/s; spans 10s); sub: 0 (0.00 ops/s; spans 10s) +Sync timeouts: 0; fire and forget: 0; last heartbeat: -1s ago +resetting failing connections to retry... +retrying; attempts left: 2... +... +``` + +### Examples +The following code example shows how you can configure the connection timeout setting and the number of retries when initializing the StackExchange.Redis client to access Azure Redis Cache at application startup. Note that the connection timeout is the period of time that you are willing to wait for connection to the cache; it is not the delay between retry attempts. + +This example shows how to set the configuration using an instance of the **ConfigurationOptions**. + +```csharp +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using StackExchange.Redis; + +namespace RetryCodeSamples +{ + class CacheRedisCodeSamples + { + public async static Task Samples() + { + var writer = new StringWriter(); + + { + try + { + // Using object-based configuration. + var options = new ConfigurationOptions + { + EndPoints = { "localhost" }, + ConnectRetry = 3, + ConnectTimeout = 2000 // The maximum waiting time (ms), not the delay for retries. + }; + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect(options, writer); + + // Store a reference to the multiplexer for use in the application. 
+ } + catch + { + Console.WriteLine(writer.ToString()); + throw; + } + } + } + } +} +``` + +This example shows how to set the configuration by specifying the options as a string. + +```csharp +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using StackExchange.Redis; + +namespace RetryCodeSamples +{ + class CacheRedisCodeSamples + { + public async static Task Samples() + { + var writer = new StringWriter(); + + { + try + { + // Using string-based configuration. + var options = "localhost,connectRetry=3,connectTimeout=2000"; + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect(options, writer); + + // Store a reference to the multiplexer for use in the application. + } + catch + { + Console.WriteLine(writer.ToString()); + throw; + } + } + } + } +} +``` + +For more examples, see [Configuration](http://github.com/StackExchange/StackExchange.Redis/blob/master/Docs/Configuration.md#configuration) on the project website. + +### More information +* [Redis website](http://redis.io/) + +## DocumentDB retry guidelines +DocumentDB is a fully-managed document database-as-a-service with rich query and indexing capabilities over a schema-free JSON data model. It offers configurable and reliable performance, native JavaScript transactional processing, and is built for the cloud with elastic scale. + +### Retry mechanism +The `DocumentClient` class automatically retries failed attempts. To set the number of retries and the maximum wait time, configure [ConnectionPolicy.RetryOptions]. Exceptions that the client raises are either beyond the retry policy or are not transient errors. + +If DocumentDB throttles the client, it returns an HTTP 429 error. Check the status code in the `DocumentClientException`. + +### Policy configuration +The following table shows the default settings for the `RetryOptions` class. 
+ +| Setting | Default value | Description | +| --- | --- | --- | +| MaxRetryAttemptsOnThrottledRequests |9 |The maximum number of retries if the request fails because DocumentDB applied rate limiting on the client. | +| MaxRetryWaitTimeInSeconds |30 |The maximum retry time in seconds. | + +### Example +```csharp +DocumentClient client = new DocumentClient(new Uri(endpoint), authKey); ; +var options = client.ConnectionPolicy.RetryOptions; +options.MaxRetryAttemptsOnThrottledRequests = 5; +options.MaxRetryWaitTimeInSeconds = 15; +``` + +### Telemetry +Retry attempts are logged as unstructured trace messages through a .NET **TraceSource**. You must configure a **TraceListener** to capture the events and write them to a suitable destination log. + +For example, if you add the following to your App.config file, traces will be generated in a text file in the same location as the executable: + +``` + + + + + + + + + + + + + + +``` + + +## Azure Search retry guidelines +Azure Search can be used to add powerful and sophisticated search capabilities to a website or application, quickly and easily tune search results, and construct rich and fine-tuned ranking models. + +### Retry mechanism +Retry behavior in the Azure Search SDK is controlled by the `SetRetryPolicy` method on the [SearchServiceClient] and [SearchIndexClient] classes. The default policy retries with exponential backoff when Azure Search returns a 5xx or 408 (Request Timeout) response. + +### Telemetry +Trace with ETW or by registering a custom trace provider. For more information, see the [AutoRest documentation][autorest]. + +## Azure Active Directory retry guidelines +Azure Active Directory (AD) is a comprehensive identity and access management cloud solution that combines core directory services, advanced identity governance, security, and application access management. 
Azure AD also offers developers an identity management platform to deliver access control to their applications, based on centralized policy and rules. + +### Retry mechanism +There is no built-in retry mechanism for Azure Active Directory in the Active Directory Authentication Library (ADAL). You can use the Transient Fault Handling Application Block to implement a retry strategy that contains a custom detection mechanism for the exceptions returned by Active Directory. + +### Policy configuration (Azure Active Directory) +When using the Transient Fault Handling Application Block with Azure Active Directory you create a **RetryPolicy** instance based on a class that defines the detection strategy you want to use. + +```csharp +var policy = new RetryPolicy(new ExponentialBackoff(retryCount: 5, + minBackoff: TimeSpan.FromSeconds(0), + maxBackoff: TimeSpan.FromSeconds(60), + deltaBackoff: TimeSpan.FromSeconds(2))); +``` + +You then call the **ExecuteAction** or **ExecuteAsync** method of the retry policy, passing in the operation you want to execute. + +```csharp +var result = await policy.ExecuteAsync(() => authContext.AcquireTokenAsync(resourceId, clientId, uc)); +``` + +The detection strategy class receives exceptions when a failure occurs, and must detect whether this is likely to be a transient fault or a more permanent failure. Typically it will do this by examining the exception type and status code. For example, a Service Unavailable response indicates that a retry attempt should be made. The Transient Fault Handling Application Block does not include a detection strategy class that is suitable for use with the ADAL client, but an example of a custom detection strategy is provided in the [Examples](#examples) section below. Using a custom detection strategy is no different from using one supplied with the block. 
+ +Default strategies for the Transient Fault Handling Application Block are shown in the section [Transient Fault Handling Application Block (Topaz) strategies](#transient-fault-handling-application-block-topaz-strategies) at the end of this guidance. + +### Retry usage guidance +Consider the following guidelines when using Azure Active Directory: + +* If you are using the REST API for Azure Active Directory, you should retry the operation only if the result is an error in the 5xx range (such as 500 Internal Server Error, 502 Bad Gateway, 503 Service Unavailable, and 504 Gateway Timeout). Do not retry for any other errors. +* If you are using the Active Directory Authentication Library (ADAL), HTTP codes are not readily accessible. You will need to create a custom detection strategy that includes logic to check the properties of the ADAL-specific exceptions. See the [Examples](#examples) section below. +* An exponential back-off policy is recommended for use in batch scenarios with Azure Active Directory. + +Consider starting with following settings for retrying operations. These are general purpose settings, and you should monitor the operations and fine tune the values to suit your own scenario. + +| **Context** | **Sample target E2E
    max latency** | **Retry strategy** | **Settings** | **Values** | **How it works** | +| --- | --- | --- | --- | --- | --- | +| Interactive, UI,
    or foreground |2 sec |FixedInterval |Retry count
    Retry interval
    First fast retry |3
    500 ms
    true |Attempt 1 - delay 0 sec
    Attempt 2 - delay 500 ms
    Attempt 3 - delay 500 ms | +| Background or
    batch |60 sec |ExponentialBackoff |Retry count
    Min back-off
    Max back-off
    Delta back-off
    First fast retry |5
    0 sec
    60 sec
    2 sec
    false |Attempt 1 - delay 0 sec
    Attempt 2 - delay ~2 sec
    Attempt 3 - delay ~6 sec
    Attempt 4 - delay ~14 sec
    Attempt 5 - delay ~30 sec | + +### Examples +The following code example shows how you can use the Transient Fault Handling Application Block (Topaz) to define a custom transient error detection strategy suitable for use with the ADAL client. The code creates a new **RetryPolicy** instance based on a custom detection strategy of type **AdalDetectionStrategy**, as defined in the code listing below. Custom detection strategies for Topaz implement the **ITransientErrorDetectionStrategy** interface and return **true** if a retry should be attempted, or **false** if the failure appears to be non-transient and a retry should not be attempted. + + using System; + using System.Linq; + using System.Net; + using System.Threading.Tasks; + using Microsoft.Practices.TransientFaultHandling; + using Microsoft.IdentityModel.Clients.ActiveDirectory; + + namespace RetryCodeSamples + { + class ActiveDirectoryCodeSamples + { + public async static Task Samples() + { + var authority = "[some authority]"; + var resourceId = "[some resource id]"; + var clientId = "[some client id]"; + + var authContext = new AuthenticationContext(authority); + + var uc = new UserCredential("[user]", "[password]"); + + // Use Topaz with a custom detection strategy to manage retries. + var policy = + new RetryPolicy<AdalDetectionStrategy>( + new ExponentialBackoff( + retryCount: 5, + minBackoff: TimeSpan.FromSeconds(0), + maxBackoff: TimeSpan.FromSeconds(60), + deltaBackoff: TimeSpan.FromSeconds(2))); + + var result = await policy.ExecuteAsync(() => authContext.AcquireTokenAsync(resourceId, clientId, uc)); + + // Get the access token + var accessToken = result.AccessToken; + + // Use the result, probably to authorize an API call. + } + } + + // TODO: This is sample code that needs validation from the WAAD team! 
+ // based on existing detection strategies + public class AdalDetectionStrategy : ITransientErrorDetectionStrategy + { + private static readonly WebExceptionStatus[] webExceptionStatus = + new[] + { + WebExceptionStatus.ConnectionClosed, + WebExceptionStatus.Timeout, + WebExceptionStatus.RequestCanceled + }; + + private static readonly HttpStatusCode[] httpStatusCodes = + new[] + { + HttpStatusCode.InternalServerError, + HttpStatusCode.GatewayTimeout, + HttpStatusCode.ServiceUnavailable, + HttpStatusCode.RequestTimeout + }; + + public bool IsTransient(Exception ex) + { + var adalException = ex as AdalException; + if (adalException == null) + { + return false; + } + + if (adalException.ErrorCode == AdalError.ServiceUnavailable) + { + return true; + } + + var innerWebException = adalException.InnerException as WebException; + if (innerWebException != null) + { + if (webExceptionStatus.Contains(innerWebException.Status)) + { + return true; + } + + if (innerWebException.Status == WebExceptionStatus.ProtocolError) + { + var response = innerWebException.Response as HttpWebResponse; + return response != null && httpStatusCodes.Contains(response.StatusCode); + } + } + + return false; + } + } + } + +For information about retrying Active Directory Graph API operations and the error codes returned see: + +* [Code Sample: Retry Logic](http://msdn.microsoft.com/library/azure/dn448547.aspx) +* [Azure AD Graph Error Codes](http://msdn.microsoft.com/library/azure/hh974480.aspx) + +### More information +* [Implementing a Custom Detection Strategy](http://msdn.microsoft.com/library/hh680940.aspx) (Topaz) +* [Implementing a Custom Retry Strategy](http://msdn.microsoft.com/library/hh680943.aspx) (Topaz) +* [Token Issuance and Retry Guidelines](http://msdn.microsoft.com/library/azure/dn168916.aspx) + +## General REST and retry guidelines +Consider the following when accessing Azure or third party services: + +* Use a systematic approach to managing retries, perhaps as reusable code, 
so that you can apply a consistent methodology across all clients and all solutions. +* Consider using a retry framework such as the Transient Fault Handling Application Block to manage retries if the target service or client has no built-in retry mechanism. This will help you implement a consistent retry behavior, and it may provide a suitable default retry strategy for the target service. However, you may need to create custom retry code for services that have non-standard behavior, that do not rely on exceptions to indicate transient failures, or if you want to use a **Retry-Response** reply to manage retry behavior. +* The transient detection logic will depend on the actual client API you use to invoke the REST calls. Some clients, such as the newer **HttpClient** class, will not throw exceptions for completed requests with a non-success HTTP status code. This improves performance but prevents the use of the Transient Fault Handling Application Block. In this case you could wrap the call to the REST API with code that produces exceptions for non-success HTTP status codes, which can then be processed by the block. Alternatively, you can use a different mechanism to drive the retries. +* The HTTP status code returned from the service can help to indicate whether the failure is transient. You may need to examine the exceptions generated by a client or the retry framework to access the status code or to determine the equivalent exception type. 
The following HTTP codes typically indicate that a retry is appropriate: + * 408 Request Timeout + * 500 Internal Server Error + * 502 Bad Gateway + * 503 Service Unavailable + * 504 Gateway Timeout +* If you base your retry logic on exceptions, the following typically indicate a transient failure where no connection could be established: + * WebExceptionStatus.ConnectionClosed + * WebExceptionStatus.ConnectFailure + * WebExceptionStatus.Timeout + * WebExceptionStatus.RequestCanceled +* In the case of a service unavailable status, the service might indicate the appropriate delay before retrying in the **Retry-After** response header or a different custom header (as in the DocumentDB service). Services might also send additional information as custom headers, or embedded in the content of the response. The Transient Fault Handling Application Block cannot use the standard or any custom “retry-after” headers. +* Do not retry for status codes representing client errors (errors in the 4xx range) except for a 408 Request Timeout. +* Thoroughly test your retry strategies and mechanisms under a range of conditions, such as different network states and varying system loadings. + +### Retry strategies +The following are the typical types of retry strategy intervals: + +* **Exponential**: A retry policy that performs a specified number of retries, using a randomized exponential back off approach to determine the interval between retries. For example: + + var random = new Random(); + + var delta = (int)((Math.Pow(2.0, currentRetryCount) - 1.0) * + random.Next((int)(this.deltaBackoff.TotalMilliseconds * 0.8), + (int)(this.deltaBackoff.TotalMilliseconds * 1.2))); + var interval = (int)Math.Min(checked(this.minBackoff.TotalMilliseconds + delta), + this.maxBackoff.TotalMilliseconds); + retryInterval = TimeSpan.FromMilliseconds(interval); +* **Incremental**: A retry strategy with a specified number of retry attempts and an incremental time interval between retries. 
For example: + + retryInterval = TimeSpan.FromMilliseconds(this.initialInterval.TotalMilliseconds + + (this.increment.TotalMilliseconds * currentRetryCount)); +* **LinearRetry**: A retry policy that performs a specified number of retries, using a specified fixed time interval between retries. For example: + + retryInterval = this.deltaBackoff; + +### More information +* [Circuit breaker strategies](http://msdn.microsoft.com/library/dn589784.aspx) + +## Transient Fault Handling Application Block (Topaz) strategies +The Transient Fault Handling Application Block has the following default strategies. + +| **Strategy** | **Setting** | **Default value** | **Meaning** | +| --- | --- | --- | --- | +| **Exponential** |retryCount
    minBackoff

    maxBackoff

    deltaBackoff

    fastFirstRetry |10
    1 second

    30 seconds

    10 seconds

    true |The number of retry attempts.
    The minimum back-off time. The higher of this value or the computed back-off will be used as the retry delay.
    The maximum back-off time. The lower of this value or the computed back-off will be used as the retry delay.
    The value used to calculate a random delta for the exponential delay between retries.
    Whether the first retry attempt will be made immediately. | +| **Incremental** |retryCount
    initialInterval
    increment

    fastFirstRetry
    |10
    1 second
    1 second

    true |The number of retry attempts.
    The initial interval that will apply for the first retry.
    The incremental time value that will be used to calculate the progressive delay between retries.
    Whether the first retry attempt will be made immediately. | +| **Linear (fixed interval)** |retryCount
    retryInterval
    fastFirstRetry
    |10
    1 second
    true |The number of retry attempts.
    The delay between retries.
    Whether first retry attempt will be made immediately. | + +For examples of using the Transient Fault Handling Application Block, see the Examples sections earlier in this guidance for Azure SQL Database using ADO.NET and Azure Active Directory. + + + +[autorest]: https://github.com/Azure/autorest/tree/master/docs +[ConnectionPolicy.RetryOptions]: https://msdn.microsoft.com/library/azure/microsoft.azure.documents.client.connectionpolicy.retryoptions.aspx +[entlib]: http://msdn.microsoft.com/library/dn440719.aspx +[SearchIndexClient]: https://msdn.microsoft.com/library/azure/microsoft.azure.search.searchindexclient.aspx +[SearchServiceClient]: https://msdn.microsoft.com/library/microsoft.azure.search.searchserviceclient.aspx diff --git a/docs/best-practices/toc.md b/docs/best-practices/toc.md new file mode 100644 index 00000000000..5e2c62e1905 --- /dev/null +++ b/docs/best-practices/toc.md @@ -0,0 +1,12 @@ +# Best Practices +## [API design](./api-design.md) +## [API implementation](./api-implementation.md) +## [Autoscaling](./auto-scaling.md) +## [Background jobs](./background-jobs.md) +## [Caching](./caching.md) +## [Content Delivery Network](./cdn.md) +## [Data partitioning](./data-partitioning.md) +## [Monitoring and diagnostics](./monitoring.md) +## [Naming conventions](./naming-conventions.md) +## [Retry guidance for specific services](./retry-service-specific.md) +## [Transient fault handling](./transient-faults.md) diff --git a/docs/best-practices/transient-faults.md b/docs/best-practices/transient-faults.md new file mode 100644 index 00000000000..9a5066b3409 --- /dev/null +++ b/docs/best-practices/transient-faults.md @@ -0,0 +1,116 @@ +--- +title: Retry general guidance +description: Guidance on retry for transient fault handling. 
+services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +pnp.series.title: Best Practices + +ms.assetid: 5115cbf2-ecdd-42a4-8191-886de7bbadef +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Transient fault handling +[!INCLUDE [header](../_includes/header.md)] + +All applications that communicate with remote services and resources must be sensitive to transient faults. This is especially the case for applications that run in the cloud, where the nature of the environment and connectivity over the Internet means these types of faults are likely to be encountered more often. Transient faults include the momentary loss of network connectivity to components and services, the temporary unavailability of a service, or timeouts that arise when a service is busy. These faults are often self-correcting, and if the action is repeated after a suitable delay it is likely to succeed. + +This document covers general guidance for transient fault handling. For information about handling transient faults when using Microsoft Azure services, see [Azure service-specific retry guidelines](./retry-service-specific.md). + +## Why do transient faults occur in the cloud? +Transient faults can occur in any environment, on any platform or operating system, and in any kind of application. In solutions that run on local, on-premises infrastructure, performance and availability of the application and its components is typically maintained through expensive and often under-used hardware redundancy, and components and resources are located close to one another. While this makes a failure less likely, it can still result in transient faults - and even an outage through unforeseen events such as external power supply or network issues, or other disaster scenarios. 
+ +Cloud hosting, including private cloud systems, can offer a higher overall availability by using shared resources, redundancy, automatic failover, and dynamic resource allocation across a huge number of commodity compute nodes. However, the nature of these environments can mean that transient faults are more likely to occur. There are several reasons for this: + +* Many resources in a cloud environment are shared, and access to these resources is subject to throttling in order to protect the resource. Some services will refuse connections when the load rises to a specific level, or a maximum throughput rate is reached, in order to allow processing of existing requests and to maintain performance of the service for all users. Throttling helps to maintain the quality of service for neighbors and other tenants using the shared resource. +* Cloud environments are built using vast numbers of commodity hardware units. They deliver performance by dynamically distributing the load across multiple computing units and infrastructure components, and deliver reliability by automatically recycling or replacing failed units. This dynamic nature means that transient faults and temporary connection failures may occasionally occur. +* There are often more hardware components, including network infrastructure such as routers and load balancers, between the application and the resources and services it uses. This additional infrastructure can occasionally introduce additional connection latency and transient connection faults. +* Network conditions between the client and the server may be variable, especially when communication crosses the Internet. Even in on-premises locations, very heavy traffic loads may slow communication and cause intermittent connection failures. + +## Challenges +Transient faults can have a huge impact on the perceived availability of an application, even if it has been thoroughly tested under all foreseeable circumstances. 
To ensure that cloud-hosted applications operate reliably, they must be able to respond to the following challenges: + +* The application must be able to detect faults when they occur, and determine if these faults are likely to be transient, more long-lasting, or are terminal failures. Different resources are likely to return different responses when a fault occurs, and these responses may also vary depending on the context of the operation; for example, the response for an error when reading from storage may be different from the response for an error when writing to storage. Many resources and services have well-documented transient failure contracts. However, where such information is not available, it may be difficult to discover the nature of the fault and whether it is likely to be transient. +* The application must be able to retry the operation if it determines that the fault is likely to be transient and keep track of the number of times the operation was retried. +* The application must use an appropriate strategy for the retries. This strategy specifies the number of times it should retry, the delay between each attempt, and the actions to take after a failed attempt. The appropriate number of attempts and the delay between each one are often difficult to determine, and vary based on the type of resource as well as the current operating conditions of the resource and the application itself. + +## General guidelines +The following guidelines will help you to design a suitable transient fault handling mechanism for your applications: + +* **Determine if there is a built-in retry mechanism:** + * Many services provide an SDK or client library that contains a transient fault handling mechanism. The retry policy it uses is typically tailored to the nature and requirements of the target service. 
Alternatively, REST interfaces for services may return information that is useful in determining whether a retry is appropriate, and how long to wait before the next retry attempt. + * Use the built-in retry mechanism where one is available unless you have specific and well-understood requirements that mean a different retry behavior is more appropriate. +* **Determine if the operation is suitable for retrying**: + * You should only retry operations where the faults are transient (typically indicated by the nature of the error), and if there is at least some likelihood that the operation will succeed when reattempted. There is no point in reattempting operations that indicate an invalid operation such as a database update to an item that does not exist, or requests to a service or resource that has suffered a fatal error + * In general, you should implement retries only where the full impact of this can be determined, and the conditions are well understood and can be validated. If not, leave it to the calling code to implement retries. Remember that the errors returned from resources and services outside your control may evolve over time, and you may need to revisit your transient fault detection logic. + * When you create services or components, consider implementing error codes and messages that will help clients determine whether they should retry failed operations. In particular, indicate if the client should retry the operation (perhaps by returning an **isTransient** value) and suggest a suitable delay before the next retry attempt. If you build a web service, consider returning custom errors defined within your service contracts. Even though generic clients may not be able to read these, they will be useful when building custom clients. +* **Determine an appropriate retry count and interval:** + * It is vital to optimize the retry count and the interval to the type of use case. 
If you do not retry a sufficient number of times, the application will be unable to complete the operation and is likely to experience a failure. If you retry too many times, or with too short an interval between tries, the application can potentially hold resources such as threads, connections, and memory for long periods, which will adversely affect the health of the application. + * The appropriate values for the time interval and the number of retry attempts depend on the type of operation being attempted. For example, if the operation is part of a user interaction, the interval should be short and only a few retries attempted to avoid making users wait for a response (which holds open connections and can reduce availability for other users). If the operation is part of a long running or critical workflow, where cancelling and restarting the process is expensive or time-consuming, it is appropriate to wait longer between attempts and retry more times. + * Determining the appropriate intervals between retries is the most difficult part of designing a successful strategy. Typical strategies use the following types of retry interval: + * **Exponential back-off**. The application waits a short time before the first retry, and then exponentially increasing times between each subsequent retry. For example, it may retry the operation after 3 seconds, 12 seconds, 30 seconds, and so on. + * **Incremental intervals**. The application waits a short time before the first retry, and then incrementally increasing times between each subsequent retry. For example, it may retry the operation after 3 seconds, 7 seconds, 13 seconds, and so on. + * **Regular intervals**. The application waits for the same period of time between each attempt. For example, it may retry the operation every 3 seconds. + * **Immediate retry**. Sometimes a transient fault is extremely short, perhaps caused by an event such as a network packet collision or a spike in a hardware component. 
In this case, retrying the operation immediately is appropriate because it may succeed if the fault has cleared in the time it takes the application to assemble and send the next request. However, there should never be more than one immediate retry attempt, and you should switch to alternative strategies, such as exponential back-off or fallback actions, if the immediate retry fails. + * **Randomization**. Any of the retry strategies listed above may include a randomization to prevent multiple instances of the client sending subsequent retry attempts at the same time. For example, one instance may retry the operation after 3 seconds, 11 seconds, 28 seconds, and so on while another instance may retry the operation after 4 seconds, 12 seconds, 26 seconds, and so on. Randomization is a useful technique that may be combined with other strategies. + * As a general guideline, use an exponential back-off strategy for background operations, and immediate or regular interval retry strategies for interactive operations. In both cases, you should choose the delay and the retry count so that the maximum latency for all retry attempts is within the required end-to-end latency requirement. + * Take into account the combination of all the factors that contribute to the overall maximum timeout for a retried operation. These factors include the time taken for a failed connection to produce a response (typically set by a timeout value in the client) as well as the delay between retry attempts and the maximum number of retries. The total of all these times can result in very large overall operation times, especially when using an exponential delay strategy where the interval between retries grows rapidly after each failure. 
If a process must meet a specific service level agreement (SLA), the overall operation time, including all timeouts and delays, must be within that defined in the SLA. + * Over-aggressive retry strategies, which have too short intervals or too many retries, can have an adverse effect on the target resource or service. This may prevent the resource or service from recovering from its overloaded state, and it will continue to block or refuse requests. This results in a vicious circle where more and more requests are sent to the resource or service, and consequently its ability to recover is further reduced. + * Take into account the timeout of the operations when choosing the retry intervals to avoid launching a subsequent attempt immediately (for example, if the timeout period is similar to the retry interval). Also consider if you need to keep the total possible period (the timeout plus the retry intervals) to below a specific total time. Operations that have unusually short or very long timeouts may influence how long to wait, and how often to retry the operation. + * Use the type of the exception and any data it contains, or the error codes and messages returned from the service, to optimize the interval and the number of retries. For example, some exceptions or error codes (such as the HTTP code 503 Service Unavailable with a Retry-After header in the response) may indicate how long the error might last, or that the service has failed and will not respond to any subsequent attempt. +* **Avoid anti-patterns**: + * In the vast majority of cases, you should avoid implementations that include duplicated layers of retry code. Avoid designs that include cascading retry mechanisms, or that implement retry at every stage of an operation that involves a hierarchy of requests, unless you have specific requirements that demand this. 
In these exceptional circumstances, use policies that prevent excessive numbers of retries and delay periods, and make sure you understand the consequences. For example, if one component makes a request to another, which then accesses the target service, and you implement retry with a count of three on both calls there will be nine retry attempts in total against the service. Many services and resources implement a built-in retry mechanism and you should investigate how you can disable or modify this if you need to implement retries at a higher level. + * Never implement an endless retry mechanism. This is likely to prevent the resource or service recovering from overload situations, and cause throttling and refused connections to continue for a longer period. Use a finite number of retries, or implement a pattern such as [Circuit Breaker](http://msdn.microsoft.com/library/dn589784.aspx) to allow the service to recover. + * Never perform an immediate retry more than once. + * Avoid using a regular retry interval, especially when you have a large number of retry attempts, when accessing services and resources in Azure. The optimum approach in this scenario is an exponential back-off strategy with a circuit-breaking capability. + * Prevent multiple instances of the same client, or multiple instances of different clients, from sending retries at the same time. If this is likely to occur, introduce randomization into the retry intervals. +* **Test your retry strategy and implementation:** + * Ensure you fully test your retry strategy implementation under as wide a set of circumstances as possible, especially when both the application and the target resources or services it uses are under extreme load. To check behavior during testing, you can: + * Inject transient and non-transient faults into the service. For example, send invalid requests or add code that detects test requests and responds with different types of errors. 
For an example using TestApi, see [Fault Injection Testing with TestApi](http://msdn.microsoft.com/magazine/ff898404.aspx) and [Introduction to TestApi – Part 5: Managed Code Fault Injection APIs](http://blogs.msdn.com/b/ivo_manolov/archive/2009/11/25/9928447.aspx). + * Create a mock of the resource or service that returns a range of errors that the real service may return. Ensure you cover all the types of error that your retry strategy is designed to detect. + * Force transient errors to occur by temporarily disabling or overloading the service if it is a custom service that you created and deployed (you should not, of course, attempt to overload any shared resources or shared services within Azure). + * For HTTP-based APIs, consider using the FiddlerCore library in your automated tests to change the outcome of HTTP requests, either by adding extra roundtrip times or by changing the response (such as the HTTP status code, headers, body, or other factors). This enables deterministic testing of a subset of the failure conditions, whether transient faults or other types of failure. For more information, see [FiddlerCore](http://www.telerik.com/fiddler/fiddlercore). For examples of how to use the library, particularly the **HttpMangler** class, examine the [source code for the Azure Storage SDK](https://github.com/Azure/azure-storage-net/tree/master/Test). + * Perform high load factor and concurrent tests to ensure that the retry mechanism and strategy works correctly under these conditions, and does not have an adverse effect on the operation of the client or cause cross-contamination between requests. +* **Manage retry policy configurations:** + * A *retry policy* is a combination of all of the elements of your retry strategy. 
It defines the detection mechanism that determines whether a fault is likely to be transient, the type of interval to use (such as regular, exponential back-off, and randomization), the actual interval value(s), and the number of times to retry. + * Retries must be implemented in many places within even the simplest application, and in every layer of more complex applications. Rather than hard-coding the elements of each policy at multiple locations, consider using a central point for storing all the policies. For example, store the values such as the interval and retry count in application configuration files, read them at runtime, and programmatically build the retry policies. This makes it easier to manage the settings, and to modify and fine tune the values in order to respond to changing requirements and scenarios. However, design the system to store the values rather than rereading a configuration file every time, and ensure suitable defaults are used if the values cannot be obtained from configuration. + * In an Azure Cloud Services application, consider storing the values that are used to build the retry policies at runtime in the service configuration file so that they can be changed without needing to restart the application. + * Take advantage of built-in or default retry strategies available in the client APIs you use, but only where they are appropriate for your scenario. These strategies are typically general-purpose. In some scenarios they may be all that is required, but in other scenarios they may not offer the full range of options to suit your specific requirements. You must understand how the settings will affect your application through testing to determine the most appropriate values. +* **Log and track transient and non-transient faults:** + * As part of your retry strategy, include exception handling and other instrumentation that logs when retry attempts are made. 
While an occasional transient failure and retry are to be expected, and do not indicate a problem, regular and increasing numbers of retries are often an indicator of an issue that may cause a failure, or is currently impacting application performance and availability. + * Log transient faults as Warning entries rather than Error entries so that monitoring systems do not detect them as application errors that may trigger false alerts. + * Consider storing a value in your log entries that indicates if the retries were caused by throttling in the service, or by other types of faults such as connection failures, so that you can differentiate them during analysis of the data. An increase in the number of throttling errors is often an indicator of a design flaw in the application or the need to switch to a premium service that offers dedicated hardware. + * Consider measuring and logging the overall time taken for operations that include a retry mechanism. This is a good indicator of the overall effect of transient faults on user response times, process latency, and the efficiency of the application use cases. Also log the number of retries that occurred in order to understand the factors that contributed to the response time. + * Consider implementing a telemetry and monitoring system that can raise alerts when the number and rate of failures, the average number of retries, or the overall times taken for operations to succeed, is increasing. +* **Manage operations that continually fail:** + + * There will be circumstances where the operation continues to fail at every attempt, and it is vital to consider how you will handle this situation: + * Although a retry strategy will define the maximum number of times that an operation should be retried, it does not prevent the application repeating the operation again, with the same number of retries.
For example, if an order processing service fails with a fatal error that puts it out of action permanently, the retry strategy may detect a connection timeout and consider it to be a transient fault. The code will retry the operation a specified number of times and then give up. However, when another customer places an order, the operation will be attempted again - even though it is sure to fail every time. + * To prevent continual retries for operations that continually fail, consider implementing the [Circuit Breaker pattern](http://msdn.microsoft.com/library/dn589784.aspx). In this pattern, if the number of failures within a specified time window exceeds the threshold, requests are returned to the caller immediately as errors, without attempting to access the failed resource or service. + * The application can periodically test the service, on an intermittent basis and with very long intervals between requests, to detect when it becomes available. An appropriate interval will depend on the scenario, such as the criticality of the operation and the nature of the service, and might be anything between a few minutes and several hours. At the point where the test succeeds, the application can resume normal operations and pass requests to the newly recovered service. + * In the meantime, it may be possible to fall back to another instance of the service (perhaps in a different datacenter or application), use a similar service that offers compatible (perhaps simpler) functionality, or perform some alternative operations in the hope that the service will become available soon. For example, it may be appropriate to store requests for the service in a queue or data store and replay them later. Otherwise you might be able to redirect the user to an alternative instance of the application, degrade the performance of the application but still offer acceptable functionality, or just return a message to the user indicating that the application is not available at present. 
+* **Other considerations** + + * When deciding on the values for the number of retries and the retry intervals for a policy, consider if the operation on the service or resource is part of a long-running or multi-step operation. It may be difficult or expensive to compensate all the other operational steps that have already succeeded when one fails. In this case, a very long interval and a large number of retries may be acceptable as long as it does not block other operations by holding or locking scarce resources. + * Consider if retrying the same operation may cause inconsistencies in data. If some parts of a multi-step process are repeated, and the operations are not idempotent, it may result in an inconsistency. For example, an operation that increments a value, if repeated, will produce an invalid result. Repeating an operation that sends a message to a queue may cause an inconsistency in the message consumer if it cannot detect duplicate messages. To prevent this, ensure that you design each step as an idempotent operation. For more information about idempotency, see [Idempotency Patterns](http://blog.jonathanoliver.com/2010/04/idempotency-patterns/). + * Consider the scope of the operations that will be retried. For example, it may be easier to implement retry code at a level that encompasses several operations, and retry them all if one fails. However, doing this may result in idempotency issues or unnecessary rollback operations. + * If you choose a retry scope that encompasses several operations, take into account the total latency of all of them when determining the retry intervals, when monitoring the time taken, and before raising alerts for failures. + * Consider how your retry strategy may affect neighbors and other tenants in a shared application, or when using shared resources and services. 
Aggressive retry policies can cause an increasing number of transient faults to occur for these other users and for applications that share the resources and services. Likewise, your application may be affected by the retry policies implemented by other users of the resources and services. For mission-critical applications, you may decide to use premium services that are not shared. This provides you with much more control over the load and consequent throttling of these resources and services, which can help to justify the additional cost. + +## More information +* [Azure service-specific retry guidelines](./retry-service-specific.md) +* [The Transient Fault Handling Application Block](http://msdn.microsoft.com/library/hh680934.aspx) +* [Circuit Breaker Pattern](http://msdn.microsoft.com/library/dn589784.aspx) +* [Compensating Transaction Pattern](http://msdn.microsoft.com/library/dn589804.aspx) +* [Idempotency Patterns](http://blog.jonathanoliver.com/2010/04/idempotency-patterns/) + diff --git a/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-firewalls.png b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-firewalls.png new file mode 100644 index 00000000000..c3e538d0bff Binary files /dev/null and b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-firewalls.png differ diff --git a/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-monitor.png b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-monitor.png new file mode 100644 index 00000000000..d8d15132726 Binary files /dev/null and b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-monitor.png differ diff --git a/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-proxy.png b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-proxy.png new file mode 100644 index 00000000000..18fc4293388 Binary files /dev/null and b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-proxy.png 
differ diff --git a/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-redundant-routers.png b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-redundant-routers.png new file mode 100644 index 00000000000..7eeb86f1740 Binary files /dev/null and b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute-redundant-routers.png differ diff --git a/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute.png b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute.png new file mode 100644 index 00000000000..4bcd7ed1b5d Binary files /dev/null and b/docs/blueprints/_images/blueprints/arch-iaas-hybrid-expressroute.png differ diff --git a/docs/blueprints/_images/blueprints/compute-multi-dc-linux.png b/docs/blueprints/_images/blueprints/compute-multi-dc-linux.png new file mode 100644 index 00000000000..1dfe3cfb5fd Binary files /dev/null and b/docs/blueprints/_images/blueprints/compute-multi-dc-linux.png differ diff --git a/docs/blueprints/_images/blueprints/compute-multi-dc.png b/docs/blueprints/_images/blueprints/compute-multi-dc.png new file mode 100644 index 00000000000..664776880c0 Binary files /dev/null and b/docs/blueprints/_images/blueprints/compute-multi-dc.png differ diff --git a/docs/blueprints/_images/blueprints/compute-multi-vm.png b/docs/blueprints/_images/blueprints/compute-multi-vm.png new file mode 100644 index 00000000000..28e60cd86c3 Binary files /dev/null and b/docs/blueprints/_images/blueprints/compute-multi-vm.png differ diff --git a/docs/blueprints/_images/blueprints/compute-n-tier-linux.png b/docs/blueprints/_images/blueprints/compute-n-tier-linux.png new file mode 100644 index 00000000000..aa11f7d99a8 Binary files /dev/null and b/docs/blueprints/_images/blueprints/compute-n-tier-linux.png differ diff --git a/docs/blueprints/_images/blueprints/compute-n-tier.png b/docs/blueprints/_images/blueprints/compute-n-tier.png new file mode 100644 index 00000000000..49b68fb3b11 Binary files /dev/null and 
b/docs/blueprints/_images/blueprints/compute-n-tier.png differ diff --git a/docs/blueprints/_images/blueprints/deploybutton.png b/docs/blueprints/_images/blueprints/deploybutton.png new file mode 100644 index 00000000000..e81f2c1c573 Binary files /dev/null and b/docs/blueprints/_images/blueprints/deploybutton.png differ diff --git a/docs/blueprints/_images/blueprints/hybrid-network-expressroute-vpn-failover.png b/docs/blueprints/_images/blueprints/hybrid-network-expressroute-vpn-failover.png new file mode 100644 index 00000000000..790b176da1a Binary files /dev/null and b/docs/blueprints/_images/blueprints/hybrid-network-expressroute-vpn-failover.png differ diff --git a/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet-dmz.png b/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet-dmz.png new file mode 100644 index 00000000000..97cb2e5d7f5 Binary files /dev/null and b/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet-dmz.png differ diff --git a/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet.png b/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet.png new file mode 100644 index 00000000000..632e3d8ed14 Binary files /dev/null and b/docs/blueprints/_images/blueprints/hybrid-network-secure-vnet.png differ diff --git a/docs/blueprints/_images/blueprints/hybrid-network-vpn.png b/docs/blueprints/_images/blueprints/hybrid-network-vpn.png new file mode 100644 index 00000000000..c19615efd3d Binary files /dev/null and b/docs/blueprints/_images/blueprints/hybrid-network-vpn.png differ diff --git a/docs/blueprints/_images/blueprints/paas-basic-web-app-staging-slots.png b/docs/blueprints/_images/blueprints/paas-basic-web-app-staging-slots.png new file mode 100644 index 00000000000..2f48d89ad40 Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-basic-web-app-staging-slots.png differ diff --git a/docs/blueprints/_images/blueprints/paas-basic-web-app.png 
b/docs/blueprints/_images/blueprints/paas-basic-web-app.png new file mode 100644 index 00000000000..c6388ab61af Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-basic-web-app.png differ diff --git a/docs/blueprints/_images/blueprints/paas-basic-web-simplified.png b/docs/blueprints/_images/blueprints/paas-basic-web-simplified.png new file mode 100644 index 00000000000..3e3cb090926 Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-basic-web-simplified.png differ diff --git a/docs/blueprints/_images/blueprints/paas-web-app-multi-region.png b/docs/blueprints/_images/blueprints/paas-web-app-multi-region.png new file mode 100644 index 00000000000..1378ce0517a Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-web-app-multi-region.png differ diff --git a/docs/blueprints/_images/blueprints/paas-web-multiregion-simplified.png b/docs/blueprints/_images/blueprints/paas-web-multiregion-simplified.png new file mode 100644 index 00000000000..179ffff3204 Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-web-multiregion-simplified.png differ diff --git a/docs/blueprints/_images/blueprints/paas-web-scalability-simplified.png b/docs/blueprints/_images/blueprints/paas-web-scalability-simplified.png new file mode 100644 index 00000000000..171563904af Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-web-scalability-simplified.png differ diff --git a/docs/blueprints/_images/blueprints/paas-web-scalability.png b/docs/blueprints/_images/blueprints/paas-web-scalability.png new file mode 100644 index 00000000000..b5a2e8a2733 Binary files /dev/null and b/docs/blueprints/_images/blueprints/paas-web-scalability.png differ diff --git a/docs/blueprints/_images/compute/compute-multi-region.png b/docs/blueprints/_images/compute/compute-multi-region.png new file mode 100644 index 00000000000..69c6c0d3b4e Binary files /dev/null and b/docs/blueprints/_images/compute/compute-multi-region.png differ diff 
--git a/docs/blueprints/_images/compute/compute-multi-tier.png b/docs/blueprints/_images/compute/compute-multi-tier.png new file mode 100644 index 00000000000..60e4ec98de0 Binary files /dev/null and b/docs/blueprints/_images/compute/compute-multi-tier.png differ diff --git a/docs/blueprints/_images/compute/compute-multi-vm.png b/docs/blueprints/_images/compute/compute-multi-vm.png new file mode 100644 index 00000000000..2db3c8c0168 Binary files /dev/null and b/docs/blueprints/_images/compute/compute-multi-vm.png differ diff --git a/docs/blueprints/_images/compute/compute-single-vm.png b/docs/blueprints/_images/compute/compute-single-vm.png new file mode 100644 index 00000000000..886459b5d1f Binary files /dev/null and b/docs/blueprints/_images/compute/compute-single-vm.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure1.png b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure1.png new file mode 100644 index 00000000000..c2a0600d6de Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure1.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure2.png b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure2.png new file mode 100644 index 00000000000..83eebc500a2 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure2.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure3.png b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure3.png new file mode 100644 index 00000000000..0f63847a1fd Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure3.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure4.png b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure4.png new file mode 100644 index 00000000000..d0127c974f6 Binary files /dev/null and 
b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure4.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure5.png b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure5.png new file mode 100644 index 00000000000..298760f2d24 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-expressroute/figure5.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-counters.png b/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-counters.png new file mode 100644 index 00000000000..285b5dc8bb8 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-counters.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-graph.png b/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-graph.png new file mode 100644 index 00000000000..955b617b2c2 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-vpn/RRAS-perf-graph.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-vpn/arch-diagram.png b/docs/blueprints/_images/guidance-hybrid-network-vpn/arch-diagram.png new file mode 100644 index 00000000000..57271d6bf5b Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-vpn/arch-diagram.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-vpn/audit-logs.png b/docs/blueprints/_images/guidance-hybrid-network-vpn/audit-logs.png new file mode 100644 index 00000000000..c4d3d500751 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-vpn/audit-logs.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network-vpn/partitioned-vpn.png b/docs/blueprints/_images/guidance-hybrid-network-vpn/partitioned-vpn.png new file mode 100644 index 00000000000..98dc37dda55 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network-vpn/partitioned-vpn.png differ diff --git 
a/docs/blueprints/_images/guidance-hybrid-network/figure1.png b/docs/blueprints/_images/guidance-hybrid-network/figure1.png new file mode 100644 index 00000000000..51d581283b6 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network/figure1.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network/figure2.png b/docs/blueprints/_images/guidance-hybrid-network/figure2.png new file mode 100644 index 00000000000..749f14eb9ca Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network/figure2.png differ diff --git a/docs/blueprints/_images/guidance-hybrid-network/figure3.png b/docs/blueprints/_images/guidance-hybrid-network/figure3.png new file mode 100644 index 00000000000..2e4ee269fe0 Binary files /dev/null and b/docs/blueprints/_images/guidance-hybrid-network/figure3.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure1.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure1.png new file mode 100644 index 00000000000..fe1da87a872 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure1.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure10.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure10.png new file mode 100644 index 00000000000..efdfb059254 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure10.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure11.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure11.png new file mode 100644 index 00000000000..908006c5432 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure11.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure12.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure12.png new file mode 100644 index 00000000000..33516a5b2f7 Binary files /dev/null and 
b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure12.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure13.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure13.png new file mode 100644 index 00000000000..18569b32f76 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure13.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure14.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure14.png new file mode 100644 index 00000000000..e73139dc8a4 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure14.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure2.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure2.png new file mode 100644 index 00000000000..5b04d992cc5 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure2.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure3.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure3.png new file mode 100644 index 00000000000..b78ee676d1a Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure3.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure4.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure4.png new file mode 100644 index 00000000000..e8f43d34157 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure4.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure5.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure5.png new file mode 100644 index 00000000000..d535b53358a Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure5.png differ diff --git 
a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure6.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure6.png new file mode 100644 index 00000000000..d0424e906d4 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure6.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure7.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure7.png new file mode 100644 index 00000000000..e033a2ef618 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure7.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure8.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure8.png new file mode 100644 index 00000000000..22a874791ef Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure8.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure9.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure9.png new file mode 100644 index 00000000000..3d3514d4237 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-ad/figure9.png differ diff --git a/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-adfs/figure1.png b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-adfs/figure1.png new file mode 100644 index 00000000000..a5cf10dab27 Binary files /dev/null and b/docs/blueprints/_images/guidance-iaas-ra-secure-vnet-adfs/figure1.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad-resource-forest/figure1.png b/docs/blueprints/_images/guidance-identity-aad-resource-forest/figure1.png new file mode 100644 index 00000000000..f3780c1c9ef Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad-resource-forest/figure1.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure1.png b/docs/blueprints/_images/guidance-identity-aad/figure1.png new file mode 
100644 index 00000000000..62b1a956ebd Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure1.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure10.png b/docs/blueprints/_images/guidance-identity-aad/figure10.png new file mode 100644 index 00000000000..28ba8819221 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure10.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure11.png b/docs/blueprints/_images/guidance-identity-aad/figure11.png new file mode 100644 index 00000000000..b430e739c8c Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure11.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure12.png b/docs/blueprints/_images/guidance-identity-aad/figure12.png new file mode 100644 index 00000000000..3837d6a4702 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure12.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure13.png b/docs/blueprints/_images/guidance-identity-aad/figure13.png new file mode 100644 index 00000000000..10efa01626a Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure13.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure14.png b/docs/blueprints/_images/guidance-identity-aad/figure14.png new file mode 100644 index 00000000000..ff6016a1b7c Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure14.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure15.png b/docs/blueprints/_images/guidance-identity-aad/figure15.png new file mode 100644 index 00000000000..e79f4784cec Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure15.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure16.png b/docs/blueprints/_images/guidance-identity-aad/figure16.png new file mode 100644 index 00000000000..699f5b6cb1a 
Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure16.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure17.png b/docs/blueprints/_images/guidance-identity-aad/figure17.png new file mode 100644 index 00000000000..2f1f00ff229 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure17.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure18.png b/docs/blueprints/_images/guidance-identity-aad/figure18.png new file mode 100644 index 00000000000..2fd54e77afb Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure18.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure19.png b/docs/blueprints/_images/guidance-identity-aad/figure19.png new file mode 100644 index 00000000000..0961ae9b728 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure19.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure2.png b/docs/blueprints/_images/guidance-identity-aad/figure2.png new file mode 100644 index 00000000000..699dc4229e5 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure2.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure20.png b/docs/blueprints/_images/guidance-identity-aad/figure20.png new file mode 100644 index 00000000000..a5c84223ade Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure20.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure21.png b/docs/blueprints/_images/guidance-identity-aad/figure21.png new file mode 100644 index 00000000000..a46fc754a55 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure21.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure22.png b/docs/blueprints/_images/guidance-identity-aad/figure22.png new file mode 100644 index 00000000000..9570c2e0191 Binary files /dev/null and 
b/docs/blueprints/_images/guidance-identity-aad/figure22.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure23.png b/docs/blueprints/_images/guidance-identity-aad/figure23.png new file mode 100644 index 00000000000..869893b1989 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure23.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure24.png b/docs/blueprints/_images/guidance-identity-aad/figure24.png new file mode 100644 index 00000000000..70458eaeff3 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure24.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure25.png b/docs/blueprints/_images/guidance-identity-aad/figure25.png new file mode 100644 index 00000000000..bdb95c1f4c9 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure25.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure26.png b/docs/blueprints/_images/guidance-identity-aad/figure26.png new file mode 100644 index 00000000000..71c97a3478c Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure26.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure27.png b/docs/blueprints/_images/guidance-identity-aad/figure27.png new file mode 100644 index 00000000000..4fd6851e54f Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure27.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure3.png b/docs/blueprints/_images/guidance-identity-aad/figure3.png new file mode 100644 index 00000000000..6f2784d3b7a Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure3.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure5.png b/docs/blueprints/_images/guidance-identity-aad/figure5.png new file mode 100644 index 00000000000..c68eb81254a Binary files /dev/null and 
b/docs/blueprints/_images/guidance-identity-aad/figure5.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure6.png b/docs/blueprints/_images/guidance-identity-aad/figure6.png new file mode 100644 index 00000000000..1b80032acca Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure6.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure7.png b/docs/blueprints/_images/guidance-identity-aad/figure7.png new file mode 100644 index 00000000000..fe73ac41f23 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure7.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure8.png b/docs/blueprints/_images/guidance-identity-aad/figure8.png new file mode 100644 index 00000000000..166153dbb49 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure8.png differ diff --git a/docs/blueprints/_images/guidance-identity-aad/figure9.png b/docs/blueprints/_images/guidance-identity-aad/figure9.png new file mode 100644 index 00000000000..bbe07dd9845 Binary files /dev/null and b/docs/blueprints/_images/guidance-identity-aad/figure9.png differ diff --git a/docs/blueprints/_images/identity/figure1.png b/docs/blueprints/_images/identity/figure1.png new file mode 100644 index 00000000000..b33d2848a08 Binary files /dev/null and b/docs/blueprints/_images/identity/figure1.png differ diff --git a/docs/blueprints/_images/identity/figure2.png b/docs/blueprints/_images/identity/figure2.png new file mode 100644 index 00000000000..52f9dc76da8 Binary files /dev/null and b/docs/blueprints/_images/identity/figure2.png differ diff --git a/docs/blueprints/_images/identity/figure3.png b/docs/blueprints/_images/identity/figure3.png new file mode 100644 index 00000000000..2c80d1299c3 Binary files /dev/null and b/docs/blueprints/_images/identity/figure3.png differ diff --git a/docs/blueprints/_images/identity/figure4.png b/docs/blueprints/_images/identity/figure4.png new 
file mode 100644 index 00000000000..53e2d3f0b2a Binary files /dev/null and b/docs/blueprints/_images/identity/figure4.png differ diff --git a/docs/blueprints/_images/security/figure1.png b/docs/blueprints/_images/security/figure1.png new file mode 100644 index 00000000000..257b6e07397 Binary files /dev/null and b/docs/blueprints/_images/security/figure1.png differ diff --git a/docs/blueprints/_images/security/figure2.png b/docs/blueprints/_images/security/figure2.png new file mode 100644 index 00000000000..3aa19eb89e8 Binary files /dev/null and b/docs/blueprints/_images/security/figure2.png differ diff --git a/docs/blueprints/dmz/images/nva-ha/active-passive.png b/docs/blueprints/dmz/images/nva-ha/active-passive.png new file mode 100644 index 00000000000..c9da7723bf9 Binary files /dev/null and b/docs/blueprints/dmz/images/nva-ha/active-passive.png differ diff --git a/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress-ag.png b/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress-ag.png new file mode 100644 index 00000000000..49280075ef7 Binary files /dev/null and b/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress-ag.png differ diff --git a/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress.png b/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress.png new file mode 100644 index 00000000000..3d7de7ec4c2 Binary files /dev/null and b/docs/blueprints/dmz/images/nva-ha/l7-ingress-egress.png differ diff --git a/docs/blueprints/dmz/images/nva-ha/l7-ingress.png b/docs/blueprints/dmz/images/nva-ha/l7-ingress.png new file mode 100644 index 00000000000..4a52eb1f987 Binary files /dev/null and b/docs/blueprints/dmz/images/nva-ha/l7-ingress.png differ diff --git a/docs/blueprints/dmz/images/nva-ha/single-nva.png b/docs/blueprints/dmz/images/nva-ha/single-nva.png new file mode 100644 index 00000000000..e17ac4603a7 Binary files /dev/null and b/docs/blueprints/dmz/images/nva-ha/single-nva.png differ diff --git a/docs/blueprints/dmz/images/secure-vnet-dmz.svg 
b/docs/blueprints/dmz/images/secure-vnet-dmz.svg new file mode 100644 index 00000000000..3cf24d6c452 --- /dev/null +++ b/docs/blueprints/dmz/images/secure-vnet-dmz.svg @@ -0,0 +1,858 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + +Azure Virtual Network + + + + On-premises + network + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UDR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DMZ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/dmz/images/secure-vnet-hybrid.svg b/docs/blueprints/dmz/images/secure-vnet-hybrid.svg new file mode 100644 index 00000000000..85d73c979ce --- /dev/null +++ b/docs/blueprints/dmz/images/secure-vnet-hybrid.svg @@ -0,0 +1,728 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + +Azure Virtual Network + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DMZ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/dmz/index.md b/docs/blueprints/dmz/index.md new file mode 100644 index 00000000000..0a5b04a4fd7 --- /dev/null +++ b/docs/blueprints/dmz/index.md @@ -0,0 +1,68 @@ +--- +title: Network DMZ | Architectural Blueprints +description: Explains and compares the different methods available for protecting applications and components running in Azure as part of a hybrid system from unauthorized intrusion. 
+layout: LandingPage +pnp.series.title: Network DMZ +pnp.series.next: secure-vnet-hybrid +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +An on-premises network can be connected to a virtual network in Azure by using an Azure VPN gateway. The network boundary between these two environments can expose areas of weakness in terms of security, and it is necessary to protect this boundary to block unauthorized requests. Similar protection is required for applications running on VMs in Azure that are exposed to the public Internet. + + + diff --git a/docs/blueprints/dmz/nva-ha.md b/docs/blueprints/dmz/nva-ha.md new file mode 100644 index 00000000000..2cc652e530f --- /dev/null +++ b/docs/blueprints/dmz/nva-ha.md @@ -0,0 +1,117 @@ +--- +title: Deploy highly available network virtual appliances +description: How to deploy network virtual appliances in high availability. +services: '' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: '' +ms.assetid: d78ea9a8-a8f2-457b-a918-16341a377f5c +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 12/06/2016 +ms.author: telmos +pnp.series.prev: secure-vnet-dmz +pnp.series.title: Network DMZ +cardTitle: Deploy highly available network virtual appliances +--- +# Deploy highly available network virtual appliances + +This article shows how to deploy a set of network virtual appliances (NVAs) for high availability in Azure. An NVA is typically used to control the flow of network traffic from a perimeter network, also known as a DMZ, to other networks or subnets. To learn about implementing a DMZ in Azure, see [Microsoft cloud services and network security][cloud-security]. The article includes example architectures for ingress only, egress only, and both ingress and egress. 
+ +**Prerequisites:** This article assumes a basic understanding of Azure networking, [Azure load balancers][lb-overview], and [user-defined routes][udr-overview] (UDRs). + + +## Architecture Diagrams + +An NVA can be deployed to a DMZ in many different architectures. For example, the following figure illustrates the use of a [single NVA][nva-scenario] for ingress. + +![[0]][0] + +In this architecture, the NVA provides a secure network boundary by checking all inbound and outbound network traffic and passing only the traffic that meets network security rules. However, the fact that all network traffic must pass through the NVA means that the NVA is a single point of failure in the network. If the NVA fails, there is no other path for network traffic and all the back-end subnets are unavailable. + +To make an NVA highly available, deploy more than one NVA into an availability set. + +The following architectures describe the resources and configuration necessary for highly available NVAs: + +| Solution | Benefits | Considerations | +| --- | --- | --- | +| [Ingress with layer 7 NVAs][ingress-with-layer-7] |All NVA nodes are active |Requires an NVA that can terminate connections and use SNAT
    Requires a separate set of NVAs for traffic coming from the Internet and from Azure
    Can only be used for traffic originating outside Azure | +| [Egress with layer 7 NVAs][egress-with-layer-7] |All NVA nodes are active | Requires an NVA that can terminate connections and implements source network address translation (SNAT) +| [Ingress-Egress with layer 7 NVAs][ingress-egress-with-layer-7] |All nodes are active
    Able to handle traffic originated in Azure |Requires an NVA that can terminate connections and use SNAT
    Requires a separate set of NVAs for traffic coming from the Internet and from Azure | +| [PIP-UDR switch][pip-udr-switch] |Single set of NVAs for all traffic
    Can handle all traffic (no limit on port rules) |Active-passive
    Requires a failover process | + +## Ingress with layer 7 NVAs + +The following figure shows a high availability architecture that implements an ingress DMZ behind an internet-facing load balancer. This architecture is designed to provide connectivity to Azure workloads for layer 7 traffic, such as HTTP or HTTPS: + +![[1]][1] + +The benefit of this architecture is that all NVAs are active, and if one fails the load balancer directs network traffic to the other NVA. Both NVAs route traffic to the internal load balancer so as long as one NVA is active, traffic continues to flow. The NVAs are required to terminate SSL traffic intended for the web tier VMs. These NVAs cannot be extended to handle on-premises traffic because on-premises traffic requires another dedicated set of NVAs with their own network routes. + +> [!NOTE] +> This architecture is used in the [DMZ between Azure and your on-premises datacenter][dmz-on-prem] reference architecture and the [DMZ between Azure and the Internet][dmz-internet] reference architecture. Each of these reference architectures includes a deployment solution that you can use. Follow the links for more information. + +## Egress with layer 7 NVAs + +The previous architecture can be expanded to provide an egress DMZ for requests originating in the Azure workload. The following architecture is designed to provide high availability of the NVAs in the DMZ for layer 7 traffic, such as HTTP or HTTPS: + +![[2]][2] + +In this architecture, all traffic originating in Azure is routed to an internal load balancer. The load balancer distributes outgoing requests between a set of NVAs. These NVAs direct traffic to the Internet using their individual public IP addresses. + +> [!NOTE] +> This architecture is used in the [DMZ between Azure and your on-premises datacenter][dmz-on-prem] reference architecture and the [DMZ between Azure and the Internet][dmz-internet] reference architecture. 
Each of these reference architectures includes a deployment solution that you can use. Follow the links for more information. + +## Ingress-egress with layer 7 NVAs + +In the two previous architectures, there was a separate DMZ for ingress and egress. The following architecture demonstrates how to create a DMZ that can be used for both ingress and egress for layer 7 traffic, such as HTTP or HTTPS: + +![[4]][4] + +In this architecture, the NVAs process incoming requests from the application gateway. The NVAs also process outgoing requests from the workload VMs in the back-end pool of the load balancer. Because incoming traffic is routed with an application gateway and outgoing traffic is routed with a load balancer, the NVAs are responsible for maintaining session affinity. That is, the application gateway maintains a mapping of inbound and outbound requests so it can forward the correct response to the original requestor. However, the internal load balancer does not have access to the application gateway mappings, and uses its own logic to send responses to the NVAs. It's possible the load balancer could send a response to an NVA that did not initially receive the request from the application gateway. In this case, the NVAs must communicate and transfer the response between them so the correct NVA can forward the response to the application gateway. + +## PIP-UDR switch with layer 4 NVAs + +The following architecture demonstrates an architecture with one active and one passive NVA. This architecture handles both ingress and egress for layer 4 traffic: + +![[3]][3] + +This architecture is similar to the first architecture discussed in this article. That architecture included a single NVA accepting and filtering incoming layer 4 requests. This architecture adds a second passive NVA to provide high availability. If the active NVA fails, the passive NVA is made active and the UDR and PIP are changed to point to the NICs on the now active NVA. 
These changes to the UDR and PIP can either be done manually or using an automated process. The automated process is typically a daemon or other monitoring service running in Azure. It queries a health probe on the active NVA and performs the UDR and PIP switch when it detects a failure of the NVA. + +The preceding figure shows an example [ZooKeeper][zookeeper] cluster providing a high availability daemon. Within the ZooKeeper cluster, a quorum of nodes elects a leader. If the leader fails, the remaining nodes hold an election to elect a new leader. For this architecture, the leader node executes the daemon that queries the health endpoint on the NVA. If the NVA fails to respond to the health probe, the daemon activates the passive NVA. The daemon then calls the Azure REST API to remove the PIP from the failed NVA and attaches it to the newly activated NVA. The daemon then modifies the UDR to point to the newly activated NVA's internal IP address. + +> [!NOTE] +> Do not include the ZooKeeper nodes in a subnet that is only accessible using a route that includes the NVA. Otherwise, the ZooKeeper nodes are inaccessible if the NVA fails. Should the daemon fail for any reason, you won't be able to access any of the ZooKeeper nodes to diagnose the problem. + + + + + +## Next steps +* Learn how to [implement a DMZ between Azure and your on-premises datacenter][dmz-on-prem] using layer-7 NVAs. +* Learn how to [implement a DMZ between Azure and the Internet][dmz-internet] using layer-7 NVAs. 
+ + +[cloud-security]: /azure/best-practices-network-security +[dmz-on-prem]: ./secure-vnet-hybrid.md +[dmz-internet]: ./secure-vnet-dmz.md +[egress-with-layer-7]: #egress-with-layer-7-nvas +[ingress-with-layer-7]: #ingress-with-layer-7-nvas +[ingress-egress-with-layer-7]: #ingress-egress-with-layer-7-nvas +[lb-overview]: /azure/load-balancer/load-balancer-overview/ +[nva-scenario]: /azure/virtual-network/virtual-network-scenario-udr-gw-nva/ +[pip-udr-switch]: #pip-udr-switch-with-layer-4-nvas +[udr-overview]: /azure/virtual-network/virtual-networks-udr-overview/ +[zookeeper]: https://zookeeper.apache.org/ + + +[0]: ./images/nva-ha/single-nva.png "Single NVA architecture" +[1]: ./images/nva-ha/l7-ingress.png "Layer 7 ingress" +[2]: ./images/nva-ha/l7-ingress-egress.png "Layer 7 egress" +[3]: ./images/nva-ha/active-passive.png "Active-Passive cluster" +[4]: ./images/nva-ha/l7-ingress-egress-ag.png diff --git a/docs/blueprints/dmz/secure-vnet-dmz.md b/docs/blueprints/dmz/secure-vnet-dmz.md new file mode 100644 index 00000000000..0eb06f1b20f --- /dev/null +++ b/docs/blueprints/dmz/secure-vnet-dmz.md @@ -0,0 +1,151 @@ +--- +title: Implementing a DMZ between Azure and the Internet +description: >- + How to implement a secure hybrid network architecture with Internet access in + Azure. 
+services: 'guidance,vpn-gateway,expressroute,load-balancer,virtual-network' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Network DMZ +pnp.series.next: nva-ha +ms.assetid: 6fbc8d25-815d-4e98-9ade-d423ddf19dc9 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/23/2016 +ms.author: telmos +pnp.series.prev: secure-vnet-hybrid +cardTitle: DMZ between Azure and the Internet +--- +# Between Azure and the Internet +[!INCLUDE [header](../../_includes/header.md)] + +This article describes best practices for implementing a secure hybrid network that extends your on-premises network and also accepts Internet traffic to Azure. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager](/azure/azure-resource-manager/resource-group-overview) and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +This reference architecture extends the architecture described in [Implementing a DMZ between Azure and your on-premises datacenter][implementing-a-secure-hybrid-network-architecture]. This reference architecture adds a public DMZ that handles Internet traffic, in addition to the private DMZ that handles traffic from the on-premises network. + +Typical uses for this architecture include: + +* Hybrid applications where workloads run partly on-premises and partly in Azure. +* Azure infrastructure that routes incoming traffic from on-premises and the Internet. + +## Architecture diagram + +The following diagram highlights the important components in this architecture: + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. This diagram is on the "DMZ - Public" page. 
+> +> + +[![0]][0] + +To enable Internet traffic to Azure, the architecture includes the following components: + +* **Public IP address (PIP)**. The IP address of the public endpoint. External users connected to the Internet can access the system through this address. +* **Network virtual appliance (NVA)**. This architecture includes a separate pool of NVAs for traffic originating on the Internet. +* **Azure load balancer**. All incoming requests from the Internet pass through the load balancer and are distributed to the NVAs in the public DMZ. +* **Public DMZ inbound subnet**. This subnet accepts requests from the Azure load balancer. Incoming requests are passed to one of the NVAs in the public DMZ. +* **Public DMZ outbound subnet**. Requests that are approved by the NVA pass through this subnet to the internal load balancer for the web tier. + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### NVA recommendations + +Use one set of NVAs for traffic originating on the Internet, and another for traffic originating on-premises. Using only one set of NVAs for both is a security risk, because it provides no security perimeter between the two sets of network traffic. Using separate NVAs reduces the complexity of checking security rules, and makes it clear which rules correspond to each incoming network request. One set of NVAs implements rules for Internet traffic only, while another set of NVAs implement rules for on-premises traffic only. + +Include a layer-7 NVA to terminate application connections at the NVA level and maintain compatibility with the backend tiers. This guarantees symmetric connectivity where response traffic from the backend tiers returns through the NVA. 
+ +### Public load balancer recommendations + +For scalability and availability, deploy the public DMZ NVAs in an [availability set][availability-set] and use an [Internet facing load balancer][load-balancer] to distribute Internet requests across the NVAs in the availability set. + +Configure the load balancer to accept requests only on the ports necessary for Internet traffic. For example, restrict inbound HTTP requests to port 80 and inbound HTTPS requests to port 443. + +## Scalability considerations + +Even if your architecture initially requires a single NVA in the public DMZ, we recommend putting a load balancer in front of the public DMZ from the beginning. That will make it easier to scale to multiple NVAs in the future, if needed. + +## Availability considerations + +The Internet facing load balancer requires each NVA in the public DMZ inbound subnet to implement a [health probe][lb-probe]. An NVA that fails to respond on this endpoint is considered to be unavailable, and the load balancer will direct requests to other NVAs in the same availability set. Note that if all NVAs fail to respond, your application will fail, so it's important to have monitoring configured to alert DevOps when the number of healthy NVA instances falls below a defined threshold. + +## Manageability considerations + +All monitoring and management for the NVAs in the public DMZ should be performed by the jumpbox in the management subnet. As discussed in [Implementing a DMZ between Azure and your on-premises datacenter][implementing-a-secure-hybrid-network-architecture], define a single network route from the on-premises network through the gateway to the jumpbox, in order to restrict access. + +If gateway connectivity from your on-premises network to Azure is down, you can still reach the jumpbox by deploying a public IP address, adding it to the jumpbox, and logging in from the Internet. 
+ +## Security considerations + +This reference architecture implements multiple levels of security: + +* The Internet facing load balancer directs requests to the NVAs in the inbound public DMZ subnet, and only on the ports necessary for the application. +* The NSG rules for the inbound and outbound public DMZ subnets prevent the NVAs from being compromised, by blocking requests that fall outside of the NSG rules. +* The NAT routing configuration for the NVAs directs incoming requests on port 80 and port 443 to the web tier load balancer, but ignores requests on all other ports. + +You should log all incoming requests on all ports. Regularly audit the logs, paying attention to requests that fall outside of expected parameters, as these may indicate intrusion attempts. + +## Solution deployment + +A deployment for a reference architecture that implements these recommendations is available on [GitHub][github-folder]. The reference architecture can be deployed either with Windows or Linux VMs by following the directions below: + +1. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-secure-vnet-dmz%2FvirtualNetwork.azuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-public-dmz-network-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select the **Os Type** from the drop down box, **windows** or **linux**. 
+ * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-secure-vnet-dmz%2Fworkload.azuredeploy.json) +5. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-public-dmz-wl-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +6. Wait for the deployment to complete. +7. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-secure-vnet-dmz%2Fsecurity.azuredeploy.json) +8. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Use Existing** and enter `ra-public-dmz-network-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. 
+ * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +9. Wait for the deployment to complete. +10. The parameter files include a hard-coded administrator user name and password for all VMs, and it is strongly recommended that you immediately change both. For each VM in the deployment, select it in the Azure portal and then click **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** drop down box, then select a new **User name** and **Password**. Click the **Update** button to save. + + +[availability-set]: /azure/virtual-machines/virtual-machines-windows-manage-availability +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-secure-vnet-dmz + +[implementing-a-multi-tier-architecture-on-Azure]: ./guidance-compute-3-tier-vm.md +[implementing-a-secure-hybrid-network-architecture]: ./secure-vnet-hybrid.md +[iptables]: https://help.ubuntu.com/community/IptablesHowTo +[lb-probe]: /azure/load-balancer/load-balancer-custom-probe-overview +[load-balancer]: /azure/load-balancer/load-balancer-Internet-overview +[network-security-group]: /azure/virtual-network/virtual-networks-nsg + +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx + +[0]: ../_images/blueprints/hybrid-network-secure-vnet-dmz.png "Secure hybrid network architecture" \ No newline at end of file diff --git a/docs/blueprints/dmz/secure-vnet-hybrid.md b/docs/blueprints/dmz/secure-vnet-hybrid.md new file mode 100644 index 00000000000..f90338ec0fc --- /dev/null +++ b/docs/blueprints/dmz/secure-vnet-hybrid.md @@ -0,0 +1,248 @@ +--- +title: Implementing a secure hybrid network architecture in Azure +description: How to implement a secure hybrid network architecture in Azure. 
+services: 'guidance,vpn-gateway,expressroute,load-balancer,virtual-network' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Network DMZ +pnp.series.prev: ./index +ms.assetid: 4a299a51-5925-4875-9164-c89a82dff035 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/23/2016 +ms.author: telmos +pnp.series.next: secure-vnet-dmz +cardTitle: DMZ between Azure and on-premises +--- +# Between Azure and your on-premises datacenter +[!INCLUDE [header](../../_includes/header.md)] + +This article describes best practices for implementing a secure hybrid network that extends an on-premises network to Azure. This reference architecture implements a DMZ (also called a perimeter network) between an on-premises network and an Azure virtual network. The DMZ includes highly available network virtual appliances (NVAs) that implement security functionality such as firewalls and packet inspection. All outgoing traffic from the VNet is force-tunneled to the Internet through the on-premises network, so that it can be audited. + +This architecture requires a connection to your on-premises datacenter, using either a [VPN gateway][ra-vpn] or an [ExpressRoute][ra-expressroute] connection. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager](/azure/azure-resource-manager/resource-group-overview) and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +Typical uses for this architecture include: + +* Hybrid applications where workloads run partly on-premises and partly in Azure. +* Infrastructure that requires granular control over traffic entering an Azure VNet from an on-premises datacenter. +* Applications that must audit outgoing traffic. This is often a regulatory requirement of many commercial systems and can help to prevent public disclosure of private information. 
+ +## Architecture diagram +The following diagram highlights the important components in this architecture: + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. This diagram is on the "DMZ - Private" page. +> +> + +[![0]][0] + +* **On-premises network**. A private local-area network implemented in an organization. +* **Azure virtual network (VNet)**. The VNet hosts the application and other resources running in Azure. +* **Gateway**. The gateway provides connectivity between the routers in the on-premises network and the VNet. +* **Network virtual appliance (NVA)**. NVA is a generic term that describes a VM performing tasks such as allowing or denying access as a firewall, optimizing wide area network (WAN) operations (including network compression), custom routing, or other network functionality. +* **Web tier, business tier, and data tier subnets**. Subnets hosting the VMs and services that implement an example 3-tier application running in the cloud. See [Running Windows VMs for an N-tier architecture on Azure][ra-n-tier] for more information. +* **User defined routes (UDR)**. [User defined routes][udr-overview] define the flow of IP traffic within Azure VNets. + + > [!NOTE] + > Depending on the requirements of your VPN connection, you can configure Border Gateway Protocol (BGP) routes instead of using UDRs to implement the forwarding rules that direct traffic back through the on-premises network. + > + > + +* **Management subnet.** This subnet contains VMs that implement management and monitoring capabilities for the components running in the VNet. + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### Access control recommendations + +Use [Role-Based Access Control][rbac] (RBAC) to manage the resources in your application. 
Consider creating the following [custom roles][rbac-custom-roles]: + +- A DevOps role with permissions to administer the infrastructure for the application, deploy the application components, and monitor and restart VMs. + +- A centralized IT administrator role to manage and monitor network resources. + +- A security IT administrator role to manage secure network resources such as the NVAs. + +The DevOps and IT administrator roles should not have access to the NVA resources. This should be restricted to the security IT administrator role. + +### Resource group recommendations + +Azure resources such as VMs, VNets, and load balancers can be easily managed by grouping them together into resource groups. Assign RBAC roles to each resource group to restrict access. + +We recommend creating the following resource groups: + +* A resource group containing the subnets (excluding the VMs), NSGs, and the gateway resources for connecting to the on-premises network. Assign the centralized IT administrator role to this resource group. +* A resource group containing the VMs for the NVAs (including the load balancer), the jumpbox and other management VMs, and the UDR for the gateway subnet that forces all traffic through the NVAs. Assign the security IT administrator role to this resource group. +* Separate resource groups for each application tier that contain the load balancer and VMs. Note that this resource group shouldn't include the subnets for each tier. Assign the DevOps role to this resource group. + +### Virtual network gateway recommendations + +On-premises traffic passes to the VNet through a virtual network gateway. We recommend an [Azure VPN gateway][guidance-vpn-gateway] or an [Azure ExpressRoute gateway][guidance-expressroute]. + +### NVA recommendations + +NVAs provide different services for managing and monitoring network traffic. 
The Azure Marketplace offers several third-party vendor NVAs, including: + +* [Barracuda Web Application Firewall][barracuda-waf] and [Barracuda NextGen Firewall][barracuda-nf] +* [Cohesive Networks VNS3 Firewall/Router/VPN][vns3] +* [Fortinet FortiGate-VM][fortinet] +* [SecureSphere Web Application Firewall][securesphere] +* [DenyAll Web Application Firewall][denyall] +* [Check Point vSEC][checkpoint] +* [Kemp LoadMaster Load Balancer ADC Content Switch][kemp-loadmaster] + +If none of these third-party NVAs meet your requirements, you can create a custom NVA using VMs. As an example of creating custom NVAs, the solution deployment for this reference architecture implements the following functionality: + +* Traffic is routed using [IP forwarding][ip-forwarding] on the NVA network interfaces (NICs). +* Traffic is permitted to pass through the NVA only if it is appropriate to do so. Each NVA VM in the reference architecture is a simple Linux router. Inbound traffic arrives on network interface *eth0*, and outbound traffic matches rules defined by custom scripts dispatched through network interface *eth1*. +* The NVAs can only be configured from the management subnet. +* Traffic routed to the management subnet does not pass through the NVAs. Otherwise, if the NVAs fail, there would be no route to the management subnet to fix them. +* The VMs for the NVA are placed in an [availability set][availability-set] behind a load balancer. The UDR in the gateway subnet directs NVA requests to the load balancer. + +Include a layer-7 NVA to terminate application connections at the NVA level and maintain affinity with the backend tiers. This guarantees symmetric connectivity, in which response traffic from the backend tiers returns through the NVA. + +Another option to consider is connecting multiple NVAs in series, with each NVA performing a specialized security task. This allows each security function to be managed on a per-NVA basis. 
For example, an NVA implementing a firewall could be placed in series with an NVA running identity services. The tradeoff for ease of management is the addition of extra network hops that may increase latency, so ensure that this doesn't affect your application's performance. + + +### NSG recommendations + +The VPN gateway exposes a public IP address for the connection to the on-premises network. We recommend creating a network security group (NSG) for the inbound NVA subnet, with rules to block all traffic not originating from the on-premises network. + +We also recommend NSGs for each subnet to provide a second level of protection against inbound traffic bypassing an incorrectly configured or disabled NVA. For example, the web tier subnet in the reference architecture implements an NSG with a rule to ignore all requests other than those received from the on-premises network (192.168.0.0/16) or the VNet, and another rule that ignores all requests not made on port 80. + +### Internet access recommendations + +[Force-tunnel][azure-forced-tunneling] all outbound Internet traffic through your on-premises network using the site-to-site VPN tunnel, and route to the Internet using network address translation (NAT). This prevents accidental leakage of any confidential information stored in your data tier and allows inspection and auditing of all outgoing traffic. + +> [!NOTE] +> Don't completely block Internet traffic from the application tiers, as this will prevent these tiers from using Azure PaaS services that rely on public IP addresses, such as VM diagnostics logging, downloading of VM extensions, and other functionality. Azure diagnostics also requires that components can read and write to an Azure Storage account. +> +> + +Verify that outbound internet traffic is force-tunneled correctly. 
If you're using a VPN connection with the [routing and remote access service][routing-and-remote-access-service] on an on-premises server, use a tool such as [WireShark][wireshark] or [Microsoft Message Analyzer](https://www.microsoft.com/download/details.aspx?id=44226). + +### Management subnet recommendations + +The management subnet contains a jumpbox that performs management and monitoring functionality. Restrict execution of all secure management tasks to the jumpbox. + +Do not create a public IP address for the jumpbox. Instead, create one route to access the jumpbox through the incoming gateway. Create NSG rules so the management subnet only responds to requests from the allowed route. + +## Scalability considerations + +The reference architecture uses a load balancer to direct on-premises network traffic to a pool of NVA devices, which route the traffic. The NVAs are placed in an [availability set][availability-set]. This design allows you to monitor the throughput of the NVAs over time and add NVA devices in response to increases in load. + +The standard SKU VPN gateway supports sustained throughput of up to 100 Mbps. The High Performance SKU provides up to 200 Mbps. For higher bandwidths, consider upgrading to an ExpressRoute gateway. ExpressRoute provides up to 10 Gbps bandwidth with lower latency than a VPN connection. + +For more information about the scalability of Azure gateways, see the scalability consideration section in [Implementing a hybrid network architecture with Azure and on-premises VPN][guidance-vpn-gateway-scalability] and [Implementing a hybrid network architecture with Azure ExpressRoute][guidance-expressroute-scalability]. + +## Availability considerations + +As mentioned, the reference architecture uses a pool of NVA devices behind a load balancer. The load balancer uses a health probe to monitor each NVA and will remove any unresponsive NVAs from the pool. 
+ +If you're using Azure ExpressRoute to provide connectivity between the VNet and on-premises network, [configure a VPN gateway to provide failover][ra-vpn-failover] if the ExpressRoute connection becomes unavailable. + +For specific information on maintaining availability for VPN and ExpressRoute connections, see the availability considerations in [Implementing a hybrid network architecture with Azure and on-premises VPN][guidance-vpn-gateway-availability] and [Implementing a hybrid network architecture with Azure ExpressRoute][guidance-expressroute-availability]. + +## Manageability considerations + +All application and resource monitoring should be performed by the jumpbox in the management subnet. Depending on your application requirements, you may need additional monitoring resources in the management subnet. If so, these resources should be accessed through the jumpbox. + +If gateway connectivity from your on-premises network to Azure is down, you can still reach the jumpbox by deploying a public IP address, adding it to the jumpbox, and remoting in from the internet. + +Each tier's subnet in the reference architecture is protected by NSG rules. You may need to create a rule to open port 3389 for remote desktop protocol (RDP) access on Windows VMs or port 22 for secure shell (SSH) access on Linux VMs. Other management and monitoring tools may require rules to open additional ports. + +If you're using ExpressRoute to provide the connectivity between your on-premises datacenter and Azure, use the [Azure Connectivity Toolkit (AzureCT)][azurect] to monitor and troubleshoot connection issues. + +You can find additional information specifically aimed at monitoring and managing VPN and ExpressRoute connections in the articles [Implementing a hybrid network architecture with Azure and on-premises VPN][guidance-vpn-gateway-manageability] and [Implementing a hybrid network architecture with Azure ExpressRoute][guidance-expressroute-manageability]. 
+ +## Security considerations + +This reference architecture implements multiple levels of security. + +### Routing all on-premises user requests through the NVA +The UDR in the gateway subnet blocks all user requests other than those received from on-premises. The UDR passes allowed requests to the NVAs in the private DMZ subnet, and these requests are passed on to the application if they are allowed by the NVA rules. You can add other routes to the UDR, but make sure they don't inadvertently bypass the NVAs or block administrative traffic intended for the management subnet. + +The load balancer in front of the NVAs also acts as a security device by ignoring traffic on ports that are not open in the load balancing rules. The load balancers in the reference architecture only listen for HTTP requests on port 80 and HTTPS requests on port 443. Document any additional rules that you add to the load balancers, and monitor traffic to ensure there are no security issues. + +### Using NSGs to block/pass traffic between application tiers +Traffic between tiers is restricted by using NSGs. The business tier blocks all traffic that doesn't originate in the web tier, and the data tier blocks all traffic that doesn't originate in the business tier. If you have a requirement to expand the NSG rules to allow broader access to these tiers, weigh these requirements against the security risks. Each new inbound pathway represents an opportunity for accidental or purposeful data leakage or application damage. + +### DevOps access +Use [RBAC][rbac] to restrict the operations that DevOps can perform on each tier. When granting permissions, use the [principle of least privilege][security-principle-of-least-privilege]. Log all administrative operations and perform regular audits to ensure any configuration changes were planned. + +## Solution deployment + +A deployment for a reference architecture that implements these recommendations is available on [GitHub][github-folder]. 
The reference architecture can be deployed by following the directions below: + +1. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-secure-vnet%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-private-dmz-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include hard-coded administrator user name and password for all VMs, and it is strongly recommended that you immediately change both. For each VM in the deployment, select it in the Azure portal and then click **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** drop down box, then select a new **User name** and **Password**. Click the **Update** button to save. + +## Next steps + +* Learn how to implement a [DMZ between Azure and the Internet](secure-vnet-dmz.md). +* Learn how to implement a [highly available hybrid network architecture][ra-vpn-failover]. +* For more information about managing network security with Azure, see [Microsoft cloud services and network security][cloud-services-network-security]. +* For detailed information about protecting resources in Azure, see [Getting started with Microsoft Azure security][getting-started-with-azure-security]. 
+* For additional details on addressing security concerns across an Azure gateway connection, see [Implementing a hybrid network architecture with Azure and on-premises VPN][guidance-vpn-gateway-security] and [Implementing a hybrid network architecture with Azure ExpressRoute][guidance-expressroute-security]. +> + + + +[availability-set]: /azure/virtual-machines/virtual-machines-windows-create-availability-set +[azurect]: https://github.com/Azure/NetworkMonitoring/tree/master/AzureCT +[azure-forced-tunneling]: https://azure.microsoft.com/en-gb/documentation/articles/vpn-gateway-forced-tunneling-rm/ +[barracuda-nf]: https://azure.microsoft.com/marketplace/partners/barracudanetworks/barracuda-ng-firewall/ +[barracuda-waf]: https://azure.microsoft.com/marketplace/partners/barracudanetworks/waf/ +[checkpoint]: https://azure.microsoft.com/marketplace/partners/checkpoint/check-point-r77-10/ +[cloud-services-network-security]: https://azure.microsoft.com/documentation/articles/best-practices-network-security/ +[denyall]: https://azure.microsoft.com/marketplace/partners/denyall/denyall-web-application-firewall/ +[fortinet]: https://azure.microsoft.com/marketplace/partners/fortinet/fortinet-fortigate-singlevmfortigate-singlevm/ +[getting-started-with-azure-security]: /azure/security/azure-security-getting-started +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-secure-vnet +[guidance-expressroute]: ../hybrid-networking/expressroute.md +[guidance-expressroute-availability]: ../hybrid-networking/expressroute.md#availability-considerations +[guidance-expressroute-manageability]: ../hybrid-networking/expressroute.md#manageability-considerations +[guidance-expressroute-security]: ../hybrid-networking/expressroute.md#security-considerations +[guidance-expressroute-scalability]: ../hybrid-networking/expressroute.md#scalability-considerations +[guidance-vpn-gateway]: ../hybrid-networking/vpn.md 
+[guidance-vpn-gateway-availability]: ../hybrid-networking/vpn.md#availability-considerations +[guidance-vpn-gateway-manageability]: ../hybrid-networking/vpn.md#manageability-considerations +[guidance-vpn-gateway-scalability]: ../hybrid-networking/vpn.md#scalability-considerations +[guidance-vpn-gateway-security]: ../hybrid-networking/vpn.md#security-considerations +[ip-forwarding]: /azure/virtual-network/virtual-networks-udr-overview#ip-forwarding +[kemp-loadmaster]: https://azure.microsoft.com/marketplace/partners/kemptech/vlm-azure/ +[ra-expressroute]: ../hybrid-networking/expressroute.md +[ra-n-tier]: ../virtual-machines-windows/n-tier.md +[ra-vpn]: ../hybrid-networking/vpn.md +[ra-vpn-failover]: ../hybrid-networking/expressroute-vpn-failover.md +[rbac]: /azure/active-directory/role-based-access-control-configure +[rbac-custom-roles]: /azure/active-directory/role-based-access-control-custom-roles +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[routing-and-remote-access-service]: https://technet.microsoft.com/library/dd469790(v=ws.11).aspx +[securesphere]: https://azuremarketplace.microsoft.com/en-us/marketplace/apps/imperva.securesphere-waf +[security-principle-of-least-privilege]: https://msdn.microsoft.com/library/hdb58b2f(v=vs.110).aspx#Anchor_1 +[udr-overview]: /azure/virtual-network/virtual-networks-udr-overview +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vns3]: https://azure.microsoft.com/marketplace/partners/cohesive/cohesiveft-vns3-for-azure/ +[wireshark]: https://www.wireshark.org/ +[0]: ../_images/blueprints/hybrid-network-secure-vnet.png "Secure hybrid network architecture" \ No newline at end of file diff --git a/docs/blueprints/dmz/series.yml b/docs/blueprints/dmz/series.yml new file mode 100644 index 00000000000..53ca73fe9b4 --- /dev/null +++ b/docs/blueprints/dmz/series.yml @@ -0,0 +1,5 @@ +--- +columns: 2 +summary: Explains and compares the 
different methods available for protecting applications and components running in Azure as part of a hybrid system from unauthorized intrusion. +description: An on-premises network can be connected to a virtual network in Azure by using an Azure VPN gateway. The network boundary between these two environments can expose areas of weakness in terms of security, and it is necessary to protect this boundary to block unauthorized requests. Similar protection is required for applications running on VMs in Azure that are exposed to the public Internet. +--- \ No newline at end of file diff --git a/docs/blueprints/hybrid-networking/considerations.md b/docs/blueprints/hybrid-networking/considerations.md new file mode 100644 index 00000000000..eb56eaab6a5 --- /dev/null +++ b/docs/blueprints/hybrid-networking/considerations.md @@ -0,0 +1,105 @@ +--- +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance +--- + +# Choose a solution for connecting an on-premises network to Azure + +This article compares solutions for connecting an on-premises network to an Azure Virtual Network (VNet), including benefits and considerations. We provide a reference architecture and a deployable solution for each option. + +## Virtual private network (VPN) connection + +Traffic flows between your on-premises network and an Azure VNet through an IPSec VPN tunnel. + +[![VPN](./images/vpn.svg)][vpn] + +This architecture is suitable for hybrid applications where the traffic between on-premises hardware and the cloud is likely to be light, or you are willing to trade slightly extended latency for the flexibility and processing power of the cloud. + + + + + + + + + + +
+<table>
+<tr><th>Benefits</th><th>Considerations</th></tr>
+<tr><td>
+    <ul>
+    <li>Simple to configure.</li>
+    </ul>
+</td>
+<td>
+    <ul>
+    <li>Requires an on-premises VPN device.</li>
+    <li>Although Microsoft guarantees 99.9% availability for each VPN Gateway, this SLA only covers the VPN gateway, and not your network connection to the gateway.</li>
+    <li>A VPN connection over Azure VPN Gateway currently supports a maximum of 200 Mbps bandwidth. You may need to partition your Azure virtual network across multiple VPN connections if you expect to exceed this throughput.</li>
+    </ul>
+</td></tr>
+</table>
    + +[Read more...][vpn] + +## Azure ExpressRoute connection + +ExpressRoute connections use a private, dedicated connection through a third-party connectivity provider. The private connection extends your on-premises network into Azure. + +[![ExpressRoute](./images/expressroute.svg)][expressroute] + +This architecture is suitable for hybrid applications running large-scale, mission-critical workloads that require a high degree of scalability. + + + + + + + + + + +
+<table>
+<tr><th>Benefits</th><th>Considerations</th></tr>
+<tr><td>
+    <ul>
+    <li>Much higher bandwidth available; up to 10 Gbps depending on the connectivity provider.</li>
+    <li>Supports dynamic scaling of bandwidth to help reduce costs during periods of lower demand. However, not all connectivity providers have this option.</li>
+    <li>May allow your organization direct access to national clouds, depending on the connectivity provider.</li>
+    <li>99.9% availability SLA across the entire connection.</li>
+    </ul>
+</td>
+<td>
+    <ul>
+    <li>Can be complex to set up. Creating an ExpressRoute connection requires working with a third-party connectivity provider. The provider is responsible for provisioning the network connection.</li>
+    <li>Requires high-bandwidth routers on-premises.</li>
+    </ul>
+</td></tr>
+</table>
    + +[Read more...][expressroute] + +## ExpressRoute with VPN failover + +[![ExpressRoute with VPN ](./images/expressroute-vpn-failover.svg)][expressroute-vpn-failover] + +This architecture is suitable for hybrid applications that need the higher bandwidth of ExpressRoute, and also require highly available network connectivity. + + + + + + + + + + +
+<table>
+<tr><th>Benefits</th><th>Considerations</th></tr>
+<tr><td>
+    <ul>
+    <li>High availability if the ExpressRoute circuit fails, although the fallback connection is on a lower bandwidth network.</li>
+    </ul>
+</td>
+<td>
+    <ul>
+    <li>Complex to configure. You need to set up both a VPN connection and an ExpressRoute circuit.</li>
+    <li>Requires redundant hardware (VPN appliances), and a redundant Azure VPN Gateway connection for which you pay charges.</li>
+    </ul>
+</td></tr>
+</table>
    + +[Read more...][expressroute-vpn-failover] + + +[expressroute]: ./expressroute.md +[expressroute-vpn-failover]: ./expressroute-vpn-failover.md +[vpn]: ./vpn.md \ No newline at end of file diff --git a/docs/blueprints/hybrid-networking/expressroute-vpn-failover.md b/docs/blueprints/hybrid-networking/expressroute-vpn-failover.md new file mode 100644 index 00000000000..6e44b4aa2d5 --- /dev/null +++ b/docs/blueprints/hybrid-networking/expressroute-vpn-failover.md @@ -0,0 +1,169 @@ +--- +title: Implementing a highly available hybrid network architecture +description: >- + How to implement a secure site-to-site network architecture that spans an + Azure virtual network and an on-premises network connected using ExpressRoute + with VPN gateway failover. +services: 'guidance,virtual-network,vpn-gateway,expressroute' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Connect an on-premises network to Azure +pnp.series.prev: expressroute +ms.assetid: c0a6f115-ec55-4f98-8cca-606d5a98a3cd +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +cardTitle: Improving availability +--- +# Connect an on-premises network to Azure using ExpressRoute with VPN failover + +This reference architecture shows how to connect an on-premises network to an Azure virtual network (VNet) using ExpressRoute, with a site-to-site virtual private network (VPN) as a failover connection. Traffic flows between the on-premises network and the Azure VNet through an ExpressRoute connection. If there is a loss of connectivity in the ExpressRoute circuit, traffic is routed through an IPSec VPN tunnel. [**Deploy this solution**.](#deploy-the-solution) + +Note that if the ExpressRoute circuit is unavailable, the VPN route will only handle private peering connections. Public peering and Microsoft peering connections will pass over the Internet. 
+ + +![[0]][0] + +## Architecture + +The architecture consists of the following components. + +* **On-premises network**. A private local-area network running within an organization. + +* **VPN appliance**. A device or service that provides external connectivity to the on-premises network. The VPN appliance may be a hardware device, or it can be a software solution such as the Routing and Remote Access Service (RRAS) in Windows Server 2012. + + > [!NOTE] + > For a list of supported VPN appliances and information on configuring selected VPN appliances for connecting to Azure, see [About VPN devices for Site-to-Site VPN Gateway connections][vpn-appliance]. + > + > + +* **ExpressRoute circuit**. A layer 2 or layer 3 circuit supplied by the connectivity provider that joins the on-premises network with Azure through the edge routers. The circuit uses the hardware infrastructure managed by the connectivity provider. + +* **ExpressRoute virtual network gateway**. The ExpressRoute virtual network gateway enables the VNet to connect to the ExpressRoute circuit used for connectivity with your on-premises network. + +* **VPN virtual network gateway**. The VPN virtual network gateway enables the VNet to connect to the VPN appliance in the on-premises network. The VPN virtual network gateway is configured to accept requests from the on-premises network only through the VPN appliance. For more information, see [Connect an on-premises network to a Microsoft Azure virtual network][connect-to-an-Azure-vnet]. + +* **VPN connection**. The connection has properties that specify the connection type (IPSec) and the key shared with the on-premises VPN appliance to encrypt traffic. + +* **Azure Virtual Network (VNet)**. Each VNet resides in a single Azure region, and can host multiple application tiers. Application tiers can be segmented using subnets in each VNet. + +* **Gateway subnet**. The virtual network gateways are held in the same subnet. + +* **Cloud application**. 
The application hosted in Azure. It might include multiple tiers, with multiple subnets connected through Azure load balancers. The traffic in each subnet may be subject to rules defined using [network security groups][azure-network-security-group] (NSGs). For more information, see [Getting started with Microsoft Azure security][getting-started-with-azure-security]. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager](/azure/azure-resource-manager/resource-group-overview) and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### VNet and GatewaySubnet + +Create the ExpressRoute virtual network gateway and the VPN virtual network gateway in the same VNet. This means that they should share the same subnet named *GatewaySubnet*. + +If the VNet already includes a subnet named *GatewaySubnet*, ensure that it has a /27 or larger address space. If the existing subnet is too small, use the following PowerShell command to remove the subnet: + +```powershell +$vnet = Get-AzureRmVirtualNetwork -Name <yourVNetName> -ResourceGroupName <yourResourceGroup> +Remove-AzureRmVirtualNetworkSubnetConfig -Name GatewaySubnet -VirtualNetwork $vnet +``` + +If the VNet does not contain a subnet named **GatewaySubnet**, create a new one using the following PowerShell command: + +```powershell +$vnet = Get-AzureRmVirtualNetwork -Name <yourVNetName> -ResourceGroupName <yourResourceGroup> +Add-AzureRmVirtualNetworkSubnetConfig -Name "GatewaySubnet" -VirtualNetwork $vnet -AddressPrefix "10.200.255.224/27" +$vnet = Set-AzureRmVirtualNetwork -VirtualNetwork $vnet +``` + +### VPN and ExpressRoute gateways + +Verify that your organization meets the [ExpressRoute prerequisite requirements][expressroute-prereq] for connecting to Azure. 
+ +If you already have a VPN virtual network gateway in your Azure VNet, use the following PowerShell command to remove it: + +```powershell +Remove-AzureRmVirtualNetworkGateway -Name <yourGatewayName> -ResourceGroupName <yourResourceGroup> +``` + +Follow the instructions in [Implementing a hybrid network architecture with Azure ExpressRoute][implementing-expressroute] to establish your ExpressRoute connection. + +Follow the instructions in [Implementing a hybrid network architecture with Azure and On-premises VPN][implementing-vpn] to establish your VPN virtual network gateway connection. + +After you have established the virtual network gateway connections, test the environment as follows: + +1. Make sure you can connect from your on-premises network to your Azure VNet. +2. Contact your provider to stop ExpressRoute connectivity for testing. +3. Verify that you can still connect from your on-premises network to your Azure VNet using the VPN virtual network gateway connection. +4. Contact your provider to reestablish ExpressRoute connectivity. + +## Considerations + +For ExpressRoute considerations, see the [Implementing a Hybrid Network Architecture with Azure ExpressRoute][guidance-expressroute] guidance. + +For site-to-site VPN considerations, see the [Implementing a Hybrid Network Architecture with Azure and On-premises VPN][guidance-vpn] guidance. + +For general Azure security considerations, see [Microsoft cloud services and network security][best-practices-security]. + +## Deploy the solution + +**Prerequisites.** You must have an existing on-premises infrastructure already configured with a suitable network appliance. + +To deploy the solution, perform the following steps. + +1. 
Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-vpn-er%2Fazuredeploy.json) +2. Wait for the link to open in the Azure portal, then follow these steps: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-hybrid-vpn-er-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-vpn-er%2Fazuredeploy-expressRouteCircuit.json) +5. Wait for the link to open in the Azure portal, then follow these steps: + + * Select **Use existing** in the **Resource group** section and enter `ra-hybrid-vpn-er-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. 
+ + + +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[vpn-appliance]: /azure/vpn-gateway/vpn-gateway-about-vpn-devices +[azure-vpn-gateway]: /azure/vpn-gateway/vpn-gateway-about-vpngateways +[connect-to-an-Azure-vnet]: https://technet.microsoft.com/library/dn786406.aspx +[azure-network-security-group]: /azure/virtual-network/virtual-networks-nsg +[getting-started-with-azure-security]: /azure/security/azure-security-getting-started +[expressroute-prereq]: /azure/expressroute/expressroute-prerequisites +[implementing-expressroute]: ./expressroute.md +[implementing-vpn]: ./vpn.md +[guidance-expressroute]: ./expressroute.md +[guidance-vpn]: ./vpn.md +[best-practices-security]: /azure/best-practices-network-security +[solution-script]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/Deploy-ReferenceArchitecture.ps1 +[solution-script-bash]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/deploy-reference-architecture.sh +[vnet-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/parameters/virtualNetwork.parameters.json +[virtualnetworkgateway-vpn-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/parameters/virtualNetworkGateway-vpn.parameters.json +[virtualnetworkgateway-expressroute-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/parameters/virtualNetworkGateway-expressRoute.parameters.json +[er-circuit-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn-er/parameters/expressRouteCircuit.parameters.json +[azure-powershell-download]: https://azure.microsoft.com/documentation/articles/powershell-install-configure/ +[naming conventions]: /azure/guidance/guidance-naming-conventions +[azure-cli]: 
https://azure.microsoft.com/documentation/articles/xplat-cli-install/ +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[0]: ../_images/blueprints/hybrid-network-expressroute-vpn-failover.png "Architecture of a highly available hybrid network architecture using ExpressRoute and VPN gateway" +[ARM-Templates]: https://azure.microsoft.com/documentation/articles/resource-group-authoring-templates/ \ No newline at end of file diff --git a/docs/blueprints/hybrid-networking/expressroute.md b/docs/blueprints/hybrid-networking/expressroute.md new file mode 100644 index 00000000000..9b2e0101678 --- /dev/null +++ b/docs/blueprints/hybrid-networking/expressroute.md @@ -0,0 +1,320 @@ +--- +title: Connect an on-premises network to Azure using ExpressRoute +description: >- + How to implement a secure site-to-site network architecture that spans an + Azure virtual network and an on-premises network connected using Azure + ExpressRoute. +services: '' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: '' +pnp.series.title: Connect an on-premises network to Azure +pnp.series.next: expressroute-vpn-failover +pnp.series.prev: vpn +ms.assetid: 3ae3311f-04e8-4f7e-bdad-2d995dcccd6c +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +cardTitle: ExpressRoute +--- +# Connect an on-premises network to Azure using ExpressRoute + +This reference architecture shows how to connect an on-premises network to virtual networks on Azure, using [Azure ExpressRoute][expressroute-introduction]. ExpressRoute connections use a private, dedicated connection through a third-party connectivity provider. The private connection extends your on-premises network into Azure. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +The architecture consists of the following components. 
+ +* **On-premises corporate network**. A private local-area network running within an organization. + +* **ExpressRoute circuit**. A layer 2 or layer 3 circuit supplied by the connectivity provider that joins the on-premises network with Azure through the edge routers. The circuit uses the hardware infrastructure managed by the connectivity provider. + +* **Local edge routers**. Routers that connect the on-premises network to the circuit managed by the provider. Depending on how your connection is provisioned, you may need to provide the public IP addresses used by the routers. +* **Microsoft edge routers**. Two routers in an active-active highly available configuration. These routers enable a connectivity provider to connect their circuits directly to their datacenter. Depending on how your connection is provisioned, you may need to provide the public IP addresses used by the routers. + +* **Azure virtual networks (VNets)**. Each VNet resides in a single Azure region, and can host multiple application tiers. Application tiers can be segmented using subnets in each VNet. + +* **Azure public services**. Azure services that can be used within a hybrid application. These services are also available over the Internet, but accessing them using an ExpressRoute circuit provides low latency and more predictable performance, because traffic does not go through the Internet. Connections are performed using [public peering][expressroute-peering], with addresses that are either owned by your organization or supplied by your connectivity provider. + +* **Office 365 services**. The publicly available Office 365 applications and services provided by Microsoft. Connections are performed using [Microsoft peering][expressroute-peering], with addresses that are either owned by your organization or supplied by your connectivity provider. + + > [!NOTE] + > You can also connect directly to Microsoft CRM Online through Microsoft peering. 
+ > + > + +* **Connectivity providers** (not shown). Companies that provide a connection either using layer 2 or layer 3 connectivity between your datacenter and an Azure datacenter. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager](/azure/azure-resource-manager/resource-group-overview) and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### Connectivity providers + +Select a suitable ExpressRoute connectivity provider for your location. To get a list of connectivity providers available at your location, use the following Azure PowerShell command: + +```powershell +Get-AzureRmExpressRouteServiceProvider +``` + +ExpressRoute connectivity providers connect your datacenter to Microsoft in the following ways: + +* **Co-located at a cloud exchange**. If you're co-located in a facility with a cloud exchange, you can order virtual cross-connections to Azure through the co-location provider’s Ethernet exchange. Co-location providers can offer either layer 2 cross-connections, or managed layer 3 cross-connections between your infrastructure in the co-location facility and Azure. +* **Point-to-point Ethernet connections**. You can connect your on-premises datacenters/offices to Azure through point-to-point Ethernet links. Point-to-point Ethernet providers can offer layer 2 connections, or managed layer 3 connections between your site and Azure. +* **Any-to-any (IPVPN) networks**. You can integrate your wide area network (WAN) with Azure. Internet protocol virtual private network (IPVPN) providers (typically a multiprotocol label switching VPN) offer any-to-any connectivity between your branch offices and datacenters. Azure can be interconnected to your WAN to make it look just like any other branch office. 
WAN providers typically offer managed layer 3 connectivity. + +For more information about connectivity providers, see the [ExpressRoute introduction][expressroute-introduction]. + +### ExpressRoute circuit + +Ensure that your organization has met the [ExpressRoute prerequisite requirements][expressroute-prereqs] for connecting to Azure. + +If you haven't already done so, add a subnet named `GatewaySubnet` to your Azure VNet and create an ExpressRoute virtual network gateway using the Azure VPN gateway service. For more information about this process, see [ExpressRoute workflows for circuit provisioning and circuit states][ExpressRoute-provisioning]. + +Create an ExpressRoute circuit as follows: + +1. Run the following PowerShell command: + + ```powershell + New-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> -Location <> -SkuTier <> -SkuFamily <> -ServiceProviderName <> -PeeringLocation <> -BandwidthInMbps <> + ``` +2. Send the `ServiceKey` for the new circuit to the service provider. + +3. Wait for the provider to provision the circuit. To verify the provisioning state of a circuit, run the following PowerShell command: + + ```powershell + Get-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> + ``` + + The `Provisioning state` field in the `Service Provider` section of the output will change from `NotProvisioned` to `Provisioned` when the circuit is ready. + + > [!NOTE] + > If you're using a layer 3 connection, the provider should configure and manage routing for you. You provide the information necessary to enable the provider to implement the appropriate routes. + > + > + +4. If you're using a layer 2 connection: + + 1. Reserve two /30 subnets composed of valid public IP addresses for each type of peering you want to implement. These /30 subnets will be used to provide IP addresses for the routers used for the circuit. If you are implementing private, public, and Microsoft peering, you'll need 6 /30 subnets with valid public IP addresses. 
+ + 2. Configure routing for the ExpressRoute circuit. Run the following PowerShell commands for each type of peering you want to configure (private, public, and Microsoft). For more information, see [Create and modify routing for an ExpressRoute circuit][configure-expressroute-routing]. + + ```powershell + Set-AzureRmExpressRouteCircuitPeeringConfig -Name <> -Circuit <> -PeeringType <> -PeerASN <> -PrimaryPeerAddressPrefix <> -SecondaryPeerAddressPrefix <> -VlanId <> + + Set-AzureRmExpressRouteCircuit -ExpressRouteCircuit <> + ``` + + 3. Reserve another pool of valid public IP addresses to use for network address translation (NAT) for public and Microsoft peering. It is recommended to have a different pool for each peering. Specify the pool to your connectivity provider, so they can configure border gateway protocol (BGP) advertisements for those ranges. + +5. Run the following PowerShell commands to link your private VNet(s) to the ExpressRoute circuit. For more information, see [Link a virtual network to an ExpressRoute circuit][link-vnet-to-expressroute]. + + ```powershell + $circuit = Get-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> + $gw = Get-AzureRmVirtualNetworkGateway -Name <> -ResourceGroupName <> + New-AzureRmVirtualNetworkGatewayConnection -Name <> -ResourceGroupName <> -Location <> -VirtualNetworkGateway1 $gw -PeerId $circuit.Id -ConnectionType ExpressRoute + ``` + +You can connect multiple VNets located in different regions to the same ExpressRoute circuit, as long as all VNets and the ExpressRoute circuit are located within the same geopolitical region. + +### Troubleshooting + +If a previously functioning ExpressRoute circuit now fails to connect, in the absence of any configuration changes on-premises or within your private VNet, you may need to contact the connectivity provider and work with them to correct the issue. 
Use the following PowerShell command to verify that the ExpressRoute circuit has been provisioned: + +```powershell +Get-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> +``` + +The output of this command shows several properties for your circuit, including `ProvisioningState`, `CircuitProvisioningState`, and `ServiceProviderProvisioningState` as shown below. + +``` +ProvisioningState : Succeeded +Sku : { + "Name": "Standard_MeteredData", + "Tier": "Standard", + "Family": "MeteredData" + } +CircuitProvisioningState : Enabled +ServiceProviderProvisioningState : NotProvisioned +``` + +If the `ProvisioningState` is not set to `Succeeded` after you tried to create a new circuit, remove the circuit by using the command below and try to create it again. + +```powershell +Remove-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> +``` + +If your provider had already provisioned the circuit, and the `ProvisioningState` is set to `Failed`, or the `CircuitProvisioningState` is not `Enabled`, contact your provider for further assistance. + +## Scalability considerations + +ExpressRoute circuits provide a high bandwidth path between networks. Generally, the higher the bandwidth, the greater the cost. + +ExpressRoute offers two [pricing plans][expressroute-pricing] to customers, a metered plan and an unlimited data plan. Charges vary according to circuit bandwidth. Available bandwidth will likely vary from provider to provider. Use the `Get-AzureRmExpressRouteServiceProvider` cmdlet to see the providers available in your region and the bandwidths that they offer. + +A single ExpressRoute circuit can support a certain number of peerings and VNet links. See [ExpressRoute limits](/azure/azure-subscription-service-limits) for more information. + +For an extra charge, the ExpressRoute Premium add-on provides some additional capability: + +* Increased route limits for public and private peering. +* Increased number of VNet links per ExpressRoute circuit. 
+* Global connectivity for services. + +See [ExpressRoute pricing][expressroute-pricing] for details. + +ExpressRoute circuits are designed to allow temporary network bursts up to two times the bandwidth limit that you procured for no additional cost. This is achieved by using redundant links. However, not all connectivity providers support this feature. Verify that your connectivity provider enables this feature before depending on it. + +Although some providers allow you to change your bandwidth, make sure you pick an initial bandwidth that surpasses your needs and provides room for growth. If you need to increase bandwidth in the future, you are left with two options: + +- Increase the bandwidth. You should avoid this option as much as possible, and not all providers allow you to increase bandwidth dynamically. But if a bandwidth increase is needed, check with your provider to verify they support changing ExpressRoute bandwidth properties via PowerShell commands. If they do, run the commands below. + + ```powershell + $ckt = Get-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> + $ckt.ServiceProviderProperties.BandwidthInMbps = <> + Set-AzureRmExpressRouteCircuit -ExpressRouteCircuit $ckt + ``` + + You can increase the bandwidth without loss of connectivity. Downgrading the bandwidth will result in disruption in connectivity, because you must delete the circuit and recreate it with the new configuration. + +- Change your pricing plan and/or upgrade to Premium. To do so, run the following commands. The `Sku.Tier` property can be `Standard` or `Premium`; the `Sku.Family` property can be `MeteredData` or `UnlimitedData`. 
+ + ```powershell + $ckt = Get-AzureRmExpressRouteCircuit -Name <> -ResourceGroupName <> + + $ckt.Sku.Tier = "Premium" + $ckt.Sku.Family = "MeteredData" + $ckt.Sku.Name = "Premium_MeteredData" + + Set-AzureRmExpressRouteCircuit -ExpressRouteCircuit $ckt + ``` + + > [!IMPORTANT] + > Make sure the `Sku.Name` property matches the `Sku.Tier` and `Sku.Family`. If you change the family and tier, but not the name, your connection will be disabled. + > + > + + You can upgrade the SKU without disruption, but you cannot switch from the unlimited pricing plan to metered. When downgrading the SKU, your bandwidth consumption must remain within the default limit of the standard SKU. + +## Availability considerations + +ExpressRoute does not support router redundancy protocols such as hot standby routing protocol (HSRP) and virtual router redundancy protocol (VRRP) to implement high availability. Instead, it uses a redundant pair of BGP sessions per peering. To facilitate highly-available connections to your network, Azure provisions you with two redundant ports on two routers (part of the Microsoft edge) in an active-active configuration. + +By default, BGP sessions use an idle timeout value of 60 seconds. If a session times out three times (180 seconds total), the router is marked as unavailable, and all traffic is redirected to the remaining router. This 180-second timeout might be too long for critical applications. If so, you can change your BGP time-out settings on the on-premises router to a smaller value. + +You can configure high availability for your Azure connection in different ways, depending on the type of provider you use, and the number of ExpressRoute circuits and virtual network gateway connections you're willing to configure. The following summarizes your availability options: + +* If you're using a layer 2 connection, deploy redundant routers in your on-premises network in an active-active configuration. 
Connect the primary circuit to one router, and the secondary circuit to the other. This will give you a highly available connection at both ends of the connection. This is necessary if you require the ExpressRoute service level agreement (SLA). See [SLA for Azure ExpressRoute][sla-for-expressroute] for details. + + The following diagram shows a configuration with redundant on-premises routers connected to the primary and secondary circuits. Each circuit handles the traffic for a public peering and a private peering (each peering is designated a pair of /30 address spaces, as described in the previous section). + + ![[1]][1] + +* If you're using a layer 3 connection, verify that it provides redundant BGP sessions that handle availability for you. + +* Connect the VNet to multiple ExpressRoute circuits, supplied by different service providers. This strategy provides additional high-availability and disaster recovery capabilities. + +* Configure a site-to-site VPN as a failover path for ExpressRoute. For more about this option, see [Connect an on-premises network to Azure using ExpressRoute with VPN failover][highly-available-network-architecture]. + This option only applies to private peering. For Azure and Office 365 services, the Internet is the only failover path. + +## Manageability considerations + +You can use the [Azure Connectivity Toolkit (AzureCT)][azurect] to monitor connectivity between your on-premises datacenter and Azure. + +## Security considerations + +You can configure security options for your Azure connection in different ways, depending on your security concerns and compliance needs. + +ExpressRoute operates in layer 3. Threats in the application layer can be prevented by using a network security appliance that restricts traffic to legitimate resources. Additionally, ExpressRoute connections using public peering can only be initiated from on-premises. 
This prevents a rogue service from accessing and compromising on-premises data from the Internet. + +To maximize security, add network security appliances between the on-premises network and the provider edge routers. This will help to restrict the inflow of unauthorized traffic from the VNet: + +![[2]][2] + +For auditing or compliance purposes, it may be necessary to prohibit direct access from components running in the VNet to the Internet and implement [forced tunneling][forced-tuneling]. In this situation, Internet traffic should be redirected back through a proxy running on-premises where it can be audited. The proxy can be configured to block unauthorized traffic flowing out, and filter potentially malicious inbound traffic. + +![[3]][3] + +To maximize security, do not enable a public IP address for your VMs, and use NSGs to ensure that these VMs aren't publicly accessible. VMs should only be available using the internal IP address. These addresses can be made accessible through the ExpressRoute network, enabling on-premises DevOps staff to perform configuration or maintenance. + +If you must expose management endpoints for VMs to an external network, use NSGs or access control lists to restrict the visibility of these ports to a whitelist of IP addresses or networks. + +> [!NOTE] +> By default, Azure VMs deployed through the Azure portal include a public IP address that provides login access. +> +> + + +## Deploy the solution + +**Prerequisites.** You must have an existing on-premises infrastructure already configured with a suitable network appliance. + +To deploy the solution, perform the following steps. + +1. 
Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-er%2Fazuredeploy.json) +2. Wait for the link to open in the Azure portal, then follow these steps: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-hybrid-er-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-er%2Fazuredeploy-expressRouteCircuit.json) +5. Wait for the link to open in the Azure portal, then follow these steps: + + * Select **Use existing** in the **Resource group** section and enter `ra-hybrid-er-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +6. Wait for the deployment to complete. 
+ + + +[forced-tuneling]: ../dmz/secure-vnet-hybrid.md +[highly-available-network-architecture]: ./expressroute-vpn-failover.md +[naming-conventions]: /azure/guidance/guidance-naming-conventions + +[expressroute-technical-overview]: /azure/expressroute/expressroute-introduction +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[azure-powershell]: /azure/powershell-azure-resource-manager +[expressroute-prereqs]: /azure/expressroute/expressroute-prerequisites +[configure-expressroute-routing]: /azure/expressroute/expressroute-howto-routing-arm +[sla-for-expressroute]: https://azure.microsoft.com/support/legal/sla/expressroute/v1_0/ +[link-vnet-to-expressroute]: /azure/expressroute/expressroute-howto-linkvnet-arm +[ExpressRoute-provisioning]: /azure/expressroute/expressroute-workflows +[expressroute-introduction]: /azure/expressroute/expressroute-introduction +[expressroute-peering]: /azure/expressroute/expressroute-circuit-peerings +[expressroute-pricing]: https://azure.microsoft.com/pricing/details/expressroute/ +[expressroute-limits]: /azure/azure-subscription-service-limits#networking-limits +[sample-script]: #sample-solution-script +[azurect]: https://github.com/Azure/NetworkMonitoring/tree/master/AzureCT +[arm-templates]: /azure/resource-group-authoring-templates +[solution-script]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-er/Deploy-ReferenceArchitecture.ps1 +[solution-script-bash]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-er/deploy-reference-architecture.sh +[vnet-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-er/parameters/virtualNetwork.parameters.json +[virtualnetworkgateway-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-er/parameters/virtualNetworkGateway.parameters.json +[visio-download]: 
http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[er-circuit-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-er/parameters/expressRouteCircuit.parameters.json +[azure-powershell-download]: https://azure.microsoft.com/documentation/articles/powershell-install-configure/ +[azure-cli]: https://azure.microsoft.com/documentation/articles/xplat-cli-install/ +[0]: ../_images/guidance-hybrid-network-expressroute/figure1.png "Hybrid network architecture using Azure ExpressRoute" +[1]: ../_images/guidance-hybrid-network-expressroute/figure2.png "Using redundant routers with ExpressRoute primary and secondary circuits" +[2]: ../_images/guidance-hybrid-network-expressroute/figure3.png "Adding security devices to the on-premises network" +[3]: ../_images/guidance-hybrid-network-expressroute/figure4.png "Using forced tunneling to audit Internet-bound traffic" +[4]: ../_images/guidance-hybrid-network-expressroute/figure5.png "Locating the ServiceKey of an ExpressRoute circuit" \ No newline at end of file diff --git a/docs/blueprints/hybrid-networking/images/expressroute-vpn-failover.svg b/docs/blueprints/hybrid-networking/images/expressroute-vpn-failover.svg new file mode 100644 index 00000000000..b070ca55b6f --- /dev/null +++ b/docs/blueprints/hybrid-networking/images/expressroute-vpn-failover.svg @@ -0,0 +1,545 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + +Azure Virtual Network + + rect4743 + + + + g5181 + + path3 + + + + path5 + + + + path7 + + + + path9 + + + + circle11 + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + On-premises network + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + VPN Connection + + + + + + + + + + ExpressRoute + + diff 
--git a/docs/blueprints/hybrid-networking/images/expressroute.svg b/docs/blueprints/hybrid-networking/images/expressroute.svg new file mode 100644 index 00000000000..8541d5f68c7 --- /dev/null +++ b/docs/blueprints/hybrid-networking/images/expressroute.svg @@ -0,0 +1,485 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + +Azure Virtual Network + + rect4743 + + + + g5181 + + path3 + + + + path5 + + + + path7 + + + + path9 + + + + circle11 + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + On-premises network + + + + + + + + + + + + + + + + + + + + + + + + + + ExpressRoute + + + + + + diff --git a/docs/blueprints/hybrid-networking/images/vpn.svg b/docs/blueprints/hybrid-networking/images/vpn.svg new file mode 100644 index 00000000000..b7f48b6f901 --- /dev/null +++ b/docs/blueprints/hybrid-networking/images/vpn.svg @@ -0,0 +1,607 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + text10286 + Azure Virtual Network + + + + + +Azure Virtual Network + + + rect4743 + + + + g5181 + + path3 + + + + path5 + + + + path7 + + + + path9 + + + + circle11 + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + On-premises network + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + VPN Connection + + + + + + + diff --git a/docs/blueprints/hybrid-networking/index.md b/docs/blueprints/hybrid-networking/index.md new file mode 100644 index 00000000000..ad40e216c7f --- /dev/null +++ b/docs/blueprints/hybrid-networking/index.md @@ -0,0 +1,68 @@ +--- +title: Connect an on-premises network 
to Azure | Architectural Blueprints +description: Recommended architectures for secure, robust network connections between on-premises networks and Azure. +layout: LandingPage +pnp.series.title: Connect an on-premises network to Azure +pnp.series.next: vpn +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +Many organizations wish to integrate an existing on-premises infrastructure with Azure. A key part of this scenario is to establish a secure and robust network connection between the on-premises network and Azure. + + + diff --git a/docs/blueprints/hybrid-networking/series.yml b/docs/blueprints/hybrid-networking/series.yml new file mode 100644 index 00000000000..063132b6814 --- /dev/null +++ b/docs/blueprints/hybrid-networking/series.yml @@ -0,0 +1,5 @@ +--- +columns: 3 +summary: Recommended architectures for secure, robust network connections between on-premises networks and Azure. +description: Many organizations wish to integrate an existing on-premises infrastructure with Azure. A key part of this scenario is to establish a secure and robust network connection between the on-premises network and Azure. +--- \ No newline at end of file diff --git a/docs/blueprints/hybrid-networking/vpn.md b/docs/blueprints/hybrid-networking/vpn.md new file mode 100644 index 00000000000..b81513528f2 --- /dev/null +++ b/docs/blueprints/hybrid-networking/vpn.md @@ -0,0 +1,537 @@ +--- +title: Connect an on-premises network to Azure using VPN +description: >- + How to implement a secure site-to-site network architecture that spans an + Azure virtual network and an on-premises network connected using a VPN. 
+services: '' +documentationcenter: na +author: RohitSharma-pnp +manager: christb +editor: '' +tags: '' +pnp.series.title: Connect an on-premises network to Azure +pnp.series.next: expressroute +ms.assetid: b6e3c36b-c9c1-4427-9b85-7b9f7f69141e +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: roshar +pnp.series.prev: ./index +cardTitle: VPN +--- +# Connect an on-premises network to Azure using a VPN gateway + +This reference architecture shows how to extend an on-premises network to Azure, using a site-to-site virtual private network (VPN). Traffic flows between the on-premises network and an Azure Virtual Network (VNet) through an IPSec VPN tunnel. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +The architecture consists of the following components. + +* **On-premises network**. A private local-area network running within an organization. + +* **VPN appliance**. A device or service that provides external connectivity to the on-premises network. The VPN appliance may be a hardware device, or it can be a software solution such as the Routing and Remote Access Service (RRAS) in Windows Server 2012. + + > [!NOTE] + > For a list of supported VPN appliances and information on configuring them to connect to an Azure VPN gateway, see the instructions for the selected device in the article [About VPN devices for Site-to-Site VPN Gateway connections][vpn-appliance]. + > + > + +* **Virtual network (VNet)**. The cloud application and the components for the Azure VPN gateway reside in the same [VNet][azure-virtual-network]. + +* **Azure VPN gateway**. The [VPN gateway][azure-vpn-gateway] service enables you to connect the VNet to the on-premises network through a VPN appliance. For more information, see [Connect an on-premises network to a Microsoft Azure virtual network][connect-to-an-Azure-vnet]. 
The VPN gateway includes the following elements: + + * **Virtual network gateway**. A resource that provides a virtual VPN appliance for the VNet. It is responsible for routing traffic from the on-premises network to the VNet. + * **Local network gateway**. An abstraction of the on-premises VPN appliance. Network traffic from the cloud application to the on-premises network is routed through this gateway. + * **Connection**. The connection has properties that specify the connection type (IPSec) and the key shared with the on-premises VPN appliance to encrypt traffic. + * **Gateway subnet**. The virtual network gateway is held in its own subnet, which is subject to various requirements, described in the Recommendations section below. + +* **Cloud application**. The application hosted in Azure. It might include multiple tiers, with multiple subnets connected through Azure load balancers. The traffic in each subnet may be subject to rules defined by using [Network Security Groups (NSGs)][azure-network-security-group]. For more information, see [Getting started with Microsoft Azure security][getting-started-with-azure-security]. + + > [!NOTE] + > This article does not focus on the application infrastructure. See [Running an N-tier architecture on Azure][implementing-a-multi-tier-architecture-on-Azure] for detailed information. + > + > + +* **Internal load balancer**. Network traffic from the VPN gateway is routed to the cloud application through an internal load balancer. The load balancer is located in the front-end subnet of the application. + +> [!NOTE] +> Azure has two different deployment models: [Azure Resource Manager][resource-manager-overview] and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + + + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. 
+ +### VNet and gateway subnet + +Create an Azure VNet with an address space large enough for all of your required resources. Ensure that the VNet address space has sufficient room for growth if additional VMs are likely to be needed in the future. The address space of the VNet must not overlap with the on-premises network. For example, the diagram above uses the address space 10.20.0.0/16 for the VNet. + +Create a subnet named *GatewaySubnet*, with an address range of /27. This subnet is required by the virtual network gateway. Allocating 32 addresses to this subnet will help to prevent reaching gateway size limitations in the future. Also, avoid placing this subnet in the middle of the address space. A good practice is to set the address space for the gateway subnet at the upper end of the VNet address space. The example shown in the diagram uses 10.20.255.224/27. Here is a quick procedure to calculate the [CIDR]: + +1. Set the variable bits in the address space of the VNet to 1, up to the bits being used by the gateway subnet, then set the remaining bits to 0. +2. Convert the resulting bits to decimal and express it as an address space with the prefix length set to the size of the gateway subnet. + +For example, for a VNet with an IP address range of 10.20.0.0/16, applying step #1 above becomes 10.20.0b11111111.0b11100000. Converting that to decimal and expressing it as an address space yields 10.20.255.224/27. + +> [!WARNING] +> Do not deploy any VMs to the gateway subnet. Also, do not assign an NSG to this subnet, as it will cause the gateway to stop functioning. +> +> + +### Virtual network gateway + +Allocate a public IP address for the virtual network gateway. + +Create the virtual network gateway in the gateway subnet and assign it the newly allocated public IP address. 
Use the gateway type that most closely matches your requirements and that is enabled by your VPN appliance: + +- Create a [policy-based gateway][policy-based-routing] if you need to closely control how requests are routed based on policy criteria such as address prefixes. Policy-based gateways use static routing, and only work with site-to-site connections. + +- Create a [route-based gateway][route-based-routing] if you connect to the on-premises network using RRAS, support multi-site or cross-region connections, or implement VNet-to-VNet connections (including routes that traverse multiple VNets). Route-based gateways use dynamic routing to direct traffic between networks. They can tolerate failures in the network path better than static routes because they can try alternative routes. Route-based gateways can also reduce the management overhead because routes might not need to be updated manually when network addresses change. + +For a list of supported VPN appliances, see [About VPN devices for Site-to-Site VPN Gateway connections][vpn-appliances]. + +> [!NOTE] +> After the gateway has been created, you cannot change between gateway types without deleting and re-creating the gateway. +> +> + +Select the Azure VPN gateway SKU that most closely matches your throughput requirements. Azure VPN gateway is available in three SKUs shown in the following table. + +| SKU | VPN Throughput | Max IPSec Tunnels | +| --- | --- | --- | +| Basic |100 Mbps |10 | +| Standard |100 Mbps |10 | +| High Performance |200 Mbps |30 | + +> [!NOTE] +> The Basic SKU is not compatible with Azure ExpressRoute. You can [change the SKU][changing-SKUs] after the gateway has been created. +> +> + +You are charged based on the amount of time that the gateway is provisioned and available. See [VPN Gateway Pricing][azure-gateway-charges]. 
+ +Create routing rules for the gateway subnet that direct incoming application traffic from the gateway to the internal load balancer, rather than allowing requests to pass directly to the application VMs. + +### On-premises network connection + +Create a local network gateway. Specify the public IP address of the on-premises VPN appliance, and the address space of the on-premises network. Note that the on-premises VPN appliance must have a public IP address that can be accessed by the local network gateway in Azure VPN Gateway. The VPN device cannot be located behind a network address translation (NAT) device. + +Create a site-to-site connection for the virtual network gateway and the local network gateway. Select the site-to-site (IPSec) connection type, and specify the shared key. Site-to-site encryption with the Azure VPN gateway is based on the IPSec protocol, using preshared keys for authentication. You specify the key when you create the Azure VPN gateway. You must configure the VPN appliance running on-premises with the same key. Other authentication mechanisms are not currently supported. + +Ensure that the on-premises routing infrastructure is configured to forward requests intended for addresses in the Azure VNet to the VPN device. + +Open any ports required by the cloud application in the on-premises network. + +Test the connection to verify that: + +* The on-premises VPN appliance correctly routes traffic to the cloud application through the Azure VPN gateway. +* The VNet correctly routes traffic back to the on-premises network. +* Prohibited traffic in both directions is blocked correctly. + +## Scalability considerations + +You can achieve limited vertical scalability by moving from the Basic or Standard VPN Gateway SKUs to the High Performance VPN SKU. + +For VNets that expect a large volume of VPN traffic, consider distributing the different workloads into separate smaller VNets and configuring a VPN gateway for each of them. 
+ +You can partition the VNet either horizontally or vertically. To partition horizontally, move some VM instances from each tier into subnets of the new VNet. The result is that each VNet has the same structure and functionality. To partition vertically, redesign each tier to divide the functionality into different logical areas (such as handling orders, invoicing, customer account management, and so on). Each functional area can then be placed in its own VNet. + +Replicating an on-premises Active Directory domain controller in the VNet, and implementing DNS in the VNet, can help to reduce some of the security-related and administrative traffic flowing from on-premises to the cloud. For more information, see [Extending Active Directory Domain Services (AD DS) to Azure][adds-extend-domain]. + +## Availability considerations + +If you need to ensure that the on-premises network remains available to the Azure VPN gateway, implement a failover cluster for the on-premises VPN gateway. + +If your organization has multiple on-premises sites, create [multi-site connections][vpn-gateway-multi-site] to one or more Azure VNets. This approach requires dynamic (route-based) routing, so make sure that the on-premises VPN gateway supports this feature. + +For details about service level agreements, see [SLA for VPN Gateway][sla-for-vpn-gateway]. + +## Manageability considerations + +Monitor diagnostic information from on-premises VPN appliances. This process depends on the features provided by the VPN appliance. For example, if you are using the Routing and Remote Access Service on Windows Server 2012, you can enable [RRAS logging][rras-logging]. + +Use [Azure VPN gateway diagnostics][gateway-diagnostic-logs] to capture information about connectivity issues. These logs can be used to track information such as the sources and destinations of connection requests, which protocol was used, and how the connection was established (or why the attempt failed).
+ +Monitor the operational logs of the Azure VPN gateway using the audit logs available in the Azure portal. Separate logs are available for the local network gateway, the Azure network gateway, and the connection. This information can be used to track any changes made to the gateway, and can be useful if a previously functioning gateway stops working for some reason. + +![[2]][2] + +Monitor connectivity, and track connectivity failure events. You can use a monitoring package such as [Nagios][nagios] to capture and report this information. + +## Security considerations + +Generate a different shared key for each VPN gateway. Use a strong shared key to help resist brute-force attacks. + +> [!NOTE] +> Currently, you cannot use Azure Key Vault to preshare keys for the Azure VPN gateway. +> +> + +Ensure that the on-premises VPN appliance uses an encryption method that is [compatible with the Azure VPN gateway][vpn-appliance-ipsec]. For policy-based routing, the Azure VPN gateway supports the AES256, AES128, and 3DES encryption algorithms. Route-based gateways support AES256 and 3DES. + +If your on-premises VPN appliance is on a perimeter network (DMZ) that has a firewall between the perimeter network and the Internet, you might have to configure [additional firewall rules][additional-firewall-rules] to allow the site-to-site VPN connection. + +If the application in the VNet sends data to the Internet, consider [implementing forced tunneling][forced-tunneling] to route all Internet-bound traffic through the on-premises network. This approach enables you to audit outgoing requests made by the application from the on-premises infrastructure. + +> [!NOTE] +> Forced tunneling can impact connectivity to Azure services (the Storage Service, for example) and the Windows license manager. +> +> + + +## Troubleshooting + +For general information on troubleshooting common VPN-related errors, see [Troubleshooting common VPN related errors][troubleshooting-vpn-errors]. 
+ +The following recommendations are useful for determining if your on-premises VPN appliance is functioning correctly. + +- **Check any log files generated by the VPN appliance for errors or failures.** + + This will help you determine if the VPN appliance is functioning correctly. The location of this information will vary according to your appliance. For example, if you are using RRAS on Windows Server 2012, you can use the following PowerShell command to display error event information for the RRAS service: + + ```PowerShell + Get-EventLog -LogName System -EntryType Error -Source RemoteAccess | Format-List -Property * + ``` + + The *Message* property of each entry provides a description of the error. Some common examples are: + + - Inability to connect, possibly due to an incorrect IP address specified for the Azure VPN gateway in the RRAS VPN network interface configuration. + + ``` + EventID : 20111 + MachineName : on-prem-vm + Data : {41, 3, 0, 0} + Index : 14231 + Category : (0) + CategoryNumber : 0 + EntryType : Error + Message : RoutingDomainID- {00000000-0000-0000-0000-000000000000}: A demand dial connection to the remote + interface AzureGateway on port VPN2-4 was successfully initiated but failed to complete + successfully because of the following error: The network connection between your computer and + the VPN server could not be established because the remote server is not responding. This could + be because one of the network devices (for example, firewalls, NAT, routers, and so on) between your computer + and the remote server is not configured to allow VPN connections. Please contact your + Administrator or your service provider to determine which device may be causing the problem. + Source : RemoteAccess + ReplacementStrings : {{00000000-0000-0000-0000-000000000000}, AzureGateway, VPN2-4, The network connection between + your computer and the VPN server could not be established because the remote server is not + responding. 
This could be because one of the network devices (for example, firewalls, NAT, routers, and so on) + between your computer and the remote server is not configured to allow VPN connections. Please + contact your Administrator or your service provider to determine which device may be causing the + problem.} + InstanceId : 20111 + TimeGenerated : 3/18/2016 1:26:02 PM + TimeWritten : 3/18/2016 1:26:02 PM + UserName : + Site : + Container : + ``` + + - The wrong shared key being specified in the RRAS VPN network interface configuration. + + ``` + EventID : 20111 + MachineName : on-prem-vm + Data : {233, 53, 0, 0} + Index : 14245 + Category : (0) + CategoryNumber : 0 + EntryType : Error + Message : RoutingDomainID- {00000000-0000-0000-0000-000000000000}: A demand dial connection to the remote + interface AzureGateway on port VPN2-4 was successfully initiated but failed to complete + successfully because of the following error: Internet key exchange (IKE) authentication credentials are unacceptable. + + Source : RemoteAccess + ReplacementStrings : {{00000000-0000-0000-0000-000000000000}, AzureGateway, VPN2-4, IKE authentication credentials are + unacceptable. + } + InstanceId : 20111 + TimeGenerated : 3/18/2016 1:34:22 PM + TimeWritten : 3/18/2016 1:34:22 PM + UserName : + Site : + Container : + ``` + + You can also obtain event log information about attempts to connect through the RRAS service using the following PowerShell command: + + ``` + Get-EventLog -LogName Application -Source RasClient | Format-List -Property * + ``` + + In the event of a failure to connect, this log will contain errors that look similar to the following: + + ``` + EventID : 20227 + MachineName : on-prem-vm + Data : {} + Index : 4203 + Category : (0) + CategoryNumber : 0 + EntryType : Error + Message : CoId={B4000371-A67F-452F-AA4C-3125AA9CFC78}: The user SYSTEM dialed a connection named + AzureGateway that has failed. The error code returned on failure is 809. 
+ Source : RasClient + ReplacementStrings : {{B4000371-A67F-452F-AA4C-3125AA9CFC78}, SYSTEM, AzureGateway, 809} + InstanceId : 20227 + TimeGenerated : 3/18/2016 1:29:21 PM + TimeWritten : 3/18/2016 1:29:21 PM + UserName : + Site : + Container : + ``` + +- **Verify connectivity and routing across the VPN gateway.** + + The VPN appliance may not be correctly routing traffic through the Azure VPN Gateway. Use a tool such as [PsPing][psping] to verify connectivity and routing across the VPN gateway. For example, to test connectivity from an on-premises machine to a web server located on the VNet, run the following command (replacing `<<web-server-address>>` with the address of the web server): + + ``` + PsPing -t <<web-server-address>>:80 + ``` + + If the on-premises machine can route traffic to the web server, you should see output similar to the following: + + ``` + D:\PSTools>psping -t 10.20.0.5:80 + + PsPing v2.01 - PsPing - ping, latency, bandwidth measurement utility + Copyright (C) 2012-2014 Mark Russinovich + Sysinternals - www.sysinternals.com + + TCP connect to 10.20.0.5:80: + Infinite iterations (warmup 1) connecting test: + Connecting to 10.20.0.5:80 (warmup): 6.21ms + Connecting to 10.20.0.5:80: 3.79ms + Connecting to 10.20.0.5:80: 3.44ms + Connecting to 10.20.0.5:80: 4.81ms + + Sent = 3, Received = 3, Lost = 0 (0% loss), + Minimum = 3.44ms, Maximum = 4.81ms, Average = 4.01ms + ``` + + If the on-premises machine cannot communicate with the specified destination, you will see messages like this: + + ``` + D:\PSTools>psping -t 10.20.1.6:80 + + PsPing v2.01 - PsPing - ping, latency, bandwidth measurement utility + Copyright (C) 2012-2014 Mark Russinovich + Sysinternals - www.sysinternals.com + + TCP connect to 10.20.1.6:80: + Infinite iterations (warmup 1) connecting test: + Connecting to 10.20.1.6:80 (warmup): This operation returned because the timeout period expired. + Connecting to 10.20.1.6:80: This operation returned because the timeout period expired.
+ Connecting to 10.20.1.6:80: This operation returned because the timeout period expired. + Connecting to 10.20.1.6:80: This operation returned because the timeout period expired. + Connecting to 10.20.1.6:80: + Sent = 3, Received = 0, Lost = 3 (100% loss), + Minimum = 0.00ms, Maximum = 0.00ms, Average = 0.00ms + ``` + +- **Verify that the on-premises firewall allows VPN traffic to pass and that the correct ports are opened.** + +- **Verify that the on-premises VPN appliance uses an encryption method that is [compatible with the Azure VPN gateway][vpn-appliance].** For policy-based routing, the Azure VPN gateway supports the AES256, AES128, and 3DES encryption algorithms. Route-based gateways support AES256 and 3DES. + +The following recommendations are useful for determining if there is a problem with the Azure VPN gateway: + +- **Examine [Azure VPN gateway diagnostic logs][gateway-diagnostic-logs] for potential issues.** + +- **Verify that the Azure VPN gateway and on-premises VPN appliance are configured with the same shared authentication key.** + + You can view the shared key stored by the Azure VPN gateway using the following Azure CLI command: + + ``` + azure network vpn-connection shared-key show <<resource-group>> <<vpn-connection-name>> + ``` + + Use the command appropriate for your on-premises VPN appliance to show the shared key configured for that appliance. + + Verify that the *GatewaySubnet* subnet holding the Azure VPN gateway is not associated with an NSG. + + You can view the subnet details using the following Azure CLI command: + + ``` + azure network vnet subnet show -g <<resource-group>> -e <<vnet-name>> -n GatewaySubnet + ``` + + Ensure there is no data field named *Network Security Group id*. The following example shows the results for an instance of the *GatewaySubnet* that has an assigned NSG (*VPN-Gateway-Group*). This can prevent the gateway from working correctly if there are any rules defined for this NSG.
+ + ``` + C:\>azure network vnet subnet show -g profx-prod-rg -e profx-vnet -n GatewaySubnet + info: Executing command network vnet subnet show + + Looking up virtual network "profx-vnet" + + Looking up the subnet "GatewaySubnet" + data: Id : /subscriptions/########-####-####-####-############/resourceGroups/profx-prod-rg/providers/Microsoft.Network/virtualNetworks/profx-vnet/subnets/GatewaySubnet + data: Name : GatewaySubnet + data: Provisioning state : Succeeded + data: Address prefix : 10.20.3.0/27 + data: Network Security Group id : /subscriptions/########-####-####-####-############/resourceGroups/profx-prod-rg/providers/Microsoft.Network/networkSecurityGroups/VPN-Gateway-Group + info: network vnet subnet show command OK + ``` + +- **Verify that the virtual machines in the Azure VNet are configured to permit traffic coming in from outside the VNet.** + + Check any NSG rules associated with subnets containing these virtual machines. You can view all NSG rules using the following Azure CLI command: + + ``` + azure network nsg show -g <<resource-group>> -n <<nsg-name>> + ``` + +- **Verify that the Azure VPN gateway is connected.** + + You can use the following Azure PowerShell command to check the current status of the Azure VPN connection. The `<<connection-name>>` parameter is the name of the Azure VPN connection that links the virtual network gateway and the local gateway.
+ + ``` + Get-AzureRmVirtualNetworkGatewayConnection -Name <<connection-name>> -ResourceGroupName <<resource-group>> + ``` + + The following snippets highlight the output generated if the gateway is connected (the first example), and disconnected (the second example): + + ``` + PS C:\> Get-AzureRmVirtualNetworkGatewayConnection -Name profx-gateway-connection -ResourceGroupName profx-prod-rg + + AuthorizationKey : + VirtualNetworkGateway1 : Microsoft.Azure.Commands.Network.Models.PSVirtualNetworkGateway + VirtualNetworkGateway2 : + LocalNetworkGateway2 : Microsoft.Azure.Commands.Network.Models.PSLocalNetworkGateway + Peer : + ConnectionType : IPsec + RoutingWeight : 0 + SharedKey : #################################### + ConnectionStatus : Connected + EgressBytesTransferred : 55254803 + IngressBytesTransferred : 32227221 + ProvisioningState : Succeeded + ... + ``` + + ``` + PS C:\> Get-AzureRmVirtualNetworkGatewayConnection -Name profx-gateway-connection2 -ResourceGroupName profx-prod-rg + + AuthorizationKey : + VirtualNetworkGateway1 : Microsoft.Azure.Commands.Network.Models.PSVirtualNetworkGateway + VirtualNetworkGateway2 : + LocalNetworkGateway2 : Microsoft.Azure.Commands.Network.Models.PSLocalNetworkGateway + Peer : + ConnectionType : IPsec + RoutingWeight : 0 + SharedKey : #################################### + ConnectionStatus : NotConnected + EgressBytesTransferred : 0 + IngressBytesTransferred : 0 + ProvisioningState : Succeeded + ... + ``` + +The following recommendations are useful for determining if there is an issue with Host VM configuration, network bandwidth utilization, or application performance: + +- **Verify that the firewall in the guest operating system running on the Azure VMs in the subnet is configured correctly to allow permitted traffic from the on-premises IP ranges.** + +- **Verify that the volume of traffic is not close to the limit of the bandwidth available to the Azure VPN gateway.** + + How to verify this depends on the VPN appliance running on-premises.
For example, if you are using RRAS on Windows Server 2012, you can use Performance Monitor to track the volume of data being received and transmitted over the VPN connection. Using the *RAS Total* object, select the *Bytes Received/Sec* and *Bytes Transmitted/Sec* counters: + + ![[3]][3] + + You should compare the results with the bandwidth available to the VPN gateway (100 Mbps for the Basic and Standard SKUs, and 200 Mbps for the High Performance SKU): + + ![[4]][4] + +- **Verify that you have deployed the right number and size of VMs for your application load.** + + Determine if any of the virtual machines in the Azure VNet are running slowly. If so, they may be overloaded, there may be too few to handle the load, or the load balancers may not be configured correctly. To determine this, [capture and analyze diagnostic information][azure-vm-diagnostics]. You can examine the results using the Azure portal, but many third-party tools are also available that can provide detailed insights into the performance data. + +- **Verify that the application is making efficient use of cloud resources.** + + Instrument application code running on each VM to determine whether applications are making the best use of resources. You can use tools such as [Application Insights][application-insights]. + +## Deploy the solution + + +**Prerequisites.** You must have an existing on-premises infrastructure already configured with a suitable network appliance. + +To deploy the solution, perform the following steps. + +1. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-hybrid-network-vpn%2Fazuredeploy.json) +2.
Wait for the link to open in the Azure portal, then follow these steps: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-hybrid-vpn-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. + + + + + +[implementing-a-multi-tier-architecture-on-Azure]: ../virtual-machines-windows/n-tier.md +[adds-extend-domain]: ../identity/adds-extend-domain.md +[expressroute]: ../hybrid-networking/expressroute.md + +[naming conventions]: /azure/guidance/guidance-naming-conventions + +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[arm-templates]: /azure/resource-group-authoring-templates +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-portal]: /azure/azure-portal/resource-group-portal +[azure-powershell]: /azure/powershell-azure-resource-manager +[azure-virtual-network]: /azure/virtual-network/virtual-networks-overview +[vpn-appliance]: /azure/vpn-gateway/vpn-gateway-about-vpn-devices +[azure-vpn-gateway]: https://azure.microsoft.com/services/vpn-gateway/ +[azure-gateway-charges]: https://azure.microsoft.com/pricing/details/vpn-gateway/ +[azure-network-security-group]: https://azure.microsoft.com/documentation/articles/virtual-networks-nsg/ +[connect-to-an-Azure-vnet]: https://technet.microsoft.com/library/dn786406.aspx +[vpn-gateway-multi-site]: /azure/vpn-gateway/vpn-gateway-multi-site +[policy-based-routing]: https://en.wikipedia.org/wiki/Policy-based_routing +[route-based-routing]: https://en.wikipedia.org/wiki/Static_routing +[network-security-group]: /azure/virtual-network/virtual-networks-nsg +[sla-for-vpn-gateway]: 
https://azure.microsoft.com/support/legal/sla/vpn-gateway/v1_2/ +[additional-firewall-rules]: https://technet.microsoft.com/library/dn786406.aspx#firewall +[nagios]: https://www.nagios.org/ +[azure-vpn-gateway-diagnostics]: http://blogs.technet.com/b/keithmayer/archive/2014/12/18/diagnose-azure-virtual-network-vpn-connectivity-issues-with-powershell.aspx +[ping]: https://technet.microsoft.com/library/ff961503.aspx +[tracert]: https://technet.microsoft.com/library/ff961507.aspx +[psping]: http://technet.microsoft.com/sysinternals/jj729731.aspx +[nmap]: http://nmap.org +[changing-SKUs]: https://azure.microsoft.com/blog/azure-virtual-network-gateway-improvements/ +[gateway-diagnostic-logs]: http://blogs.technet.com/b/keithmayer/archive/2015/12/07/step-by-step-capturing-azure-resource-manager-arm-vnet-gateway-diagnostic-logs.aspx +[troubleshooting-vpn-errors]: https://blogs.technet.microsoft.com/rrasblog/2009/08/12/troubleshooting-common-vpn-related-errors/ +[rras-logging]: https://www.petri.com/enable-diagnostic-logging-in-windows-server-2012-r2-routing-and-remote-access +[create-on-prem-network]: https://technet.microsoft.com/library/dn786406.aspx#routing +[create-azure-vnet]: /azure/virtual-network/virtual-networks-create-vnet-classic-cli +[azure-vm-diagnostics]: https://azure.microsoft.com/blog/windows-azure-virtual-machine-monitoring-with-wad-extension/ +[application-insights]: /azure/application-insights/app-insights-overview-usage +[forced-tunneling]: https://azure.microsoft.com/documentation/articles/vpn-gateway-about-forced-tunneling/ +[getting-started-with-azure-security]: /azure/security/azure-security-getting-started +[vpn-appliances]: /azure/vpn-gateway/vpn-gateway-about-vpn-devices +[installing-ad]: /azure/active-directory/active-directory-install-replica-active-directory-domain-controller +[deploying-ad]: https://msdn.microsoft.com/library/azure/jj156090.aspx +[creating-dns]: 
https://blogs.msdn.microsoft.com/mcsuksoldev/2014/03/04/creating-a-dns-server-in-azure-iaas/ +[configuring-dns]: /azure/virtual-network/virtual-networks-manage-dns-in-vnet +[stormshield]: https://azure.microsoft.com/marketplace/partners/stormshield/stormshield-network-security-for-cloud/ +[vpn-appliance-ipsec]: /azure/vpn-gateway/vpn-gateway-about-vpn-devices#ipsec-parameters +[solution-script]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn/Deploy-ReferenceArchitecture.ps1 +[solution-script-bash]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn/deploy-reference-architecture.sh +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vnet-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn/parameters/virtualNetwork.parameters.json +[virtualNetworkGateway-parameters]: https://github.com/mspnp/reference-architectures/tree/master/guidance-hybrid-network-vpn/parameters/virtualNetworkGateway.parameters.json +[azure-powershell-download]: https://azure.microsoft.com/documentation/articles/powershell-install-configure/ +[azure-cli]: https://azure.microsoft.com/documentation/articles/xplat-cli-install/ +[CIDR]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing +[0]: ../_images/blueprints/hybrid-network-vpn.png "Structure of a hybrid network spanning the on-premises and cloud infrastructures" +[1]: ../_images/guidance-hybrid-network-vpn/partitioned-vpn.png "Partitioning a VNet to improve scalability" +[2]: ../_images/guidance-hybrid-network-vpn/audit-logs.png "Audit logs in the Azure portal" +[3]: ../_images/guidance-hybrid-network-vpn/RRAS-perf-counters.png "Performance counters for monitoring VPN network traffic" +[4]: ../_images/guidance-hybrid-network-vpn/RRAS-perf-graph.png "Example VPN network performance graph" \ No newline at end of file diff --git 
a/docs/blueprints/identity/adds-extend-domain.md b/docs/blueprints/identity/adds-extend-domain.md new file mode 100644 index 00000000000..e6be9b7550c --- /dev/null +++ b/docs/blueprints/identity/adds-extend-domain.md @@ -0,0 +1,188 @@ +--- +title: Extending Active Directory Domain Services (AD DS) to Azure +description: >- + How to implement a secure hybrid network architecture with Active Directory + authorization in Azure. +services: >- + guidance,vpn-gateway,expressroute,load-balancer,virtual-network,active-directory +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Identity management +ms.assetid: 4821d1de-1473-4748-a599-ada73323fdb2 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +pnp.series.prev: azure-ad +pnp.series.next: adds-forest +cardTitle: Extend AD DS to Azure +--- +# Active Directory Domain Services (AD DS) +[!INCLUDE [header](../../_includes/header.md)] + +This article describes best practices for extending your Active Directory environment to Azure to provide distributed authentication services using [Active Directory Domain Services (AD DS)][active-directory-domain-services]. This architecture extends the architectures described in [Implementing a secure hybrid network architecture in Azure][implementing-a-secure-hybrid-network-architecture] and [Implementing a secure hybrid network architecture with Internet access in Azure][implementing-a-secure-hybrid-network-architecture-with-internet-access]. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +AD DS is used to authenticate user, computer, application, or other identities that are included in a security domain. 
It can be hosted on-premises, but if your application is hosted partly on-premises and partly in Azure, it may be more efficient to replicate this functionality in Azure. This can reduce the latency caused by sending authentication and local authorization requests from the cloud back to AD DS running on-premises. + +There are two ways to host your directory services in Azure: + +* Use [Azure Active Directory][azure-active-directory] (Azure AD) to create an Active Directory domain, using [Azure AD Connect][azure-ad-connect] to integrate your on-premises AD directories with Azure AD. This approach is described in [Integrating on-premises Active Directory domains with Azure AD][guidance-identity-aad]. + +* Extend your existing on-premises Active Directory infrastructure to Azure by deploying a VM that runs AD DS as a domain controller. Depending on your security requirements, the AD installation in the cloud can be part of the same domain as that held on-premises, a new domain within a shared forest, or a separate forest. + +This article describes the second option, extending an on-premises infrastructure by deploying an AD DS domain controller to Azure, with both using the same domain. + +This architecture is commonly used when the on-premises network and the Azure virtual network are connected by a VPN or ExpressRoute connection. This architecture also supports bidirectional replication, meaning changes can be made either on-premises or in the cloud, and both sources will be kept consistent. Typical uses for this architecture include hybrid applications in which functionality is distributed between on-premises and Azure, and applications and services that perform authentication using Active Directory. + + +## Architecture diagram + +The following diagram highlights the important components in this architecture. + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. 
This diagram is on the "Identity - ADDS (same domain)" page. +> +> + +[![0]][0] + +* **On-premises network**. The on-premises network includes local Active Directory servers that can perform authentication and authorization for components located on-premises. +* **Active Directory servers**. These are domain controllers implementing directory services (AD DS) running as VMs in the cloud. These servers can provide authentication of components running in your Azure virtual network. +* **Active Directory subnet**. The AD DS servers are hosted in a separate subnet. Network security group (NSG) rules protect the AD DS servers and provide a firewall against traffic from unexpected sources. +* **Azure Gateway and Active Directory synchronization**. The Azure gateway provides a connection between the on-premises network and the Azure VNet. This can be a [VPN connection][azure-vpn-gateway] or [Azure ExpressRoute][azure-expressroute]. All synchronization requests between the Active Directory servers in the cloud and on-premises pass through the gateway. User-defined routes (UDRs) handle routing for on-premises traffic that passes to Azure. Traffic to and from the Active Directory servers does not pass through the network virtual appliances (NVAs) used in this scenario. + +For more information about the parts of the architecture that are not related to AD DS, read [Implementing a secure hybrid network architecture with Internet access in Azure][implementing-a-secure-hybrid-network-architecture-with-internet-access]. + +For more information about configuring UDRs and the NVAs, see [Implementing a secure hybrid network architecture in Azure][implementing-a-secure-hybrid-network-architecture]. + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. 
+ +### VM recommendations + +Determine your [VM size][vm-windows-sizes] requirements based on the expected volume of authentication requests. Use the specifications of the machines hosting AD DS on-premises as a starting point, and match them with the Azure VM sizes. Once deployed, monitor utilization and scale up or down based on the actual load on the VMs. For more information about sizing AD DS domain controllers, see [Capacity Planning for Active Directory Domain Services][capacity-planning-for-adds]. + +Create a separate virtual data disk for storing the database, logs, and SYSVOL for Active Directory. Do not store these items on the same disk as the operating system. Note that by default, data disks that are attached to a VM use write-through caching. However, this form of caching can conflict with the requirements of AD DS. For this reason, set the *Host Cache Preference* setting on the data disk to *None*. For more information, see [Placement of the Windows Server AD DS database and SYSVOL][adds-data-disks]. + +Deploy at least two VMs running AD DS as domain controllers and add them to an [availability set][availability-set]. + +### Networking recommendations + +Configure the VM network interface (NIC) for each AD DS server with a static private IP address for full Domain Name System (DNS) support. For more information, see [How to set a static private IP address in the Azure portal][set-a-static-ip-address]. + +> [!NOTE] +> Do not configure the VM NIC for any AD DS server with a public IP address. See [Security considerations][security-considerations] for more details. +> +> + +The Active Directory subnet NSG requires rules to permit incoming traffic from on-premises. For detailed information on the ports used by AD DS, see [Active Directory and Active Directory Domain Services Port Requirements][ad-ds-ports]. Also, ensure the UDR tables do not route AD DS traffic through the NVAs used in this architecture.
+ +### Active Directory site + +In AD DS, a site represents a physical location, network, or collection of devices. AD DS sites are used to manage AD DS database replication by grouping together AD DS objects that are located close to one another and are connected by a high-speed network. AD DS includes logic to select the best strategy for replicating the AD DS database between sites. + +We recommend that you create an AD DS site including the subnets defined for your application in Azure. Then, configure a site link between your on-premises AD DS sites, and AD DS will automatically perform the most efficient database replication possible. Note that this database replication requires little beyond the initial configuration. + +### Active Directory operations masters + +The operations masters role can be assigned to AD DS domain controllers to support consistency checking between instances of replicated AD DS databases. There are five operations master roles: schema master, domain naming master, relative identifier master, primary domain controller (PDC) emulator master, and infrastructure master. For more information about these roles, see [What are Operations Masters?][ad-ds-operations-masters]. + +We recommend you do not assign operations masters roles to the domain controllers deployed in Azure. + +### Monitoring + +Monitor the resources of the domain controller VMs as well as the AD DS services and create a plan to quickly correct any problems. For more information, see [Monitoring Active Directory][monitoring_ad]. You can also install tools such as [Microsoft System Center][microsoft_systems_center] on the monitoring server (see the architecture diagram) to help perform these tasks. + +## Scalability considerations + +AD DS is designed for scalability. You don't need to configure a load balancer or traffic controller to direct requests to AD DS domain controllers.
The only scalability consideration is to configure the VMs running AD DS with the correct size for your network load requirements, monitor the load on the VMs, and scale up or down as necessary. + +## Availability considerations + +Deploy the VMs running AD DS into an [availability set][availability-set]. Also, consider assigning the role of [standby operations master][standby-operations-masters] to at least one server, and possibly more depending on your requirements. A standby operations master is an active copy of the operations master that can be used in place of the primary operations master server during failover. + +## Manageability considerations + +Perform regular AD DS backups. Don't simply copy the VHD files of domain controllers instead of performing regular backups, because the AD DS database file on the VHD may not be in a consistent state when it's copied, making it impossible to restart the database. + +Do not shut down a domain controller VM using the Azure portal. Instead, shut down and restart from the guest operating system. Shutting down through the portal causes the VM to be deallocated, which resets both the `VM-GenerationID` and the `invocationID` of the Active Directory repository. This discards the AD DS relative identifier (RID) pool and marks SYSVOL as nonauthoritative, and may require reconfiguration of the domain controller. + +## Security considerations + +AD DS servers provide authentication services and are an attractive target for attacks. To secure them, prevent direct Internet connectivity by placing the AD DS servers in a separate subnet with an NSG acting as a firewall. Close all ports on the AD DS servers except those necessary for authentication, authorization, and server synchronization. For more information, see [Active Directory and Active Directory Domain Services Port Requirements][ad-ds-ports].
+ +Consider implementing an additional security perimeter around servers with a pair of subnets and NVAs, as described in [Implementing a secure hybrid network architecture with Internet access in Azure][implementing-a-secure-hybrid-network-architecture-with-internet-access]. + +Use either BitLocker or Azure disk encryption to encrypt the disk hosting the AD DS database. + +## Solution deployment + +A solution is available on [GitHub][github] to deploy this reference architecture. You will need the latest version of the [Azure CLI][azure-powershell] to run the PowerShell script that deploys the solution. To deploy the reference architecture, follow these steps: + +1. Download or clone the solution folder from [GitHub][github] to your local machine. + +2. Open the Azure CLI and navigate to the local solution folder. + +3. Run the following command: + ```Powershell + .\Deploy-ReferenceArchitecture.ps1 <subscription id> <location> <mode> + ``` + Replace `<subscription id>` with your Azure subscription ID. + For `<location>`, specify an Azure region, such as `eastus` or `westus`. + The `<mode>` parameter controls the granularity of the deployment, and can be one of the following values: + * `Onpremise`: deploys the simulated on-premises environment. + * `Infrastructure`: deploys the VNet infrastructure and jump box in Azure. + * `CreateVpn`: deploys the Azure virtual network gateway and connects it to the simulated on-premises network. + * `AzureADDS`: deploys the VMs acting as AD DS servers, deploys Active Directory to these VMs, and deploys the domain in Azure. + * `Workload`: deploys the public and private DMZs and the workload tier. + * `All`: deploys all of the preceding deployments. **This is the recommended option if you do not have an existing on-premises network but you want to deploy the complete reference architecture described above for testing or evaluation.** + +4. Wait for the deployment to complete. If you are deploying the `All` deployment, it will take several hours.
+ +## Next steps + +* Learn the best practices for [creating an AD DS resource forest][adds-resource-forest] in Azure. +* Learn the best practices for [creating an Active Directory Federation Services (AD FS) infrastructure][adfs] in Azure. + + +[adds-resource-forest]: adds-forest.md +[adfs]: adfs.md +[guidance-identity-aad]: azure-ad.md + +[implementing-a-secure-hybrid-network-architecture]: ../dmz/secure-vnet-hybrid.md +[implementing-a-secure-hybrid-network-architecture-with-internet-access]: ../dmz/secure-vnet-dmz.md + +[active-directory-domain-services]: https://technet.microsoft.com/library/dd448614.aspx +[ad-azure-guidelines]: https://msdn.microsoft.com/library/azure/jj156090.aspx +[adds-data-disks]: https://msdn.microsoft.com/library/azure/jj156090.aspx#BKMK_PlaceDB +[ad-ds-operations-masters]: https://technet.microsoft.com/library/cc779716(v=ws.10).aspx +[ad-ds-ports]: https://technet.microsoft.com/library/dd772723(v=ws.11).aspx +[availability-set]: /azure/virtual-machines/virtual-machines-windows-create-availability-set +[azure-active-directory]: /azure/active-directory-domain-services/active-directory-ds-overview +[azure-ad-connect]: /azure/active-directory/active-directory-aadconnect +[azure-expressroute]: https://azure.microsoft.com/documentation/articles/expressroute-introduction/ +[azure-powershell]: /powershell/azureps-cmdlets-docs +[azure-vpn-gateway]: https://azure.microsoft.com/documentation/articles/vpn-gateway-about-vpngateways/ +[capacity-planning-for-adds]: http://social.technet.microsoft.com/wiki/contents/articles/14355.capacity-planning-for-active-directory-domain-services.aspx +[GitHub]: https://github.com/mspnp/reference-architectures/tree/master/guidance-ra-identity-adds +[microsoft_systems_center]: https://www.microsoft.com/server-cloud/products/system-center-2016/ +[monitoring_ad]: https://msdn.microsoft.com/library/bb727046.aspx +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview 
+[security-considerations]: #security-considerations +[set-a-static-ip-address]: https://azure.microsoft.com/documentation/articles/virtual-networks-static-private-ip-arm-pportal/ +[standby-operations-masters]: https://technet.microsoft.com/library/cc794737(v=ws.10).aspx +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vm-windows-sizes]: /azure/virtual-machines/virtual-machines-windows-sizes + +[0]: ../_images/guidance-iaas-ra-secure-vnet-ad/figure1.png "Secure hybrid network architecture with Active Directory" \ No newline at end of file diff --git a/docs/blueprints/identity/adds-forest.md b/docs/blueprints/identity/adds-forest.md new file mode 100644 index 00000000000..c7c74933027 --- /dev/null +++ b/docs/blueprints/identity/adds-forest.md @@ -0,0 +1,183 @@ +--- +title: Create an AD DS resource forest in Azure +description: How to create a trusted Active Directory domain in Azure. +services: >- + guidance,vpn-gateway,expressroute,load-balancer,virtual-network,active-directory +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Identity management +ms.assetid: 67d86788-c22d-4394-beaf-b4acdf4e2e56 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +pnp.series.prev: adds-extend-domain +pnp.series.next: adfs +cardTitle: Create an AD DS forest in Azure +--- +# Create an Active Directory Domain Services (AD DS) resource forest in Azure +[!INCLUDE [header](../../_includes/header.md)] + +This article describes how to create an Active Directory domain in Azure that is separate from, but trusted by, domains in your on-premises forest. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. 
+> +> + +Active Directory Domain Services (AD DS) is a distributed database service that stores identity information about users, devices, and other resources in a hierarchical structure. The top node in the hierarchical structure is known as a forest. A forest contains domains, and domains contain other types of objects. + +You can use AD DS to create trust relationships between top level forest objects, in order to provide interoperability between domains. That is, logons in one domain can be trusted to provide access to resources in other domains. + +This reference architecture shows how to create an AD DS forest in Azure with a one-way outgoing trust relationship with an on-premises domain. The forest in Azure contains a domain that does not exist on-premises, but because of the trust relationship, logons made against on-premises domains can be trusted for access to resources in the separate Azure domain. + +Typical uses for this architecture include maintaining security separation for objects and identities held in the cloud, and migrating individual domains from on-premises to the cloud. + +## Architecture diagram + +The following diagram highlights the important components in this architecture. + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. This diagram is on the "Identity - AADS (resource forest)" page. +> +> + +[![0]][0] + +* **On-premises network**. The on-premises network contains its own Active Directory forest and domains. +* **Active Directory servers**. These are domain controllers implementing domain services running as VMs in the cloud. These servers host a forest containing one or more domains, separate from those located on-premises. +* **One-way trust relationship**. The example in the diagram shows a one-way trust from the domain in Azure to the on-premises domain. 
This relationship enables on-premises users to access resources in the domain in Azure, but not the other way around. It is possible to create a two-way trust if cloud users also require access to on-premises resources. +* **Active Directory subnet**. The AD DS servers are hosted in a separate subnet. Network security group (NSG) rules protect the AD DS servers and provide a firewall against traffic from unexpected sources. +* **Azure gateway**. The Azure gateway provides a connection between the on-premises network and the Azure VNet. This can be a [VPN connection][azure-vpn-gateway] or [Azure ExpressRoute][azure-expressroute]. For more information, see [Implementing a secure hybrid network architecture in Azure][implementing-a-secure-hybrid-network-architecture]. + +## Recommendations + +For specific recommendations on implementing Active Directory in Azure, see the following articles: + +- [Extending Active Directory Domain Services (AD DS) to Azure][adds-extend-domain]. +- [Guidelines for Deploying Windows Server Active Directory on Azure Virtual Machines][ad-azure-guidelines]. + +### Trust + +The on-premises domains are contained within a different forest from the domains in the cloud. To enable authentication of on-premises users in the cloud, the domains in Azure must trust the logon domain in the on-premises forest. Similarly, if the cloud provides a logon domain for external users, it may be necessary for the on-premises forest to trust the cloud domain. + +You can establish trusts at the forest level by [creating forest trusts][creating-forest-trusts], or at the domain level by [creating external trusts][creating-external-trusts]. A forest level trust creates a relationship between all domains in two forests. An external domain level trust only creates a relationship between two specified domains. You should only create external domain level trusts between domains in different forests. 
+ +Trusts can be unidirectional (one-way) or bidirectional (two-way): + +* A one-way trust enables users in one domain or forest (known as the *incoming* domain or forest) to access the resources held in another (the *outgoing* domain or forest). +* A two-way trust enables users in either domain or forest to access resources held in the other. + +The following table summarizes trust configurations for some simple scenarios: + +| Scenario | On-premises trust | Cloud trust | +| --- | --- | --- | +| On-premises users require access to resources in the cloud, but not vice versa |One-way, incoming |One-way, outgoing | +| Users in the cloud require access to resources located on-premises, but not vice versa |One-way, outgoing |One-way, incoming | +| Users in the cloud and on-premises both require access to resources held in the cloud and on-premises |Two-way, incoming and outgoing |Two-way, incoming and outgoing | + +## Scalability considerations + +Active Directory is automatically scalable for domain controllers that are part of the same domain. Requests are distributed across all controllers within a domain. You can add another domain controller, and it synchronizes automatically with the domain. Do not configure a separate load balancer to direct traffic to controllers within the domain. Ensure that all domain controllers have sufficient memory and storage resources to handle the domain database. Make all domain controller VMs the same size. + +## Availability considerations + +Provision at least two domain controllers for each domain. This enables automatic replication between servers. Create an availability set for the VMs acting as Active Directory servers handling each domain. Put at least two servers in this availability set. + +Also, consider designating one or more servers in each domain as [standby operations masters][standby-operations-masters] in case connectivity to a server holding a flexible single master operation (FSMO) role fails.
+ +## Manageability considerations + +For information about management and monitoring considerations, see [Extending Active Directory to Azure][adds-extend-domain]. + +For additional information, see [Monitoring Active Directory][monitoring_ad]. You can install tools such as [Microsoft System Center][microsoft_systems_center] on a monitoring server in the management subnet to help perform these tasks. + +## Security considerations + +Forest level trusts are transitive. If you establish a forest level trust between an on-premises forest and a forest in the cloud, this trust is extended to other new domains created in either forest. If you use domains to provide separation for security purposes, consider creating trusts at the domain level only. Domain level trusts are non-transitive. + +For Active Directory-specific security considerations, see the security considerations section in [Extending Active Directory to Azure][adds-extend-domain]. + +## Solution deployment + +A solution is available on [GitHub][github] to deploy this reference architecture. You will need the latest version of the Azure CLI to run the PowerShell script that deploys the solution. To deploy the reference architecture, follow these steps: + +1. Download or clone the solution folder from [GitHub][github] to your local machine. + +2. Open the Azure CLI and navigate to the local solution folder. + +3. Run the following command: + + ```Powershell + .\Deploy-ReferenceArchitecture.ps1 <subscription id> <location> <mode> + ``` + + Replace `<subscription id>` with your Azure subscription ID. + + For `<location>`, specify an Azure region, such as `eastus` or `westus`. + + The `<mode>` parameter controls the granularity of the deployment, and can be one of the following values: + + * `Onpremise`: deploys the simulated on-premises environment. + * `Infrastructure`: deploys the VNet infrastructure and jump box in Azure. + * `CreateVpn`: deploys the Azure virtual network gateway and connects it to the simulated on-premises network.
+ * `AzureADDS`: deploys the VMs acting as Active Directory DS servers, deploys Active Directory to these VMs, and deploys the domain in Azure. + * `WebTier`: deploys the web tier VMs and load balancer. + * `Prepare`: deploys all of the preceding deployments. **This is the recommended option if you do not have an existing on-premises network but you want to deploy the complete reference architecture described above for testing or evaluation.** + * `Workload`: deploys the business and data tier VMs and load balancers. Note that these VMs are not included in the `Prepare` deployment. + +4. Wait for the deployment to complete. If you are deploying the `Prepare` deployment, it will take several hours. + +5. If you are using the simulated on-premises configuration, configure the incoming trust relationship: + + 1. Connect to the jump box (*ra-adtrust-mgmt-vm1* in the *ra-adtrust-security-rg* resource group). Log in as *testuser* with password *AweS0me@PW*. + 2. On the jump box, open an RDP session on the first VM in the *contoso.com* domain (the on-premises domain). This VM has the IP address 192.168.0.4. The username is *contoso\testuser* with password *AweS0me@PW*. + 3. Download the [incoming-trust.ps1][incoming-trust] script and run it to create the incoming trust from the *treyresearch.com* domain. + +6. If you are using your own on-premises infrastructure: + + 1. Download the [incoming-trust.ps1][incoming-trust] script. + 2. Edit the script and replace the value of the `$TrustedDomainName` variable with the name of your own domain. + 3. Run the script. + +7. From the jump box, connect to the first VM in the *treyresearch.com* domain (the domain in the cloud). This VM has the IP address 10.0.4.4. The username is *treyresearch\testuser* with password *AweS0me@PW*. + +8. Download the [outgoing-trust.ps1][outgoing-trust] script and run it to create the outgoing trust from the *treyresearch.com* domain.
If you are using your own on-premises machines, then edit the script first. Set the `$TrustedDomainName` variable to the name of your on-premises domain, and specify the IP addresses of the Active Directory DS servers for this domain in the `$TrustedDomainDnsIpAddresses` variable. + +9. Wait a few minutes for the previous steps to complete, then connect to an on-premises VM and perform the steps outlined in the article [Verify a Trust][verify-a-trust] to determine whether the trust relationship between the *contoso.com* and *treyresearch.com* domains is correctly configured. + +## Next steps + +* Learn the best practices for [extending your on-premises AD DS domain to Azure][adds-extend-domain] +* Learn the best practices for [creating an AD FS infrastructure][adfs] in Azure. + + +[adds-extend-domain]: adds-extend-domain.md +[adfs]: adfs.md + +[implementing-a-secure-hybrid-network-architecture]: ../dmz/secure-vnet-hybrid.md +[implementing-a-secure-hybrid-network-architecture-with-internet-access]: ../dmz/secure-vnet-dmz.md + +[running-VMs-for-an-N-tier-architecture-on-Azure]: ../virtual-machines-windows/n-tier.md + +[ad-azure-guidelines]: https://msdn.microsoft.com/library/azure/jj156090.aspx +[azure-expressroute]: https://azure.microsoft.com/documentation/articles/expressroute-introduction/ +[azure-vpn-gateway]: https://azure.microsoft.com/documentation/articles/vpn-gateway-about-vpngateways/ +[creating-external-trusts]: https://technet.microsoft.com/library/cc816837(v=ws.10).aspx +[creating-forest-trusts]: https://technet.microsoft.com/library/cc816810(v=ws.10).aspx +[github]: https://github.com/mspnp/reference-architectures/tree/master/guidance-identity-adds-trust +[incoming-trust]: https://raw.githubusercontent.com/mspnp/reference-architectures/master/guidance-identity-adds-trust/extensions/incoming-trust.ps1 +[microsoft_systems_center]: https://www.microsoft.com/server-cloud/products/system-center-2016/ +[monitoring_ad]: 
https://msdn.microsoft.com/library/bb727046.aspx +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[solution-script]: https://raw.githubusercontent.com/mspnp/reference-architectures/master/guidance-identity-adds-trust/Deploy-ReferenceArchitecture.ps1 +[standby-operations-masters]: https://technet.microsoft.com/library/cc794737(v=ws.10).aspx +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[outgoing-trust]: https://raw.githubusercontent.com/mspnp/reference-architectures/master/guidance-identity-adds-trust/extensions/outgoing-trust.ps1 +[verify-a-trust]: https://technet.microsoft.com/library/cc753821.aspx +[0]: ../_images/guidance-identity-aad-resource-forest/figure1.png "Secure hybrid network architecture with separate Active Directory domains" \ No newline at end of file diff --git a/docs/blueprints/identity/adfs.md b/docs/blueprints/identity/adfs.md new file mode 100644 index 00000000000..36fdb273382 --- /dev/null +++ b/docs/blueprints/identity/adfs.md @@ -0,0 +1,339 @@ +--- +title: Implementing Active Directory Federation Services (AD FS) in Azure +description: >- + How to implement a secure hybrid network architecture with Active Directory + Federation Service authorization in Azure. 
+services: >- + guidance,vpn-gateway,expressroute,load-balancer,virtual-network,active-directory +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Identity management +ms.assetid: 18525321-1926-4447-9db2-cadbdd4c1ab9 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +pnp.series.prev: adds-forest +cardTitle: Extend AD FS to Azure +--- +# Active Directory Federation Services (AD FS) +[!INCLUDE [header](../../_includes/header.md)] + +This article describes how to implement a secure hybrid network that extends your on-premises network to Azure and uses [Active Directory Federation Services (AD FS)][active-directory-federation-services] to perform federated authentication and authorization for components running in Azure. This architecture extends the implementation described in [Extending Active Directory to Azure][extending-ad-to-azure]. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +AD FS can be hosted on-premises, but if your application is a hybrid in which some parts are implemented in Azure, it may be more efficient to replicate AD FS in the cloud. Typical uses for this architecture include: + +* Hybrid applications where workloads run partly on-premises and partly in Azure. +* Solutions that use federated authorization to expose web applications to partner organizations. +* Systems that support access from web browsers running outside of the organizational firewall. +* Systems that enable users to access web applications by connecting from authorized external devices such as remote computers, notebooks, and other mobile devices.
+ +For more information about how AD FS works, see [Active Directory Federation Services Overview][active-directory-federation-services-overview]. Also, the article [AD FS deployment in Azure][adfs-intro] contains a detailed step-by-step introduction to implementation. + +This reference architecture focuses on *passive federation*, in which the federation servers decide how and when to authenticate a user. The user provides sign-in information when the application is started. This mechanism is most commonly used by web browsers and involves a protocol that redirects the browser to a site where the user authenticates. AD FS also supports *active federation*, where an application takes on responsibility for supplying credentials without further user interaction, but that scenario is outside the scope of this architecture. + +## Architecture diagram + +The following diagram highlights the important components in this architecture. + +The diagram shows the following scenarios: + +* Application code from a partner organization accesses a web application hosted inside your Azure VNet. +* An external, registered user with credentials stored inside Active Directory Domain Services (AD DS) accesses a web application hosted inside your Azure VNet. +* A user connected to your VNet using an authorized device executes a web application hosted inside your Azure VNet. + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. This diagram is on the "Identity - ADFS" page. +> +> + +[![0]][0] + +* **AD DS subnet**. The AD DS servers are contained in their own subnet with network security group (NSG) rules acting as a firewall. + +* **AD DS servers**. Domain controllers running as VMs in Azure. These servers provide authentication of local identities within the domain. + +* **AD FS subnet**. The AD FS servers are located within their own subnet with NSG rules acting as a firewall. + +* **AD FS servers**.
The AD FS servers provide federated authorization and authentication. In this architecture, they perform the following tasks: + + * Receiving security tokens containing claims made by a partner federation server on behalf of a partner user. AD FS verifies that the tokens are valid before passing the claims to the web application running in Azure to authorize requests. + + The web application running in Azure is the *relying party*. The partner federation server must issue claims that are understood by the web application. The partner federation servers are referred to as *account partners*, because they submit access requests on behalf of authenticated accounts in the partner organization. The AD FS servers are called *resource partners* because they provide access to resources (the web application). + + * Authenticating and authorizing incoming requests from external users running a web browser or device that needs access to web applications, by using AD DS and the [Active Directory Device Registration Service][ADDRS]. + + The AD FS servers are configured as a farm accessed through an Azure load balancer. This implementation improves availability and scalability. The AD FS servers are not exposed directly to the Internet. All Internet traffic is filtered through AD FS web application proxy servers and a DMZ (also referred to as a perimeter network). + +* **AD FS proxy subnet**. The AD FS proxy servers can be contained within their own subnet, with NSG rules providing protection. The servers in this subnet are exposed to the Internet through a set of network virtual appliances that provide a firewall between your Azure virtual network and the Internet. + +* **AD FS web application proxy (WAP) servers**. These VMs act as AD FS servers for incoming requests from partner organizations and external devices. The WAP servers act as a filter, shielding the AD FS servers from direct access from the Internet. 
As with the AD FS servers, deploying the WAP servers in a farm with load balancing gives you greater availability and scalability than deploying a collection of stand-alone servers. + + > [!NOTE] + > For detailed information about installing WAP servers, see [Install and Configure the Web Application Proxy Server][install_and_configure_the_web_application_proxy_server] + > + > + +* **Partner organization**. A partner organization running a web application that requests access to a web application running in Azure. The federation server at the partner organization authenticates requests locally, and submits security tokens containing claims to AD FS running in Azure. AD FS in Azure validates the security tokens, and if valid can pass the claims to the web application running in Azure to authorize them. + + > [!NOTE] + > You can also configure a VPN tunnel using Azure gateway to provide direct access to AD FS for trusted partners. Requests received from these partners do not pass through the WAP servers. + > + > + +For more information about the parts of the architecture that are not related to AD FS, see the following: +- [Implementing a secure hybrid network architecture in Azure][implementing-a-secure-hybrid-network-architecture] +- [Implementing a secure hybrid network architecture with Internet access in Azure][implementing-a-secure-hybrid-network-architecture-with-internet-access] +- [Implementing a secure hybrid network architecture with Active Directory identities in Azure][extending-ad-to-azure]. + + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### VM recommendations + +Create VMs with sufficient resources to handle the expected volume of traffic. Use the size of the existing machines hosting AD FS on premises as a starting point. Monitor the resource utilization. You can resize the VMs and scale down if they are too large. 
+ +Follow the recommendations listed in [Running a Windows VM on Azure][vm-recommendations]. + +### Networking recommendations + +Configure the network interface for each of the VMs hosting AD FS and WAP servers with static private IP addresses. + +Do not give the AD FS VMs public IP addresses. For more information, see the Security considerations section. + +Set the IP address of the preferred and secondary domain name service (DNS) servers for the network interfaces for each AD FS and WAP VM to reference the Active Directory DS VMs. The Active Directory DS VMs should be running DNS. This step is necessary to enable each VM to join the domain. + +### AD FS availability + +Create an AD FS farm with at least two servers to increase availability of the service. Use different storage accounts for each AD FS VM in the farm. This approach helps to ensure that a failure in a single storage account does not make the entire farm inaccessible. + +Create separate Azure availability sets for the AD FS and WAP VMs. Ensure that there are at least two VMs in each set. Each availability set must have at least two update domains and two fault domains. + +Configure the load balancers for the AD FS VMs and WAP VMs as follows: + +* Use an Azure load balancer to provide external access to the WAP VMs, and an internal load balancer to distribute the load across the AD FS servers in the farm. +* Only pass traffic appearing on port 443 (HTTPS) to the AD FS/WAP servers. +* Give the load balancer a static IP address. +* Create a health probe using the TCP protocol rather than HTTPS. You can ping port 443 to verify that an AD FS server is functioning. + + > [!NOTE] + > AD FS servers use the Server Name Indication (SNI) protocol, so attempting to probe using an HTTPS endpoint from the load balancer fails. + > + > +* Add a DNS *A* record to the domain for the AD FS load balancer. Specify the IP address of the load balancer, and give it a name in the domain (such as adfs.contoso.com). 
This is the name clients and the WAP servers use to access the AD FS server farm. + +### AD FS security + +Prevent direct exposure of the AD FS servers to the Internet. AD FS servers are domain-joined computers that have full authorization to grant security tokens. If a server is compromised, a malicious user can issue full access tokens to all web applications and to all federation servers that are protected by AD FS. If your system must handle requests from external users not connecting from trusted partner sites, use WAP servers to handle these requests. For more information, see [Where to Place a Federation Server Proxy][where-to-place-an-fs-proxy]. + +Place AD FS servers and WAP servers in separate subnets with their own firewalls. You can use NSG rules to define firewall rules. If you require more comprehensive protection you can implement an additional security perimeter around servers by using a pair of subnets and network virtual appliances (NVAs), as described in the document [Implementing a secure hybrid network architecture with Internet access in Azure][implementing-a-secure-hybrid-network-architecture-with-internet-access]. All firewalls should allow traffic on port 443 (HTTPS). + +Restrict direct sign-in access to the AD FS and WAP servers. Only DevOps staff should be able to connect. + +Do not join the WAP servers to the domain. + +### AD FS installation + +The article [Deploying a Federation Server Farm][Deploying_a_federation_server_farm] provides detailed instructions for installing and configuring AD FS. Perform the following tasks before configuring the first AD FS server in the farm: + +1. Obtain a publicly trusted certificate for performing server authentication. The *subject name* must contain the name clients use to access the federation service. This can be the DNS name registered for the load balancer, for example, *adfs.contoso.com* (avoid using wildcard names such as `*.contoso.com`, for security reasons). 
Use the same certificate on all AD FS server VMs. You can purchase a certificate from a trusted certification authority, but if your organization uses Active Directory Certificate Services you can create your own. + + The *subject alternative name* is used by the device registration service (DRS) to enable access from external devices. This should be of the form *enterpriseregistration.contoso.com*. + + For more information, see [Obtain and Configure a Secure Sockets Layer (SSL) Certificate for AD FS][adfs_certificates]. + +2. On the domain controller, generate a new root key for the Key Distribution Service. Set the effective time to the current time minus 10 hours (this configuration reduces the delay that can occur in distributing and synchronizing keys across the domain). This step is necessary to support creating the group service account that is used to run the AD FS service. The following PowerShell command shows an example of how to do this: + + ```powershell + Add-KdsRootKey -EffectiveTime (Get-Date).AddHours(-10) + ``` + +3. Add each AD FS server VM to the domain. + +> [!NOTE] +> To install AD FS, the domain controller running the primary domain controller (PDC) emulator flexible single master operation (FSMO) role for the domain must be running and accessible from the AD FS VMs. +> +> + +### AD FS trust + +Establish federation trust between your AD FS installation, and the federation servers of any partner organizations. Configure any claims filtering and mapping required. + +* DevOps staff at each partner organization must add a relying party trust for the web applications accessible through your AD FS servers. +* DevOps staff in your organization must configure claims-provider trust to enable your AD FS servers to trust the claims that partner organizations provide. +* DevOps staff in your organization must also configure AD FS to pass claims on to your organization's web applications. 
+ +For more information, see [Establishing Federation Trust][establishing-federation-trust]. + +Publish your organization's web applications and make them available to external partners by using preauthentication through the WAP servers. For more information, see [Publish Applications using AD FS Preauthentication][publish_applications_using_AD_FS_preauthentication]. + +AD FS supports token transformation and augmentation. Azure Active Directory does not provide this feature. With AD FS, when you set up the trust relationships, you can: + +* Configure claim transformations for authorization rules. For example, you can map group security from a representation used by a non-Microsoft partner organization to something that Active Directory DS can authorize in your organization. +* Transform claims from one format to another. For example, you can map from SAML 2.0 to SAML 1.1 if your application only supports SAML 1.1 claims. + +### AD FS monitoring + +The [Microsoft System Center Management Pack for Active Directory Federation Services 2012 R2][oms-adfs-pack] provides both proactive and reactive monitoring of your AD FS deployment for the federation server. This management pack monitors: + +* Events that the AD FS service records in its event logs. +* The performance data that the AD FS performance counters collect. +* The overall health of the AD FS system and web applications (relying parties), and provides alerts for critical issues and warnings. + +## Scalability considerations + +The following considerations, summarized from the article [Plan your AD FS deployment][plan-your-adfs-deployment], give a starting point for sizing AD FS farms: + +* If you have fewer than 1000 users, do not create dedicated servers, but instead install AD FS on each of the Active Directory DS servers in the cloud. Make sure that you have at least two Active Directory DS servers to maintain availability. Create a single WAP server. 
+* If you have between 1000 and 15000 users, create two dedicated AD FS servers and two dedicated WAP servers. +* If you have between 15000 and 60000 users, create between three and five dedicated AD FS servers and at least two dedicated WAP servers. + +These considerations assume that you are using dual quad-core VM (Standard D4_v2, or better) sizes in Azure. + +If you are using the Windows Internal Database to store AD FS configuration data, you are limited to eight AD FS servers in the farm. If you anticipate that you will need more in the future, use SQL Server. For more information, see [The Role of the AD FS Configuration Database][adfs-configuration-database]. + +## Availability considerations + +You can use either SQL Server or the Windows Internal Database to hold AD FS configuration information. The Windows Internal Database provides basic redundancy. Changes are written directly to only one of the AD FS databases in the AD FS cluster, while the other servers use pull replication to keep their databases up to date. Using SQL Server can provide full database redundancy and high availability using failover clustering or mirroring. + +## Manageability considerations + +DevOps staff should be prepared to perform the following tasks: + +* Managing the federation servers, including managing the AD FS farm, managing trust policy on the federation servers, and managing the certificates used by the federation services. +* Managing the WAP servers including managing the WAP farm and certificates. +* Managing web applications including configuring relying parties, authentication methods, and claims mappings. +* Backing up AD FS components. + +## Security considerations + +AD FS utilizes the HTTPS protocol, so make sure that the NSG rules for the subnet containing the web tier VMs permit HTTPS requests. 
These requests can originate from the on-premises network, the subnets containing the web tier, business tier, data tier, private DMZ, public DMZ, and the subnet containing the AD FS servers. + +Consider using a set of network virtual appliances that logs detailed information on traffic traversing the edge of your virtual network for auditing purposes. + +## Solution deployment + +A solution is available on [GitHub][github] to deploy this reference architecture. You will need the latest version of the [Azure CLI][azure-cli] to run the PowerShell script that deploys the solution. To deploy the reference architecture, follow these steps: + +1. Download or clone the solution folder from [GitHub][github] to your local machine. + +2. Open the Azure CLI and navigate to the local solution folder. + +3. Run the following command: + + ```powershell + .\Deploy-ReferenceArchitecture.ps1 <subscription id> <location> <mode> + ``` + + Replace `<subscription id>` with your Azure subscription ID. + + For `<location>`, specify an Azure region, such as `eastus` or `westus`. + + The `<mode>` parameter controls the granularity of the deployment, and can be one of the following values: + + * `Onpremise`: Deploys a simulated on-premises environment. You can use this deployment to test and experiment if you do not have an existing on-premises network, or if you want to test this reference architecture without changing the configuration of your existing on-premises network. + * `Infrastructure`: deploys the VNet infrastructure and jump box. + * `CreateVpn`: deploys an Azure virtual network gateway and connects it to the simulated on-premises network. + * `AzureADDS`: deploys the VMs acting as Active Directory DS servers, deploys Active Directory to these VMs, and creates the domain in Azure. + * `AdfsVm`: deploys the AD FS VMs and joins them to the domain in Azure. + * `PublicDMZ`: deploys the public DMZ in Azure. + * `ProxyVm`: deploys the AD FS proxy VMs and joins them to the domain in Azure. + * `Prepare`: deploys all of the preceding deployments. 
**This is the recommended option if you are building an entirely new deployment and you don't have an existing on-premises infrastructure.** + * `Workload`: optionally deploys web, business, and data tier VMs and supporting network. Not included in the `Prepare` deployment mode. + * `PrivateDMZ`: optionally deploys the private DMZ in Azure in front of the `Workload` VMs deployed above. Not included in the `Prepare` deployment mode. + +4. Wait for the deployment to complete. If you used the `Prepare` option, the deployment takes several hours to complete, and finishes with the message `Preparation is completed. Please install certificate to all AD FS and proxy VMs.` + +5. Restart the jump box (*ra-adfs-mgmt-vm1* in the *ra-adfs-security-rg* group) to allow its DNS settings to take effect. + +6. [Obtain an SSL Certificate for AD FS][adfs_certificates] and install this certificate on the AD FS VMs. Note that you can connect to them through the jump box. The IP addresses are *10.0.5.4* and *10.0.5.5*. The default username is *contoso\testuser* with password *AweS0me@PW*. + + > [!NOTE] + > The comments in the Deploy-ReferenceArchitecture.ps1 script at this point provide detailed instructions for creating a self-signed test certificate and authority using the `makecert` command. However, perform these steps as a **test** only and do not use the certificates generated by makecert in a production environment. + > + > + +7. Run the following PowerShell command to deploy the AD FS server farm: + + ```powershell + .\Deploy-ReferenceArchitecture.ps1 Adfs + ``` + +8. On the jump box, browse to `https://adfs.contoso.com/adfs/ls/idpinitiatedsignon.htm` to test the AD FS installation (you may receive a certificate warning that you can ignore for this test). Verify that the Contoso Corporation sign-in page appears. Sign in as *contoso\testuser* with password *AweS0me@PW*. + +9. Install the SSL certificate on the AD FS proxy VMs. The IP addresses are *10.0.6.4* and *10.0.6.5*. 
+ +10. Run the following PowerShell command to deploy the first AD FS proxy server: + + ```powershell + .\Deploy-ReferenceArchitecture.ps1 Proxy1 + ``` + +11. Follow the instructions displayed by the script to test the installation of the first proxy server. + +12. Run the following PowerShell command to deploy the second proxy server: + + ```powershell + .\Deploy-ReferenceArchitecture.ps1 Proxy2 + ``` + +13. Follow the instructions displayed by the script to test the complete proxy configuration. + +## Next steps + +* Learn about [Azure Active Directory][aad]. +* Learn about [Azure Active Directory B2C][aadb2c]. + + +[extending-ad-to-azure]: adds-extend-domain.md + +[vm-recommendations]: ../virtual-machines-windows/single-vm.md +[implementing-a-secure-hybrid-network-architecture]: ../dmz/secure-vnet-hybrid.md +[implementing-a-secure-hybrid-network-architecture-with-internet-access]: ../dmz/secure-vnet-dmz.md +[hybrid-azure-on-prem-vpn]: ../hybrid-networking/vpn.md + +[naming-conventions]: /azure/guidance/guidance-naming-conventions + +[azure-cli]: /azure/azure-resource-manager/xplat-cli-azure-resource-manager +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[DRS]: https://technet.microsoft.com/library/dn280945.aspx +[where-to-place-an-fs-proxy]: https://technet.microsoft.com/library/dd807048.aspx +[ADDRS]: https://technet.microsoft.com/library/dn486831.aspx +[plan-your-adfs-deployment]: https://msdn.microsoft.com/library/azure/dn151324.aspx +[ad_network_recommendations]: #network_configuration_recommendations_for_AD_DS_VMs +[domain_and_forests]: https://technet.microsoft.com/library/cc759073(v=ws.10).aspx +[adfs_certificates]: https://technet.microsoft.com/library/dn781428(v=ws.11).aspx +[create_service_account_for_adfs_farm]: https://technet.microsoft.com/library/dd807078.aspx +[import_server_authentication_certificate]: https://technet.microsoft.com/library/dd807088.aspx +[adfs-configuration-database]: 
https://technet.microsoft.com/library/ee913581(v=ws.11).aspx +[active-directory-federation-services]: https://technet.microsoft.com/windowsserver/dd448613.aspx +[security-considerations]: #security-considerations +[recommendations]: #recommendations +[claims-aware applications]: https://msdn.microsoft.com/library/windows/desktop/bb736227(v=vs.85).aspx +[active-directory-federation-services-overview]: https://technet.microsoft.com/library/hh831502(v=ws.11).aspx +[establishing-federation-trust]: https://blogs.msdn.microsoft.com/alextch/2011/06/27/establishing-federation-trust/ +[Deploying_a_federation_server_farm]: https://azure.microsoft.com/documentation/articles/active-directory-aadconnect-azure-adfs/ +[install_and_configure_the_web_application_proxy_server]: https://technet.microsoft.com/library/dn383662.aspx +[publish_applications_using_AD_FS_preauthentication]: https://technet.microsoft.com/library/dn383640.aspx +[managing-adfs-components]: https://technet.microsoft.com/library/cc759026.aspx +[oms-adfs-pack]: https://www.microsoft.com/download/details.aspx?id=41184 +[azure-powershell-download]: https://azure.microsoft.com/documentation/articles/powershell-install-configure/ +[aad]: https://azure.microsoft.com/documentation/services/active-directory/ +[aadb2c]: https://azure.microsoft.com/documentation/services/active-directory-b2c/ +[adfs-intro]: /azure/active-directory/active-directory-aadconnect-azure-adfs +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[github]: https://raw.githubusercontent.com/mspnp/reference-architectures/master/guidance-identity-adfs/Deploy-ReferenceArchitecture.ps1 +[adfs_certificates]: https://technet.microsoft.com/library/dn781428(v=ws.11).aspx +[0]: ../_images/guidance-iaas-ra-secure-vnet-adfs/figure1.png "Secure hybrid network architecture with Active Directory" \ No newline at end of file diff --git a/docs/blueprints/identity/azure-ad.md 
b/docs/blueprints/identity/azure-ad.md new file mode 100644 index 00000000000..015401f43c6 --- /dev/null +++ b/docs/blueprints/identity/azure-ad.md @@ -0,0 +1,305 @@ +--- +title: Integrate on-premises AD domains with Azure Active Directory +description: >- + How to implement a secure hybrid network architecture using Azure Active + Directory. +services: 'guidance,virtual-network,active-directory' +documentationcenter: na +author: telmosampaio +manager: christb +editor: '' +tags: azure-resource-manager +pnp.series.title: Identity management +ms.assetid: f42fa2c2-2dea-46a2-a916-6dd60082a8da +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/28/2016 +ms.author: telmos +pnp.series.next: adds-extend-domain +pnp.series.prev: ./index +cardTitle: Integrate on-premises AD with Azure AD +--- +# Integrate on-premises Active Directory domains with Azure Active Directory +[!INCLUDE [header](../../_includes/header.md)] + +Azure Active Directory (Azure AD) is a cloud based multi-tenant directory and identity service. This article describes best practices for integrating on-premises Active Directory domains and forests with Azure AD to provide cloud-based identity authentication. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This reference architecture uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +Many organizations use [Active Directory Domain Services (AD DS)][active-directory-domain-services] to authenticate identities associated with users, computers, applications, or other resources that are included in a security boundary. Directory and identity services are typically hosted on-premises, but if your application is hosted partly on-premises and partly in Azure, there may be latency sending authentication requests from Azure back to on-premises. 
Implementing directory and identity services in Azure can reduce this latency. + +Azure provides two solutions for implementing directory and identity services in Azure: + +* Use [Azure AD][azure-active-directory] to create an Active Directory domain in the cloud and connect it to your on-premises Active Directory domain. [Azure AD Connect][azure-ad-connect] integrates your on-premises directories with Azure AD. + + The Azure AD directory is not an extension of an on-premises directory. Rather, it's a copy that contains the same objects and identities. Changes made to these items on-premises are copied to Azure AD, but changes made in Azure AD are not replicated back to the on-premises domain. + + You can also use Azure AD without using an on-premises directory. In this case, Azure AD acts as the primary source of all identity information, rather than containing data replicated from an on-premises directory. + +* Extend your existing on-premises Active Directory infrastructure to Azure, by deploying a VM in Azure that runs AD DS as a domain controller. This architecture is more common when the on-premises network and the Azure virtual network (VNet) are connected by a VPN or ExpressRoute connection. + +This article describes the first option, an on-premises network synchronizing with Azure AD. For information about the second option, see [Extending Active Directory Domain Services (AD DS) to Azure][adds-extend-domain]. + +Typical uses for this reference architecture include: + +* Web applications deployed in Azure that provide access to remote users who belong to your organization. +* Implementing self-service capabilities for end-users, such as resetting their passwords, and delegating group management. Note that this requires Azure AD Premium edition. +* Architectures in which the on-premises network and the application's Azure VNet are not connected using a VPN tunnel or ExpressRoute circuit. + +> [!NOTE] +> Azure AD currently supports user authentication only. 
Some applications and services, such as SQL Server, may require computer authentication, in which case this solution is not appropriate. +> +> + +## Architecture diagram +The following diagram highlights the important components in this architecture. This article focuses on the interaction between the Azure AD tenant and the Azure VNet. For more information on the web, business, and data tiers, see [Running VMs for an N-tier architecture on Azure][implementing-a-multi-tier-architecture-on-Azure]: + +> A Visio document that includes this architecture diagram is available for download from the [Microsoft download center][visio-download]. This diagram is on the "Identity - Azure AD" page. +> +> + +[![0]][0] + +> [!NOTE] +> For simplicity, this diagram only shows the connections directly related to Azure AD, and does not show protocol-related traffic that may occur as part of authentication and identity federation. For example, a web application may redirect the web browser to authenticate the request through Azure AD. Once authenticated, the request can be passed back to the web application, with the appropriate identity information. +> +> + +* **Azure AD tenant**. An instance of Azure AD created by your organization. It acts as a directory service for cloud applications by storing objects copied from the on-premises Active Directory and provides identity services. +* **Web tier subnet**. This subnet holds VMs that run a web application. Azure AD can act as an identity broker for this application. +* **On-premises AD DS server**. An on-premises directory and identity service. The AD DS directory can be synchronized with Azure AD to enable it to authenticate on-premises users. +* **Azure AD Connect sync server**. An on-premises computer that runs the Azure AD Connect sync service. This service synchronizes information held in the on-premises Active Directory to Azure AD. 
For example, if you provision or deprovision groups and users on-premises, these changes propagate to Azure AD. + + > [!NOTE] + > For security reasons, Azure AD stores user's passwords as a hash. If a user requires a password reset, this must be performed on-premises and the new hash must be sent to Azure AD. Azure AD Premium editions include features that can automate this task to enable users to reset their own passwords. + > + > + +## Recommendations + +The following recommendations apply for most scenarios. Follow these recommendations unless you have a specific requirement that overrides them. + +### Azure AD Connect sync service + +The Azure AD Connect sync service ensures that identity information stored in the cloud is consistent with that held on-premises. You install this service using the Azure AD Connect software. + +Before implementing Azure AD Connect sync, determine the synchronization requirements of your organization. For example, what to synchronize, from which domains, and how frequently. For more information, see [Determine directory synchronization requirements][aad-sync-requirements]. + +You can run the Azure AD Connect sync service on a VM or a computer hosted on-premises. Depending on the volatility of the information in your Active Directory directory, the load on the Azure AD Connect sync service is unlikely to be high after the initial synchronization with Azure AD. Running the service on a VM makes it easier to scale the server if needed. Monitor the activity on the VM as described in the Monitoring considerations section to determine whether scaling is necessary. + +If you have multiple on-premises domains in a forest, we recommend storing and synchronizing information for the entire forest to a single Azure AD tenant. Filter information for identities that occur in more than one domain, so that each identity appears only once in Azure AD, rather than being duplicated. Duplication can lead to inconsistencies when data is synchronized. 
For more information, see the Topology section below. + +Use filtering so that only necessary data is stored in Azure AD. For example, your organization might not want to store information about inactive accounts in Azure AD. Filtering can be group-based, domain-based, organization unit (OU)-based, or attribute-based. You can combine filters to generate more complex rules. For example, you could synchronize objects held in a domain that have a specific value in a selected attribute. For detailed information, see [Azure AD Connect sync: Configure Filtering][aad-filtering]. + +To implement high availability for the AD Connect sync service, run a secondary staging server. For more information, see the Topology recommendations section. + +### Security recommendations + +**User password management.** The Azure AD Premium editions support password writeback, enabling your on-premises users to perform self-service password resets from within the Azure portal. This feature should only be enabled after reviewing your organization's password security policy. For example, you can restrict which users can change their passwords, and you can tailor the password management experience. For more information, see [Customizing Password Management to fit your organization's needs][aad-password-management]. + +**Protect on-premises applications that can be accessed externally.** Use the Azure AD Application Proxy to provide controlled access to on-premises web applications for external users through Azure AD. Only users that have valid credentials in your Azure directory have permission to use the application. For more information, see the article [Enable Application Proxy in the Azure portal][aad-application-proxy]. + +**Actively monitor Azure AD for signs of suspicious activity.** Consider using Azure AD Premium P2 edition, which includes Azure AD Identity Protection. 
Identity Protection uses adaptive machine learning algorithms and heuristics to detect anomalies and risk events that may indicate that an identity has been compromised. For example, it can detect potentially unusual activity such as irregular sign-in activities, sign-ins from unknown sources or from IP addresses with suspicious activity, or sign-ins from devices that may be infected. Using this data, Identity Protection generates reports and alerts that enables you to investigate these risk events and take appropriate action. For more information, see [Azure Active Directory Identity Protection][aad-identity-protection]. + +You can use the reporting feature of Azure AD in the Azure portal to monitor security-related activities occurring in your system. For more information about using these reports, see [Azure Active Directory Reporting Guide][aad-reporting-guide]. + +### Topology recommendations + +Configure Azure AD Connect to implement a topology that most closely matches the requirements of your organization. Topologies that Azure AD Connect supports include the following: + +* **Single forest, single Azure AD directory**. In this topology, Azure AD Connect synchronizes objects and identity information from one or more domains in a single on-premises forest into a single Azure AD tenant. This is the default topology implemented by the express installation of Azure AD Connect. + + > [!NOTE] + > Don't use multiple Azure AD Connect sync servers to connect different domains in the same on-premises forest to the same Azure AD tenant, unless you are running a server in staging mode, described below. + > + > + +* **Multiple forests, single Azure AD directory**. In this topology, Azure AD Connect synchronizes objects and identity information from multiple forests into a single Azure AD tenant. Use this topology if your organization has more than one on-premises forest. 
You can consolidate identity information so that each unique user is represented once in the Azure AD directory, even if the same user exists in more than one forest. All forests use the same Azure AD Connect sync server. The Azure AD Connect sync server does not have to be part of any domain, but it must be reachable from all forests. + + > [!NOTE] + > In this topology, don't use separate Azure AD Connect sync servers to connect each on-premises forest to a single Azure AD tenant. This can result in duplicated identity information in Azure AD if users are present in more than one forest. + > + > + +* **Multiple forests, separate topologies**. This topology merges identity information from separate forests into a single Azure AD tenant, treating all forests as separate entities. This topology is useful if you are combining forests from different organizations and the identity information for each user is held in only one forest. + + > [!NOTE] + > If the global address lists (GAL) in each forest are synchronized, a user in one forest may be present in another as a contact. This can occur if your organization has implemented GALSync with Forefront Identity manager 2010 or Microsoft Identity Manager 2016. In this scenario, you can specify that users should be identified by their *Mail* attribute. You can also match identities using the *ObjectSID* and *msExchMasterAccountSID* attributes. This is useful if you have one or more resource forests with disabled accounts. + > + > + +* **Staging server**. In this configuration, you run a second instance of the Azure AD Connect sync server in parallel with the first. This structure supports scenarios such as: + + * High availability. + * Testing and deploying a new configuration of the Azure AD Connect sync server. + * Introducing a new server and decommissioning an old configuration. + + In these scenarios, the second instance runs in *staging mode*. 
The server records imported objects and synchronization data in its database, but does not pass the data to Azure AD. If you disable staging mode, the server starts writing data to Azure AD, and also starts performing password write-backs into the on-premises directories where appropriate. For more information, see [Azure AD Connect sync: Operational tasks and considerations][aad-connect-sync-operational-tasks]. + +* **Multiple Azure AD directories**. It is recommended that you create a single Azure AD directory for an organization, but there may be situations where you need to partition information across separate Azure AD directories. In this case, avoid synchronization and password write-back issues by ensuring that each object from the on-premises forest appears in only one Azure AD directory. To implement this scenario, configure separate Azure AD Connect sync servers for each Azure AD directory, and use filtering so that each Azure AD Connect sync server operates on a mutually exclusive set of objects. + +For more information about these topologies, see [Topologies for Azure AD Connect][aad-topologies]. + +### User authentication + +By default, the Azure AD Connect sync server configures password synchronization between the on-premises domain and Azure AD, and the Azure AD service assumes that users authenticate by providing the same password that they use on-premises. For many organizations, this is appropriate, but you should consider your organization's existing policies and infrastructure. For example: + +* The security policy of your organization might prohibit synchronizing password hashes to the cloud. +* You might require that users experience seamless single sign-on (SSO) when accessing cloud resources from domain-joined machines on the corporate network. +* Your organization might already have Active Directory Federation Services (AD FS) or a third party federation provider deployed. 
You can configure Azure AD to use this infrastructure to implement authentication and SSO rather than by using password information held in the cloud. + +For more information, see [Azure AD Connect User Sign on options][aad-user-sign-in]. + +### Azure AD application proxy + +Use Azure AD to provide access to on-premises applications. + +Expose your on-premises web applications using application proxy connectors managed by the Azure AD application proxy component. The application proxy connector opens an outbound network connection to the Azure AD application proxy, and remote users' requests are routed back from Azure AD through this connection to the web apps. This removes the need to open inbound ports in the on-premises firewall and reduces the attack surface exposed by your organization. + +For more information, see [Publish applications using Azure AD Application proxy][aad-application-proxy]. + +### Object synchronization + +Azure AD Connect's default configuration synchronizes objects from your local Active Directory directory based on the rules specified in the article [Azure AD Connect sync: Understanding the default configuration][aad-connect-sync-default-rules]. Objects that satisfy these rules are synchronized while all other objects are ignored. Some example rules: + +* User objects must have a unique *sourceAnchor* attribute and the *accountEnabled* attribute must be populated. +* User objects must have a *sAMAccountName* attribute and cannot start with the text *Azure AD_* or *MSOL_*. + +Azure AD Connect applies several rules to User, Contact, Group, ForeignSecurityPrincipal, and Computer objects. Use the Synchronization Rules Editor installed with Azure AD Connect if you need to modify the default set of rules. For more information, see [Azure AD Connect sync: Understanding the default configuration][aad-connect-sync-default-rules]. + +You can also define your own filters to limit the objects to be synchronized by domain or OU. 
Alternatively, you can implement more complex custom filtering such as that described in [Azure AD Connect sync: Configure Filtering][aad-filtering]. + +### Monitoring + +Health monitoring is performed by the following agents installed on-premises: + +* Azure AD Connect installs an agent that captures information about synchronization operations. Use the Azure AD Connect Health blade in the Azure portal to monitor its health and performance. For more information, see [Using Azure AD Connect Health for sync][aad-health]. +* To monitor the health of the AD DS domains and directories from Azure, install the Azure AD Connect Health for AD DS agent on a machine within the on-premises domain. Use the Azure Active Directory Connect Health blade in the Azure portal for health monitoring. For more information, see [Using Azure AD Connect Health with AD DS][aad-health-adds]. +* Install the Azure AD Connect Health for AD FS agent to monitor the health of services running on-premises, and use the Azure Active Directory Connect Health blade in the Azure portal to monitor AD FS. For more information, see [Using Azure AD Connect Health with AD FS][aad-health-adfs]. + +For more information on installing the AD Connect Health agents and their requirements, see [Azure AD Connect Health Agent Installation][aad-agent-installation]. + +## Scalability considerations + +The Azure AD service supports scalability based on replicas, with a single primary replica that handles write operations plus multiple read-only secondary replicas. Azure AD transparently redirects attempted writes made against secondary replicas to the primary replica and provides eventual consistency. All changes made to the primary replica are propagated to the secondary replicas. This architecture scales well because most operations against Azure AD are reads rather than writes. For more information, see [Azure AD: Under the hood of our geo-redundant, highly available, distributed cloud directory][aad-scalability]. 
+ +For the Azure AD Connect sync server, determine how many objects you are likely to synchronize from your local directory. If you have less than 100,000 objects, you can use the default SQL Server Express LocalDB software provided with Azure AD Connect. If you have a larger number of objects, you should install a production version of SQL Server and perform a custom installation of Azure AD Connect, specifying that it should use an existing instance of SQL Server. + +## Availability considerations + +The Azure AD service is geo-distributed and runs in multiple data centers spread around the world with automated failover. If a data center becomes unavailable, Azure AD ensures that your directory data is available for instance access in at least two more regionally dispersed data centers. + +> [!NOTE] +> The service level agreement (SLA) for Azure AD Basic and Premium services guarantees at least 99.9% availability. There is no SLA for the Free tier of Azure AD. For more information, see [SLA for Azure Active Directory][sla-aad]. +> +> + +Consider provisioning a second instance of Azure AD Connect sync server in staging mode to increase availability, as discussed in the topology recommendations section. + +If you are not using the SQL Server Express LocalDB instance that comes with Azure AD Connect, consider using SQL clustering to achieve high availability. Solutions such as mirroring and Always On are not supported by Azure AD Connect. + +For additional considerations about achieving high availability of the Azure AD Connect sync server and also how to recover after a failure, see [Azure AD Connect sync: Operational tasks and considerations - Disaster Recovery][aad-sync-disaster-recovery]. + +## Manageability considerations + +There are two aspects to managing Azure AD: + +* Administering Azure AD in the cloud. +* Maintaining the Azure AD Connect sync servers. 
+ +Azure AD provides the following options for managing domains and directories in the cloud: + +* **Azure Active Directory PowerShell Module**. Use this [module][aad-powershell] if you need to script common Azure AD administrative tasks such as user management, domain management, and configuring single sign-on. +* **Azure AD management blade in the Azure portal**. This blade provides an interactive management view of the directory, and enables you to control and configure most aspects of Azure AD. + +Azure AD Connect installs the following tools to maintain Azure AD Connect sync services from your on-premises machines: + +* **Microsoft Azure Active Directory Connect console**. This tool enables you to modify the configuration of the Azure AD Sync server, customize how synchronization occurs, enable or disable staging mode, and switch the user sign-in mode. Note that you can enable Active Directory FS sign-in using your on-premises infrastructure. +* **Synchronization Service Manager**. Use the *Operations* tab in this tool to manage the synchronization process and detect whether any parts of the process have failed. You can trigger synchronizations manually using this tool. The *Connectors* tab enables you to control the connections for the domains that the synchronization engine is attached to. +* **Synchronization Rules Editor**. Use this tool to customize the way objects are transformed when they are copied between an on-premises directory and Azure AD. This tool enables you to specify additional attributes and objects for synchronization, then executes filters to determine which objects should or should not be synchronized. For more information, see the Synchronization Rule Editor section in the document [Azure AD Connect sync: Understanding the default configuration][aad-connect-sync-default-rules]. 
+ +For more information and tips for managing Azure AD Connect, see [Azure AD Connect sync: Best practices for changing the default configuration][aad-sync-best-practices]. + +## Security considerations + +Use conditional access control to deny authentication requests from unexpected sources: + +- Trigger [Azure Multi-Factor Authentication (MFA)][azure-multifactor-authentication] if a user attempts to connect from a nontrusted location such as across the Internet instead of a trusted network. + +- Use the device platform type of the user (iOS, Android, Windows Mobile, Windows) to determine access policy to applications and features. + +- Record the enabled/disabled state of users' devices, and incorporate this information into the access policy checks. For example, if a user's phone is lost or stolen it should be recorded as disabled to prevent it from being used to gain access. + +- Control user access to resources based on group membership. Use [Azure AD dynamic membership rules][aad-dynamic-membership-rules] to simplify group administration. For a brief overview of how this works, see [Introduction to Dynamic Memberships for Groups][aad-dynamic-memberships]. + +- Use conditional access risk policies with Azure AD Identity Protection to provide advanced protection based on unusual sign-in activities or other events. + +For more information, see [Azure Active Directory conditional access][aad-conditional-access]. + +## Solution deployment + +A deployment for a reference architecture that implements these recommendations and considerations is available on GitHub. This reference architecture deploys a simulated on-premises network in Azure that you can use to test and experiment. The reference architecture can be deployed with either Windows or Linux VMs by following the directions below: + +1. 
Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-identity-aad%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-aad-onpremise-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select **windows** or **linux** in the **Os Type** drop down box. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include hard-coded administrator user names and passwords, and it is strongly recommended that you immediately change both on all the VMs. Click each VM in the Azure Portal then click on **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** drop down box, then select a new **User name** and **Password**. Click the **Update** button to persist the new user name and password. + + + +## Next steps +* Learn the best practices for [extending your on-premises ADDS domain to Azure][adds-extend-domain]. +* Learn the best practices for [creating an ADDS resource forest][adds-resource-forest] in Azure. 
+ + +[adds-extend-domain]: adds-extend-domain.md +[adds-resource-forest]: adds-forest.md + +[implementing-a-multi-tier-architecture-on-Azure]: ../virtual-machines-windows/n-tier.md + +[aad-agent-installation]: /azure/active-directory/active-directory-aadconnect-health-agent-install +[aad-application-proxy]: /azure/active-directory/active-directory-application-proxy-enable +[aad-conditional-access]: /azure/active-directory/active-directory-conditional-access +[aad-connect-sync-default-rules]: /azure/active-directory/active-directory-aadconnectsync-understanding-default-configuration +[aad-connect-sync-operational-tasks]: /azure/active-directory/active-directory-aadconnectsync-operations#staging-mode +[aad-dynamic-memberships]: https://youtu.be/Tdiz2JqCl9Q +[aad-dynamic-membership-rules]: /azure/active-directory/active-directory-accessmanagement-groups-with-advanced-rules +[aad-editions]: /azure/active-directory/active-directory-editions +[aad-filtering]: /azure/active-directory/active-directory-aadconnectsync-configure-filtering +[aad-health]: /azure/active-directory/active-directory-aadconnect-health-sync +[aad-health-adds]: /azure/active-directory/active-directory-aadconnect-health-adds +[aad-health-adfs]: /azure/active-directory/active-directory-aadconnect-health-adfs +[aad-identity-protection]: /azure/active-directory/active-directory-identityprotection +[aad-password-management]: /azure/active-directory/active-directory-passwords-customize +[aad-powershell]: https://msdn.microsoft.com/library/azure/mt757189.aspx +[aad-reporting-guide]: /azure/active-directory/active-directory-reporting-guide +[aad-scalability]: https://blogs.technet.microsoft.com/enterprisemobility/2014/09/02/azure-ad-under-the-hood-of-our-geo-redundant-highly-available-distributed-cloud-directory/ +[aad-sync-best-practices]: /azure/active-directory/active-directory-aadconnectsync-best-practices-changing-default-configuration +[aad-sync-disaster-recovery]: 
/azure/active-directory/active-directory-aadconnectsync-operations#disaster-recovery +[aad-sync-requirements]: /azure/active-directory/active-directory-hybrid-identity-design-considerations-directory-sync-requirements +[aad-topologies]: /azure/active-directory/active-directory-aadconnect-topologies +[aad-user-sign-in]: /azure/active-directory/active-directory-aadconnect-user-signin +[active-directory-domain-services]: https://technet.microsoft.com/library/dd448614.aspx +[ad-azure-guidelines]: https://msdn.microsoft.com/library/azure/jj156090.aspx +[azure-active-directory]: /azure/active-directory-domain-services/active-directory-ds-overview +[azure-ad-connect]: /azure/active-directory/active-directory-aadconnect +[azure-multifactor-authentication]: /azure/multi-factor-authentication/multi-factor-authentication +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[sla-aad]: https://azure.microsoft.com/support/legal/sla/active-directory/v1_0/ +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx + +[0]: ../_images/guidance-identity-aad/figure1.png "Cloud identity architecture using Azure Active Directory" \ No newline at end of file diff --git a/docs/blueprints/identity/considerations.md b/docs/blueprints/identity/considerations.md new file mode 100644 index 00000000000..8a3a359299f --- /dev/null +++ b/docs/blueprints/identity/considerations.md @@ -0,0 +1,88 @@ +--- +title: Managing identity in Azure +description: Explains and compares the different methods available for managing identity in hybrid systems that span the on-premises/cloud boundary with Azure. +--- + +# Considerations +[!INCLUDE [header](../../_includes/header.md)] + +### Azure AD + +Azure AD is a straightforward way to implement a security domain in Azure. It is used by many Microsoft applications, such as Microsoft Office 365. 
+ +You can use Azure AD to create a domain in Azure and link it to an on-premises AD domain. Azure AD enables you to configure single sign-on for users running applications accessed through the cloud. + +Benefits: + +* You don't need to maintain an Active Directory infrastructure in the cloud. Azure AD is entirely managed and maintained by Microsoft. +* Azure AD provides the same identity information that is available on-premises. +* Authentication can happen in Azure, reducing the need for external applications and users to contact the on-premises domain. + +Considerations: + +* Identity services are limited to users and groups. There is no ability to authenticate service and computer accounts. +* You must configure connectivity with your on-premises domain to keep the Azure AD directory synchronized. +* You are responsible for publishing applications that users can access in the cloud through Azure AD. + + +### AD DS in Azure joined to an on-premises forest + +An organization might need to use features that are provided by AD Domain Services (AD DS) but are not currently implemented by Azure AD. You can host AD DS on-premises, but in a hybrid scenario where elements of an application are located in Azure, it can be more efficient to replicate this functionality and the AD repository to the cloud. This approach can help reduce the latency caused by sending authentication and local authorization requests from the cloud back to AD DS running on-premises. + +This approach requires that you create your own domain in the cloud and join it to the on-premises forest. You create VMs to host the AD DS services. + +Benefits: + +* Provides the ability to authenticate user, service, and computer accounts on-premises and in the cloud. +* Provides access to the same identity information that is available on-premises. +* There is no need to manage a separate AD forest; the domain in the cloud can belong to the on-premises forest. 
+* You can apply group policy defined by on-premises Group Policy Objects to the domain in the cloud. + +Considerations: + +* You must deploy and manage your own AD DS servers and domain in the cloud. +* There may be some synchronization latency between the domain servers in the cloud and the servers running on-premises. + + +### AD DS in Azure with a separate forest + +An organization that runs Active Directory (AD) on-premises might have a forest comprising many different domains. You can use domains to provide isolation between functional areas that must be kept separate, possibly for security reasons, but you can share information between domains by establishing trust relationships. + +An organization that utilizes separate domains can take advantage of Azure by relocating one or more of these domains into a separate forest in the cloud. Alternatively, an organization might wish to keep all cloud resources logically distinct from those held on-premises, and store information about cloud resources in their own directory, as part of a forest also held in the cloud. + +Benefits: + +* You can implement on-premises identities and separate Azure-only identities. +* There is no need to replicate from the on-premises AD forest to Azure. + +Considerations: + +* Authentication for on-premises identities in the cloud performs extra network hops to the on-premises AD servers. +* You must deploy your own AD DS servers and forest in the cloud, and establish the appropriate trust relationships between forests. + + +### AD FS in Azure + +AD FS can run on-premises, but in a hybrid scenario where applications are located in Azure, it can be more efficient to implement this functionality in the cloud. + +This architecture is especially useful for: + +* Solutions that utilize federated authorization to expose web applications to partner organizations. +* Systems that support access from web browsers running outside of the organizational firewall. 
+* Systems that enable users to access to web applications by connecting from authorized external devices such as remote computers, notebooks, and other mobile devices. + +Benefits: + +* You can leverage claims-aware applications. +* It provides the ability to trust external partners for authentication. +* It provides compatibility with large set of authentication protocols. + +Considerations: + +* You must deploy your own AD DS, AD FS, and AD FS Web Application Proxy servers in the cloud. +* This architecture can be complex to configure. + + + + +[aad]: https://azure.microsoft.com/services/active-directory/ diff --git a/docs/blueprints/identity/images/adds-extend-domain.svg b/docs/blueprints/identity/images/adds-extend-domain.svg new file mode 100644 index 00000000000..55c1385d0d9 --- /dev/null +++ b/docs/blueprints/identity/images/adds-extend-domain.svg @@ -0,0 +1,635 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + On-premises network + + + Azure VNet + + + + + + + DomainController + + Client + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + Domain Controllers + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + Application + + + + + + Active Directory + synchronization + + + diff --git a/docs/blueprints/identity/images/adds-forest.svg b/docs/blueprints/identity/images/adds-forest.svg new file mode 100644 index 00000000000..ff922ca485e --- /dev/null +++ b/docs/blueprints/identity/images/adds-forest.svg @@ -0,0 +1,604 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + 
Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + On-premises network + + + Azure VNet + + + + + + AD DS trust + relationship + + + + + + + + + AD Forest + + Client + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + AD Forest + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + Application + + + + diff --git a/docs/blueprints/identity/images/adfs.svg b/docs/blueprints/identity/images/adfs.svg new file mode 100644 index 00000000000..7ea4eecd11e --- /dev/null +++ b/docs/blueprints/identity/images/adfs.svg @@ -0,0 +1,668 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + Partner network + + Azure VNet + + + + + + + + + On-premises network + + + + + + Trust relationship + + + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + Domain Controllers + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + AD FS Servers + + + + + diff --git a/docs/blueprints/identity/images/azure-ad.svg b/docs/blueprints/identity/images/azure-ad.svg new file mode 100644 index 00000000000..7991ae58f97 --- /dev/null +++ b/docs/blueprints/identity/images/azure-ad.svg @@ -0,0 +1,639 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1071 + 
+ + + + + + + + + + On-premises network + + + Azure VNet + + + + + + + + + DomainController + + Client + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + + + + + + + + + + + + Azure AD + + + + + + Synchronization + + + + + Authentication + + + + + + Sheet.975 + + Sheet.976 + + + + Sheet.977 + + + + Sheet.978 + + + + + + + diff --git a/docs/blueprints/identity/index.md b/docs/blueprints/identity/index.md new file mode 100644 index 00000000000..36b82a49782 --- /dev/null +++ b/docs/blueprints/identity/index.md @@ -0,0 +1,85 @@ +--- +title: Identity management | Architectural Blueprints +description: Explains and compares the different methods available for managing identity in hybrid systems that span the on-premises/cloud boundary with Azure. +layout: LandingPage +pnp.series.title: Identity management +pnp.series.next: azure-ad +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +Most enterprise systems based on Windows use Active Directory (AD) for identity management. When you extend your network infrastructure to the cloud, there are several options for managing identity. + + + diff --git a/docs/blueprints/identity/series.yml b/docs/blueprints/identity/series.yml new file mode 100644 index 00000000000..c9a6d26b995 --- /dev/null +++ b/docs/blueprints/identity/series.yml @@ -0,0 +1,5 @@ +--- +columns: 2 +summary: Explains and compares the different methods available for managing identity in hybrid systems that span the on-premises/cloud boundary with Azure. +description: Most enterprise systems based on Windows use Active Directory (AD) for identity management. When you extend your network infrastructure to the cloud, there are several options for managing identity. 
+--- \ No newline at end of file diff --git a/docs/blueprints/index.liquid.md b/docs/blueprints/index.liquid.md new file mode 100644 index 00000000000..16bac5a7178 --- /dev/null +++ b/docs/blueprints/index.liquid.md @@ -0,0 +1,30 @@ +--- +title: Azure | Architecture +description: Architectural Blueprints +layout: LandingPage +tocRel: toc.json +--- + + +# Architectural Blueprints +[!INCLUDE [header](../_includes/header.md)] + +Our reference architectures are arranged by scenario, with multiple related architectures grouped together. +Each individual architecture offers recommended practices and prescriptive steps, as well as an executable component that embodies the recommendations. +Many of the architectures are progressive; building on top of preceding architectures that have fewer requirements. + +{% for item in series -%} +
    +

    {{ item.title }}

    + {%- capture path -%}{{ item.path }}/{%- endcapture -%} + {% include 'series' with item %} +

    {{ item.description }}

    + +
    +{% endfor %} \ No newline at end of file diff --git a/docs/blueprints/index.md b/docs/blueprints/index.md new file mode 100644 index 00000000000..64f882ed628 --- /dev/null +++ b/docs/blueprints/index.md @@ -0,0 +1,423 @@ +--- +title: Azure | Architecture +description: Architectural Blueprints +layout: LandingPage +--- + + +# Architectural Blueprints +[!INCLUDE [header](../_includes/header.md)] + +Our reference architectures are arranged by scenario, with multiple related architectures grouped together. +Each individual architecture offers recommended practices and prescriptive steps, as well as an executable component that embodies the recommendations. +Many of the architectures are progressive; building on top of preceding architectures that have fewer requirements. + +
    +

    Linux VM workloads

    +

    Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions.

    + +
    +
    +

    Windows VM workloads

    +

    Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions.

    + +
    +
    +

    Azure App Service

    +

    Azure App Service is a fully managed cloud service for hosting web applications and web APIs. However, most applications require more than just a web tier. For example, a typical application may use a database, cache, or CDN. Other considerations include deployment, diagnostics, and monitoring.

    + +
    +
    +

    Identity management

    +

    Most enterprise systems based on Windows use Active Directory (AD) for identity management. When you extend your network infrastructure to the cloud, there are several options for managing identity.

    + +
    +
    +

    Connect an on-premises network to Azure

    +

    Many organizations wish to integrate an existing on-premises infrastructure with Azure. A key part of this scenario is to establish a secure and robust network connection between the on-premises network and Azure.

    + +
    +
    +

    Network DMZ

    +

    An on-premises network can be connected to a virtual network in Azure by using an Azure VPN gateway. The network boundary between these two environments can expose areas of weakness in terms of security, and it is necessary to protect this boundary to block unauthorized requests. Similar protection is required for applications running on VMs in Azure that are exposed to the public Internet.

    + +
    diff --git a/docs/blueprints/managed-web-app/basic-web-app.md b/docs/blueprints/managed-web-app/basic-web-app.md new file mode 100644 index 00000000000..cc0174e05f0 --- /dev/null +++ b/docs/blueprints/managed-web-app/basic-web-app.md @@ -0,0 +1,262 @@ +--- +title: Basic web application +description: >- + Recommended architecture for a basic web application running in Microsoft + Azure. +services: 'app-service,app-service\web,sql-database' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Azure App Service +pnp.series.prev: ./index +pnp.series.next: web-queue-worker.md +ms.assetid: 12c8df2b-5693-4795-966f-533b54849799 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/23/2016 +ms.author: mwasson +cardTitle: Basic web application +--- +# Basic web application +[!INCLUDE [header](../../_includes/header.md)] + +This reference architecture shows a set of proven practices for a web application that uses [Azure App Service][app-service] and [Azure SQL Database][sql-db]. [**Deploy this solution.**](#deploy-the-solution) + + +![[0]][0] + +## Architecture + +> [!NOTE] +> This architecture does not focus on application development, and does not assume any particular application framework. The goal is to understand how various Azure services fit together. +> +> + +The architecture has the following components: + +* **Resource group**. A [resource group](/azure/azure-resource-manager/resource-group-overview) is a logical container for Azure resources. +* **App Service app**. [Azure App Service][app-service] is a fully managed platform for creating and deploying cloud applications. +* **App Service plan**. An [App Service plan][app-service-plans] provides the managed virtual machines (VMs) that host your app. All apps associated with a plan run on the same VM instances. + +* **Deployment slots**. 
A [deployment slot][deployment-slots] lets you stage a deployment and then swap it with the production deployment. That way, you avoid deploying directly into production. See the [Manageability](#manageability-considerations) section for specific recommendations. + +* **IP address**. The App Service app has a public IP address and a domain name. The domain name is a subdomain of `azurewebsites.net`, such as `contoso.azurewebsites.net`. To use a custom domain name, such as `contoso.com`, create domain name service (DNS) records that map the custom domain name to the IP address. For more information, see [Configure a custom domain name in Azure App Service][custom-domain-name]. +* **Azure SQL Database**. [SQL Database][sql-db] is a relational database-as-a-service in the cloud. +* **Logical server**. In Azure SQL Database, a logical server hosts your databases. You can create multiple databases per logical server. +* **Azure Storage**. Create an Azure storage account with a blob container to store diagnostic logs. +* **Azure Active Directory** (Azure AD). Use Azure AD or another identity provider for authentication. + +## Recommendations + +Your requirements might differ from the architecture described here. Use the recommendations in this section as a starting point. + +### App Service plan +Use the Standard or Premium tiers, because they support scale out, autoscale, and secure sockets layer (SSL). Each tier supports several *instance sizes* that differ by number of cores and memory. You can change the tier or instance size after you create a plan. For more information about App Service plans, see [App Service Pricing][app-service-plans-tiers]. + +You are charged for the instances in the App Service plan, even if the app is stopped. Make sure to delete plans that you aren't using (for example, test deployments). + +### SQL Database +Use the [V12 version][sql-db-v12] of SQL Database. 
SQL Database supports Basic, Standard, and Premium [service tiers][sql-db-service-tiers], with multiple performance levels within each tier measured in [Database Transaction Units (DTUs)][sql-dtu]. Perform capacity planning and choose a tier and performance level that meets your requirements. + +### Region +Provision the App Service plan and the SQL Database in the same region to minimize network latency. Generally, choose the region closest to your users. + +The resource group also has a region, which specifies where deployment metadata is stored. Put the resource group and its resources in the same region. This can improve availability during deployment. + +## Scalability considerations + +A major benefit of Azure App Service is the ability to scale your application based on load. Here are some considerations to keep in mind when planning to scale your application. + +### Scaling the App Service app + +There are two ways to scale an App Service app: + +* *Scale up*, which means changing the instance size. The instance size determines the memory, number of cores, and storage on each VM instance. You can scale up manually by changing the instance size or the plan tier. + +* *Scale out*, which means adding instances to handle increased load. Each pricing tier has a maximum number of instances. + + You can scale out manually by changing the instance count, or use [autoscaling][web-app-autoscale] to have Azure automatically add or remove instances based on a schedule and/or performance metrics. Each scale operation happens quickly—typically within seconds. + + To enable autoscaling, create an autoscale *profile* that defines the minimum and maximum number of instances. Profiles can be scheduled. For example, you might create separate profiles for weekdays and weekends. Optionally, a profile contains rules for when to add or remove instances. (Example: Add two instances if CPU usage is above 70% for 5 minutes.) 
+ +Recommendations for scaling a web app: + +* As much as possible, avoid scaling up and down, because it may trigger an application restart. Instead, select a tier and size that meet your performance requirements under typical load and then scale out the instances to handle changes in traffic volume. +* Enable autoscaling. If your application has a predictable, regular workload, create profiles to schedule the instance counts ahead of time. If the workload is not predictable, use rule-based autoscaling to react to changes in load as they occur. You can combine both approaches. +* CPU usage is generally a good metric for autoscale rules. However, you should load test your application, identify potential bottlenecks, and base your autoscale rules on that data. +* Autoscale rules include a *cool-down* period, which is the interval to wait after a scale action has completed before starting a new scale action. The cool-down period lets the system stabilize before scaling again. Set a shorter cool-down period for adding instances, and a longer cool-down period for removing instances. For example, set 5 minutes to add an instance, but 60 minutes to remove an instance. It's better to add new instances quickly under heavy load to handle the additional traffic, and then gradually scale back. + +### Scaling SQL Database +If you need a higher service tier or performance level for SQL Database, you can scale up individual databases with no application downtime. For details, see [Change the service tier and performance level of a SQL database][sql-db-scale]. + +## Availability considerations +At the time of writing, the service level agreement (SLA) for App Service is 99.95% and the SLA for SQL Database is 99.99% for Basic, Standard, and Premium tiers. + +> [!NOTE] +> The App Service SLA applies to both single and multiple instances. +> +> + +### Backups +In the event of data loss, SQL Database provides point-in-time restore and geo-restore. 
These features are available in all tiers and are automatically enabled. You don't need to schedule or manage the backups. + +- Use point-in-time restore to [recover from human error][sql-human-error] by returning the database to an earlier point in time. +- Use geo-restore to [recover from a service outage][sql-outage-recovery] by restoring a database from a geo-redundant backup. + +For more information, see [Cloud business continuity and database disaster recovery with SQL Database][sql-backup]. + +App Service provides a [backup and restore][web-app-backup] feature for your application files. However, be aware that the backed-up files include app settings in plain text and these may include secrets, such as connection strings. Avoid using the App Service backup feature to back up your SQL databases because it exports the database to a SQL .bacpac file, consuming [DTUs][sql-dtu]. Instead, use SQL Database point-in-time restore described above. + +## Manageability considerations +Create separate resource groups for production, development, and test environments. This makes it easier to manage deployments, delete test deployments, and assign access rights. + +When assigning resources to resource groups, consider the following: + +* Lifecycle. In general, put resources with the same lifecycle into the same resource group. +* Access. You can use [role-based access control][rbac] (RBAC) to apply access policies to the resources in a group. +* Billing. You can view the rolled-up costs for the resource group. + +For more information, see [Azure Resource Manager overview](/azure/azure-resource-manager/resource-group-overview). + +### Deployment +Deployment involves two steps: + +1. Provisioning the Azure resources. We recommend that you use [Azure Resource Manager templates][arm-template] for this step. Templates make it easier to automate deployments via PowerShell or the Azure command line interface (CLI). +2. 
Deploying the application (code, binaries, and content files). You have several options, including deploying from a local Git repository, using Visual Studio, or continuous deployment from cloud-based source control. See [Deploy your app to Azure App Service][deploy]. + +An App Service app always has one deployment slot named `production`, which represents the live production site. We recommend creating a staging slot for deploying updates. The benefits of using a staging slot include: + +* You can verify the deployment succeeded, before swapping it into production. +* Deploying to a staging slot ensures that all instances are warmed up before being swapped into production. Many applications have a significant warmup and cold-start time. + +We also recommend creating a third slot to hold the last-known-good deployment. After you swap staging and production, move the previous production deployment (which is now in staging) into the last-known-good slot. That way, if you discover a problem later, you can quickly revert to the last-known-good version. + +![[1]][1] + +If you revert to a previous version, make sure any database schema changes are backward compatible. + +Don't use slots on your production deployment for testing because all apps within the same App Service plan share the same VM instances. For example, load tests might degrade the live production site. Instead, create separate App Service plans for production and test. By putting test deployments into a separate plan, you isolate them from the production version. + +### Configuration +Store configuration settings as [app settings][app-settings]. Define the app settings in your Resource Manager templates, or using PowerShell. At runtime, app settings are available to the application as environment variables. + +Never check passwords, access keys, or connection strings into source control. Instead, pass these as parameters to a deployment script that stores these values as app settings. 
+ +When you swap a deployment slot, the app settings are swapped by default. If you need different settings for production and staging, you can create app settings that stick to a slot and don't get swapped. + +### Diagnostics and monitoring +Enable [diagnostics logging][diagnostic-logs], including application logging and web server logging. Configure logging to use Blob storage. For performance reasons, create a separate storage account for diagnostic logs. Don't use the same storage account for logs and application data. For more detailed guidance on logging, see [Monitoring and diagnostics guidance][monitoring-guidance]. + +Use a service such as [New Relic][new-relic] or [Application Insights][app-insights] to monitor application performance and behavior under load. Be aware of the [data rate limits][app-insights-data-rate] for Application Insights. + +Perform load testing, using a tool such as [Visual Studio Team Services][vsts]. For a general overview of performance analysis in cloud applications, see [Performance Analysis Primer][perf-analysis]. + +Tips for troubleshooting your application: + +* Use the [troubleshoot blade][troubleshoot-blade] in the Azure portal to find solutions to common problems. +* Enable [log streaming][web-app-log-stream] to see logging information in near-real time. +* The [Kudu dashboard][kudu] has several tools for monitoring and debugging your application. For more information, see [Azure Websites online tools you should know about][kudu] (blog post). You can reach the Kudu dashboard from the Azure portal. Open the blade for your app and click **Tools**, then click **Kudu**. +* If you use Visual Studio, see the article [Troubleshoot a web app in Azure App Service using Visual Studio][troubleshoot-web-app] for debugging and troubleshooting tips. + +## Security considerations +This section lists security considerations that are specific to the Azure services described in this article. 
It's not a complete list of security best practices. For some additional security considerations, see [Secure an app in Azure App Service][app-service-security]. + +### SQL Database auditing +Auditing can help you maintain regulatory compliance and get insight into discrepancies and irregularities that could indicate business concerns or suspected security violations. See [Get started with SQL database auditing][sql-audit]. + +### Deployment slots +Each deployment slot has a public IP address. Secure the nonproduction slots using [Azure Active Directory login][aad-auth] so that only members of your development and DevOps teams can reach those endpoints. + +### Logging +Logs should never record users' passwords or other information that might be used to commit identity fraud. Scrub those details from the data before storing it. + +### SSL +An App Service app includes an SSL endpoint on a subdomain of `azurewebsites.net` at no additional cost. The SSL endpoint includes a wildcard certificate for the `*.azurewebsites.net` domain. If you use a custom domain name, you must provide a certificate that matches the custom domain. The simplest approach is to buy a certificate directly through the Azure portal. You can also import certificates from other certificate authorities. For more information, see [Buy and Configure an SSL Certificate for your Azure App Service][ssl-cert]. + +As a security best practice, your app should enforce HTTPS by redirecting HTTP requests. You can implement this inside your application or use a URL rewrite rule as described in [Enable HTTPS for an app in Azure App Service][ssl-redirect]. + +### Authentication +We recommend authenticating through an identity provider (IDP), such as Azure AD, Facebook, Google, or Twitter. Use OAuth 2 or OpenID Connect (OIDC) for the authentication flow. 
Azure AD provides functionality to manage users and groups, create application roles, integrate your on-premises identities, and consume backend services such as Office 365 and Skype for Business. + +Avoid having the application manage user logins and credentials directly, as it creates a potential attack surface. At a minimum, you would need to have email confirmation, password recovery, and multi-factor authentication; validate password strength; and store password hashes securely. The large identity providers handle all of those things for you, and are constantly monitoring and improving their security practices. + +Consider using [App Service authentication][app-service-auth] to implement the OAuth/OIDC authentication flow. The benefits of App Service authentication include: + +* Easy to configure. +* No code is required for simple authentication scenarios. +* Supports delegated authorization using OAuth access tokens to consume resources on behalf of the user. +* Provides a built-in token cache. + +Some limitations of App Service authentication: + +* Limited customization options. +* Delegated authorization is restricted to one backend resource per login session. +* If you use more than one IDP, there is no built-in mechanism for home realm discovery. +* For multi-tenant scenarios, the application must implement the logic to validate the token issuer. + +## Deploy the solution +An example Resource Manager template for this architecture is [available on GitHub][paas-basic-arm-template]. 
+ +To deploy the template using PowerShell, run the following commands: + +``` +New-AzureRmResourceGroup -Name -Location "West US" + +$parameters = @{"appName"="";"environment"="dev";"locationShort"="uw";"databaseName"="app-db";"administratorLogin"="";"administratorLoginPassword"=""} + +New-AzureRmResourceGroupDeployment -Name -ResourceGroupName -TemplateFile .\PaaS-Basic.json -TemplateParameterObject $parameters +``` + +For more information, see [Deploy resources with Azure Resource Manager templates][deploy-arm-template]. + + + +[aad-auth]: /azure/app-service-mobile/app-service-mobile-how-to-configure-active-directory-authentication +[app-insights]: /azure/application-insights/app-insights-overview +[app-insights-data-rate]: /azure/application-insights/app-insights-pricing +[app-service]: https://azure.microsoft.com/documentation/services/app-service/ +[app-service-auth]: /azure/app-service-api/app-service-api-authentication +[app-service-plans]: /azure/app-service/azure-web-sites-web-hosting-plans-in-depth-overview +[app-service-plans-tiers]: https://azure.microsoft.com/pricing/details/app-service/ +[app-service-security]: /azure/app-service-web/web-sites-security +[app-settings]: /azure/app-service-web/web-sites-configure +[arm-template]: /azure/azure-resource-manager/resource-group-overview#resource-groups +[custom-domain-name]: /azure/app-service-web/web-sites-custom-domain-name +[deploy]: /azure/app-service-web/web-sites-deploy +[deploy-arm-template]: /azure/resource-group-template-deploy +[deployment-slots]: /azure/app-service-web/web-sites-staged-publishing +[diagnostic-logs]: /azure/app-service-web/web-sites-enable-diagnostic-log +[kudu]: https://azure.microsoft.com/blog/windows-azure-websites-online-tools-you-should-know-about/ +[monitoring-guidance]: ../../best-practices/monitoring.md +[new-relic]: http://newrelic.com/ +[paas-basic-arm-template]: https://github.com/mspnp/reference-architectures/tree/master/guidance-web-apps-basic/Paas-Basic/Templates 
+[perf-analysis]: https://github.com/mspnp/performance-optimization/blob/master/Performance-Analysis-Primer.md +[rbac]: /azure/active-directory/role-based-access-control-what-is +[resource-group]: /azure/azure-resource-manager/resource-group-overview +[sla]: https://azure.microsoft.com/support/legal/sla/ +[sql-audit]: /azure/sql-database/sql-database-auditing-get-started +[sql-backup]: /azure/sql-database/sql-database-business-continuity +[sql-db]: https://azure.microsoft.com/documentation/services/sql-database/ +[sql-db-overview]: /azure/sql-database/sql-database-technical-overview +[sql-db-scale]: /azure/sql-database/sql-database-scale-up-powershell +[sql-db-service-tiers]: /azure/sql-database/sql-database-service-tiers +[sql-db-v12]: /azure/sql-database/sql-database-features +[sql-dtu]: /azure/sql-database/sql-database-service-tiers +[sql-human-error]: /azure/sql-database/sql-database-business-continuity#recover-a-database-after-a-user-or-application-error +[sql-outage-recovery]: /azure/sql-database/sql-database-business-continuity#recover-a-database-to-another-region-from-an-azure-regional-data-center-outage +[ssl-redirect]: /azure/app-service-web/web-sites-configure-ssl-certificate#bkmk_enforce +[sql-resource-limits]: /azure/sql-database/sql-database-resource-limits +[ssl-cert]: /azure/app-service-web/web-sites-purchase-ssl-web-site +[troubleshoot-blade]: https://azure.microsoft.com/updates/self-service-troubleshooting-for-app-service-web-apps-customers/ +[troubleshoot-web-app]: /azure/app-service-web/web-sites-dotnet-troubleshoot-visual-studio +[vsts]: https://www.visualstudio.com/features/vso-cloud-load-testing-vs.aspx +[web-app-autoscale]: /azure/app-service-web/web-sites-scale +[web-app-backup]: /azure/app-service-web/web-sites-backup +[web-app-log-stream]: /azure/app-service-web/web-sites-enable-diagnostic-log#streamlogs +[0]: ../_images/blueprints/paas-basic-web-app.png "Architecture of a basic Azure web application" +[1]: 
../_images/blueprints/paas-basic-web-app-staging-slots.png "Swapping slots for production and staging deployments" diff --git a/docs/blueprints/managed-web-app/images/basic-web-app.svg b/docs/blueprints/managed-web-app/images/basic-web-app.svg new file mode 100644 index 00000000000..ecc5febb4ca --- /dev/null +++ b/docs/blueprints/managed-web-app/images/basic-web-app.svg @@ -0,0 +1,274 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/managed-web-app/images/multi-region-web-app-diagram.png b/docs/blueprints/managed-web-app/images/multi-region-web-app-diagram.png new file mode 100644 index 00000000000..ae7ce7933e2 Binary files /dev/null and b/docs/blueprints/managed-web-app/images/multi-region-web-app-diagram.png differ diff --git a/docs/blueprints/managed-web-app/images/multi-region-web-app.svg b/docs/blueprints/managed-web-app/images/multi-region-web-app.svg new file mode 100644 index 00000000000..c18cd928ecf --- /dev/null +++ b/docs/blueprints/managed-web-app/images/multi-region-web-app.svg @@ -0,0 +1,1265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/managed-web-app/images/scalable-web-app.svg b/docs/blueprints/managed-web-app/images/scalable-web-app.svg new file mode 100644 index 00000000000..13f326d8ada 
--- /dev/null +++ b/docs/blueprints/managed-web-app/images/scalable-web-app.svg @@ -0,0 +1,852 @@ + + + +image/svg+xmlLayer_1Layer_1-7g4941-3path3-3path5-4g10016path3path5path9g11path13path15path19-7g21path23-0path25pathsg61polygon63path65path67path69polygon71path73g3354path3356path3358path3360path3362path3364g5253path3-4path5-26path7-09path9-77path11-8path13-1path15-3path17-7path19path21path23polygon25g4361g3366-3path3-5-6path5-3-4path7-0path9-9path11-5path13-5path15-8path17-2polygon19-6g10168path3-2path5-2path7-5path9-7path11-9g13path15-2path17-4paths-6g59path61path63path65-4path67-9Sheet.78Sheet.87path5380marker5412path5414Sheet.95path5422marker5424path5426Sheet.100Sheet.101path5422marker5424path5426Sheet.105path5422marker5424path5426 \ No newline at end of file diff --git a/docs/blueprints/managed-web-app/index.md b/docs/blueprints/managed-web-app/index.md new file mode 100644 index 00000000000..cf169253690 --- /dev/null +++ b/docs/blueprints/managed-web-app/index.md @@ -0,0 +1,68 @@ +--- +title: Azure App Service | Architectural Blueprints +description: Recommended architectures for a web applications running in Microsoft Azure. +layout: LandingPage +pnp.series.title: Azure App Service +pnp.series.next: basic-web-app +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +Azure App Service is a fully managed cloud service for hosting web applications and web APIs. However, most applications require more than just a web tier. For example, a typical application may use a database, cache, or CDN. Other considerations include deployment, diagnostics, and monitoring. 
+ + + diff --git a/docs/blueprints/managed-web-app/multi-region-web-app.md b/docs/blueprints/managed-web-app/multi-region-web-app.md new file mode 100644 index 00000000000..fffcef728cd --- /dev/null +++ b/docs/blueprints/managed-web-app/multi-region-web-app.md @@ -0,0 +1,175 @@ +--- +title: Multi-region web application +description: >- + Recommended architecture for web application with high availability, running + in Microsoft Azure. +services: 'app-service,app-service\web,sql-database' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Azure App Service +pnp.series.prev: web-queue-worker.md +ms.assetid: 7b7e3cd7-ecc6-4ca4-a4e9-468b0fa8ad9e +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/23/2016 +ms.author: mwasson +cardTitle: Run in multiple regions +--- +# Run a web application in multiple regions +[!INCLUDE [header](../../_includes/header.md)] + +This reference architecture shows how to run a web application in multiple regions to achieve high availability. + +![Reference architecture: Web application with high availability](./images/multi-region-web-app-diagram.png) + +## Architecture + +This architecture builds on the one shown in [Improve scalability in a web application][guidance-web-apps-scalability]. The main differences are: + +* **Primary and secondary regions**. This architecture uses two regions to achieve higher availability. The application is deployed to each region. During normal operations, network traffic is routed to the primary region. If the primary region becomes unavailable, traffic is routed to the secondary region. +* **Azure Traffic Manager**. [Traffic Manager][traffic-manager] routes incoming requests to the primary region. If the application running in that region becomes unavailable, Traffic Manager fails over to the secondary region. +* **Geo-replication** of SQL Database and DocumentDB. 
+ +A multi-region architecture can provide higher availability than deploying to a single region. If a regional outage affects the primary region, you can use [Traffic Manager][traffic-manager] to fail over to the secondary region. This architecture can also help if an individual subsystem of the application fails. + +There are several general approaches to achieving high availability across regions: + +* Active/passive with hot standby. Traffic goes to one region, while the other waits on hot standby. Hot standby means the VMs in the secondary region are allocated and running at all times. +* Active/passive with cold standby. Traffic goes to one region, while the other waits on cold standby. Cold standby means the VMs in the secondary region are not allocated until needed for failover. This approach costs less to run, but will generally take longer to come online during a failure. +* Active/active. Both regions are active, and requests are load balanced between them. If one region becomes unavailable, it is taken out of rotation. + +This reference architecture focuses on active/passive with hot standby, using Traffic Manager for failover. + + +## Recommendations + +Your requirements might differ from the architecture described here. Use the recommendations in this section as a starting point. + +### Regional pairing +Each Azure region is paired with another region within the same geography. In general, choose regions from the same regional pair (for example, East US 2 and Central US). Benefits of doing so include: + +* If there is a broad outage, recovery of at least one region out of every pair is prioritized. +* Planned Azure system updates are rolled out to paired regions sequentially to minimize possible downtime. +* In most cases, regional pairs reside within the same geography to meet data residency requirements. + +However, make sure that both regions support all of the Azure services needed for your application. 
See [Services by region][services-by-region]. For more information about regional pairs, see [Business continuity and disaster recovery (BCDR): Azure Paired Regions][regional-pairs]. + +### Resource groups +Consider placing the primary region, secondary region, and Traffic Manager into separate [resource groups][resource groups]. This lets you manage the resources deployed to each region as a single collection. + +### Traffic Manager configuration + +**Routing**. Traffic Manager supports several [routing algorithms][tm-routing]. For the scenario described in this article, use *priority* routing (formerly called *failover* routing). With this setting, Traffic Manager sends all requests to the primary region unless the endpoint for that region becomes unreachable. At that point, it automatically fails over to the secondary region. See [Configure Failover routing method][tm-configure-failover]. + +**Health probe**. Traffic Manager uses an HTTP (or HTTPS) probe to monitor the availability of each endpoint. The probe gives Traffic Manager a pass/fail test for failing over to the secondary region. It works by sending a request to a specified URL path. If it gets a non-200 response within a timeout period, the probe fails. After four failed requests, Traffic Manager marks the endpoint as degraded and fails over to the other endpoint. For details, see [Traffic Manager endpoint monitoring and failover][tm-monitoring]. + +As a best practice, create a health probe endpoint that reports the overall health of the application and use this endpoint for the health probe. The endpoint should check critical dependencies such as the App Service apps, storage queue, and SQL Database. Otherwise, the probe might report a healthy endpoint when critical parts of the application are actually failing. + +On the other hand, don't use the health probe to check lower priority services. 
For example, if an email service goes down the application can switch to a second provider or just send emails later. This is not a high enough priority to cause the application to fail over. For more information, see [Health Endpoint Monitoring Pattern][health-endpoint-monitoring-pattern]. + +### SQL Database +Use [Active Geo-Replication][sql-replication] to create a readable secondary replica in a different region. You can have up to four readable secondary replicas. Fail over to a secondary database if your primary database fails or needs to be taken offline. Active Geo-Replication can be configured for any database in any elastic database pool. + +### DocumentDB +DocumentDB supports geo-replication across regions. One region is designated as writable and the others are read-only replicas. + +If there is a regional outage, you can fail over by selecting another region to be the write region. The DocumentDB client SDK automatically sends write requests to the current write region, so you don't need to update the client configuration after a failover. For more information, see [Distribute data globally with DocumentDB][docdb-geo]. + +> [!NOTE] +> All of the replicas belong to the same resource group. +> +> + +### Storage +For Azure Storage, use [read-access geo-redundant storage][ra-grs] (RA-GRS). With RA-GRS storage, the data is replicated to a secondary region. You have read-only access to the data in the secondary region through a separate endpoint. If there is a regional outage or disaster, the Azure Storage team might decide to perform a geo-failover to the secondary region. There is no customer action required for this failover. + +For Queue storage, create a backup queue in the secondary region. During failover, the app can use the backup queue until the primary region becomes available again. That way, the application can still process new requests. 
+ +## Availability considerations + + +### Traffic Manager + +Traffic Manager automatically fails over if the primary region becomes unavailable. When Traffic Manager fails over, there is a period of time when clients cannot reach the application. The duration is affected by the following factors: + +* The health probe must detect that the primary data center has become unreachable. +* Domain name service (DNS) servers must update the cached DNS records for the IP address, which depends on the DNS time-to-live (TTL). The default TTL is 300 seconds (5 minutes), but you can configure this value when you create the Traffic Manager profile. + +For details, see [About Traffic Manager Monitoring][tm-monitoring]. + +Traffic Manager is a possible failure point in the system. If the service fails, clients cannot access your application during the downtime. Review the [Traffic Manager service level agreement (SLA)][tm-sla] and determine whether using Traffic Manager alone meets your business requirements for high availability. If not, consider adding another traffic management solution as a failback. If the Azure Traffic Manager service fails, change your canonical name (CNAME) records in DNS to point to the other traffic management service. This step must be performed manually, and your application will be unavailable until the DNS changes are propagated. + +### SQL Database +The recovery point objective (RPO) and estimated recovery time (ERT) for SQL Database are documented in [Overview of business continuity with Azure SQL Database][sql-rpo]. + +### Storage +RA-GRS storage provides durable storage, but it's important to understand what can happen during an outage: + +* If a storage outage occurs, there will be a period of time when you don't have write-access to the data. You can still read from the secondary endpoint during the outage. 
+* If a regional outage or disaster affects the primary location and the data there cannot be recovered, the Azure Storage team may decide to perform a geo-failover to the secondary region. +* Data replication to the secondary region is performed asynchronously. Therefore, if a geo-failover is performed, some data loss is possible if the data can't be recovered from the primary region. +* Transient failures, such as a network outage, will not trigger a storage failover. Design your application to be resilient to transient failures. Possible mitigations: + + * Read from the secondary region. + * Temporarily switch to another storage account for new write operations (for example, to queue messages). + * Copy data from the secondary region to another storage account. + * Provide reduced functionality until the system fails back. + +For more information, see [What to do if an Azure Storage outage occurs][storage-outage]. + +## Manageability Considerations + +### Traffic Manager + +If Traffic Manager fails over, we recommend performing a manual failback rather than implementing an automatic failback. Otherwise, you can create a situation where the application flips back and forth between regions. Verify that all application subsystems are healthy before failing back. + +Note that Traffic Manager automatically fails back by default. To prevent this, manually lower the priority of the primary region after a failover event. For example, suppose the primary region is priority 1 and the secondary is priority 2. After a failover, set the primary region to priority 3, to prevent automatic failback. When you are ready to switch back, update the priority to 1. + +The following commands update the priority. 
+ +**PowerShell** + +```bat +$endpoint = Get-AzureRmTrafficManagerEndpoint -Name -ProfileName -ResourceGroupName -Type AzureEndpoints +$endpoint.Priority = 3 +Set-AzureRmTrafficManagerEndpoint -TrafficManagerEndpoint $endpoint +``` + +For more information, see [Azure Traffic Manager Cmdlets][tm-ps]. + +**Azure command line interface (CLI)** + +```bat +azure network traffic-manager endpoint set --name --profile-name --resource-group --type AzureEndpoints --priority 3 +``` + +### SQL Database +If the primary database fails, perform a manual failover to the secondary database. See [Restore an Azure SQL Database or failover to a secondary][sql-failover]. The secondary database remains read-only until you fail over. + + + + +[azure-sql-db]: https://azure.microsoft.com/documentation/services/sql-database/ +[docdb-geo]: /azure/documentdb/documentdb-distribute-data-globally +[guidance-web-apps-scalability]: web-queue-worker.md +[health-endpoint-monitoring-pattern]: https://msdn.microsoft.com/library/dn589789.aspx +[ra-grs]: /azure/storage/storage-redundancy#read-access-geo-redundant-storage +[regional-pairs]: /azure/best-practices-availability-paired-regions +[resource groups]: /azure/azure-resource-manager/resource-group-overview#resource-groups +[services-by-region]: https://azure.microsoft.com/regions/#services +[sql-failover]: /azure/sql-database/sql-database-disaster-recovery +[sql-replication]: /azure/sql-database/sql-database-geo-replication-overview +[sql-rpo]: /azure/sql-database/sql-database-business-continuity#sql-database-features-that-you-can-use-to-provide-business-continuity +[storage-outage]: /azure/storage/storage-disaster-recovery-guidance +[tm-configure-failover]: /azure/traffic-manager/traffic-manager-configure-failover-routing-method +[tm-monitoring]: /azure/traffic-manager/traffic-manager-monitoring +[tm-ps]: https://msdn.microsoft.com/library/mt125941.aspx +[tm-routing]: /azure/traffic-manager/traffic-manager-routing-methods +[tm-sla]: 
https://azure.microsoft.com/support/legal/sla/traffic-manager/v1_0/ +[traffic-manager]: https://azure.microsoft.com/services/traffic-manager/ diff --git a/docs/blueprints/managed-web-app/series.yml b/docs/blueprints/managed-web-app/series.yml new file mode 100644 index 00000000000..b3e62166e11 --- /dev/null +++ b/docs/blueprints/managed-web-app/series.yml @@ -0,0 +1,5 @@ +--- +columns: 3 +summary: Recommended architectures for web applications running in Microsoft Azure. +description: Azure App Service is a fully managed cloud service for hosting web applications and web APIs. However, most applications require more than just a web tier. For example, a typical application may use a database, cache, or CDN. Other considerations include deployment, diagnostics, and monitoring. +--- \ No newline at end of file diff --git a/docs/blueprints/managed-web-app/web-queue-worker.md b/docs/blueprints/managed-web-app/web-queue-worker.md new file mode 100644 index 00000000000..64cd2567adc --- /dev/null +++ b/docs/blueprints/managed-web-app/web-queue-worker.md @@ -0,0 +1,158 @@ +--- +title: Scalable web application +description: Improving scalability in a web application running in Microsoft Azure. +services: 'app-service,app-service\web,sql-database' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Azure App Service +pnp.series.prev: basic-web-app +pnp.series.next: multi-region-web-app +ms.assetid: 15459a5a-78e3-45ab-966c-dfc786214d1c +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/23/2016 +ms.author: mwasson +cardTitle: Improve scalability +--- +# Improve scalability in a web application +[!INCLUDE [header](../../_includes/header.md)] + +This reference architecture shows a set of proven practices for improving scalability and performance in a web application running on Microsoft Azure. 
+ +![[0]][0] + +## Architecture + +This architecture builds on the one shown in [Basic web application][basic-web-app]. It includes the following components: + +* **Resource group**. A [resource group][resource-group] is a logical container for Azure resources. +* **[Web app][app-service-web-app]** and **[API app][app-service-api-app]**. A typical modern application might include both a website and one or more RESTful web APIs. A web API might be consumed by browser clients through AJAX, by native client applications, or by server-side applications. For considerations on designing web APIs, see [API design guidance][api-guidance]. +* **WebJob**. Use [Azure WebJobs][webjobs] to run long-running tasks in the background. WebJobs can run on a schedule, continuously, or in response to a trigger, such as putting a message on a queue. A WebJob runs as a background process in the context of an App Service app. +* **Queue**. In the architecture shown here, the application queues background tasks by putting a message onto an [Azure Queue storage][queue-storage] queue. The message triggers a function in the WebJob. Alternatively, you can use Service Bus queues. For a comparison, see [Azure Queues and Service Bus queues - compared and contrasted][queues-compared]. +* **Cache**. Store semi-static data in [Azure Redis Cache][azure-redis]. +* **CDN**. Use [Azure Content Delivery Network][azure-cdn] (CDN) to cache publicly available content for lower latency and faster delivery of content. +* **Data storage**. Use [Azure SQL Database][sql-db] for relational data. For non-relational data, consider a NoSQL store, such as Azure Table storage or [DocumentDB][documentdb]. +* **Azure Search**. Use [Azure Search][azure-search] to add search functionality such as search suggestions, fuzzy search, and language-specific search. Azure Search is typically used in conjunction with another data store, especially if the primary data store requires strict consistency. 
In this approach, store authoritative data in the other data store and the search index in Azure Search. Azure Search can also be used to consolidate a single search index from multiple data stores. +* **Email/SMS**. Use a third-party service such as SendGrid or Twilio to send email or SMS messages instead of building this functionality directly into the application. + +## Recommendations + +Your requirements might differ from the architecture described here. Use the recommendations in this section as a starting point. + +### App Service apps +We recommend creating the web application and the web API as separate App Service apps. This design lets you run them in separate App Service plans so they can be scaled independently. If you don't need that level of scalability initially, you can deploy the apps into the same plan and move them into separate plans later if necessary. + +> [!NOTE] +> For the Basic, Standard, and Premium plans, you are billed for the VM instances in the plan, not per app. See [App Service Pricing][app-service-pricing] +> +> + +If you intend to use the *Easy Tables* or *Easy APIs* features of App Service Mobile Apps, create a separate App Service app for this purpose. These features rely on a specific application framework to enable them. + +### WebJobs +Consider deploying resource intensive WebJobs to an empty App Service app within a separate App Service plan. This provides dedicated instances for the WebJob. See [Background jobs guidance][webjobs-guidance]. + +### Cache +You can improve performance and scalability by using [Azure Redis Cache][azure-redis] to cache some data. Consider using Redis Cache for: + +* Semi-static transaction data. +* Session state. +* HTML output. This can be useful in applications that render complex HTML output. + +For more detailed guidance on designing a caching strategy, see [Caching guidance][caching-guidance]. + +### CDN +Use [Azure CDN][azure-cdn] to cache static content. 
The main benefit of a CDN is to reduce latency for users, because content is cached at an edge server that is geographically close to the user. CDN can also reduce load on the application, because that traffic is not being handled by the application. + +If your app consists mostly of static pages, consider using [CDN to cache the entire app][cdn-app-service]. Otherwise, put static content such as images, CSS, and HTML files, into [Azure Storage and use CDN to cache those files][cdn-storage-account]. + +> [!NOTE] +> Azure CDN cannot serve content that requires authentication. +> +> + +For more detailed guidance, see [Content Delivery Network (CDN) guidance][cdn-guidance]. + +### Storage +Modern applications often process large amounts of data. In order to scale for the cloud, it's important to choose the right storage type. Here are some baseline recommendations. + +| What you want to store | Example | Recommended storage | +| --- | --- | --- | +| Files |Images, documents, PDFs |Azure Blob Storage | +| Key/Value pairs |User profile data looked up by user ID |Azure Table storage | +| Short messages intended to trigger further processing |Order requests |Azure Queue storage, Service Bus queue, or Service Bus topic | +| Non-relational data with a flexible schema requiring basic querying |Product catalog |Document database, such as Azure DocumentDB, MongoDB, or Apache CouchDB | +| Relational data requiring richer query support, strict schema, and/or strong consistency |Product inventory |Azure SQL Database | + +## Scalability considerations + +A major benefit of Azure App Service is the ability to scale your application based on load. Here are some considerations to keep in mind when planning to scale your application. + +### App Service app +If your solution includes several App Service apps, consider deploying them to separate App Service plans. This approach enables you to scale them independently because they run on separate instances. 
+ +Similarly, consider putting a WebJob into its own plan so that background tasks don't run on the same instances that handle HTTP requests. + +### SQL Database +Increase scalability of a SQL database by *sharding* the database. Sharding refers to partitioning the database horizontally. Sharding allows you to scale out the database horizontally using [Elastic Database tools][sql-elastic]. Potential benefits of sharding include: + +- Better transaction throughput. +- Queries can run faster over a subset of the data. + +### Azure Search +Azure Search removes the overhead of performing complex data searches from the primary data store, and it can scale to handle load. See [Scale resource levels for query and indexing workloads in Azure Search][azure-search-scaling]. + +## Security considerations +This section lists security considerations that are specific to the Azure services described in this article. It's not a complete list of security best practices. For some additional security considerations, see [Secure an app in Azure App Service][app-service-security]. + +### Cross-Origin Resource Sharing (CORS) +If you create a website and web API as separate apps, the website cannot make client-side AJAX calls to the API unless you enable CORS. + +> [!NOTE] +> Browser security prevents a web page from making AJAX requests to another domain. This restriction is called the same-origin policy, and prevents a malicious site from reading sensitive data from another site. CORS is a W3C standard that allows a server to relax the same-origin policy and allow some cross-origin requests while rejecting others. +> +> + +App Services has built-in support for CORS, without needing to write any application code. See [Consume an API app from JavaScript using CORS][cors]. Add the website to the list of allowed origins for the API. + +### SQL Database encryption +Use [Transparent Data Encryption][sql-encryption] if you need to encrypt data at rest in the database. 
This feature performs real-time encryption and decryption of an entire database (including backups and transaction log files) and requires no changes to the application. Encryption does add some latency, so it's a good practice to separate the data that must be secure into its own database and enable encryption only for that database. + + + + +[api-guidance]: ../../best-practices/api-design.md +[app-service-security]: /azure/app-service-web/web-sites-security +[app-service-web-app]: /azure/app-service-web/app-service-web-overview +[app-service-api-app]: /azure/app-service-api/app-service-api-apps-why-best-platform +[app-service-pricing]: https://azure.microsoft.com/pricing/details/app-service/ +[azure-cdn]: https://azure.microsoft.com/services/cdn/ +[azure-redis]: https://azure.microsoft.com/services/cache/ +[azure-search]: https://azure.microsoft.com/documentation/services/search/ +[azure-search-scaling]: /azure/search/search-capacity-planning +[background-jobs]: ../../best-practices/background-jobs.md +[basic-web-app]: basic-web-app.md +[basic-web-app-scalability]: basic-web-app.md#scalability-considerations +[caching-guidance]: ../../best-practices/caching.md +[cdn-app-service]: /azure/app-service-web/cdn-websites-with-cdn +[cdn-storage-account]: /azure/cdn/cdn-create-a-storage-account-with-cdn +[cdn-guidance]: ../../best-practices/cdn.md +[cors]: /azure/app-service-api/app-service-api-cors-consume-javascript +[documentdb]: https://azure.microsoft.com/documentation/services/documentdb/ +[queue-storage]: /azure/storage/storage-dotnet-how-to-use-queues +[queues-compared]: /azure/service-bus-messaging/service-bus-azure-and-service-bus-queues-compared-contrasted +[resource-group]: /azure/azure-resource-manager/resource-group-overview#resource-groups +[sql-db]: https://azure.microsoft.com/documentation/services/sql-database/ +[sql-elastic]: /azure/sql-database/sql-database-elastic-scale-introduction +[sql-encryption]: https://msdn.microsoft.com/library/dn948096.aspx 
+[tm]: https://azure.microsoft.com/services/traffic-manager/ +[web-app-multi-region]: ./multi-region-web-app.md +[webjobs-guidance]: ../../best-practices/background-jobs.md +[webjobs]: /azure/app-service/app-service-webjobs-readme +[0]: ../_images/blueprints/paas-web-scalability.png "Web application in Azure with improved scalability" diff --git a/docs/blueprints/toc.md b/docs/blueprints/toc.md new file mode 100644 index 00000000000..77e7afab2b9 --- /dev/null +++ b/docs/blueprints/toc.md @@ -0,0 +1,36 @@ +# [Reference Architectures](./index.md) + +## [Identity management](./identity/index.md) +### [Integrate on-premises AD with Azure AD](./identity/azure-ad.md) +### [Extend AD DS to Azure](./identity/adds-extend-domain.md) +### [Create an AD DS forest in Azure](./identity/adds-forest.md) +### [Extend AD FS to Azure](./identity/adfs.md) + +## [Hybrid network](./hybrid-networking/index.md) +### [Which should I choose?](./hybrid-networking/considerations.md) +### [VPN](./hybrid-networking/vpn.md) +### [ExpressRoute](./hybrid-networking/expressroute.md) +### [ExpressRoute with VPN failover](./hybrid-networking/expressroute-vpn-failover.md) + +## [Network DMZ](./dmz/index.md) +### [DMZ between Azure and on-premises](./dmz/secure-vnet-hybrid.md) +### [DMZ between Azure and the Internet](./dmz/secure-vnet-dmz.md) +### [Highly available network virtual appliances](./dmz/nva-ha.md) + +## [VM workloads (Linux)](./virtual-machines-linux/index.md) +### [Single VM](./virtual-machines-linux/single-vm.md) +### [Load balanced VMs](./virtual-machines-linux/multi-vm.md) +### [N-tier application](./virtual-machines-linux/n-tier.md) +### [Multi-region application](./virtual-machines-linux/multi-region-application.md) + +## [VM workloads (Windows)](./virtual-machines-windows/index.md) +### [Single VM](./virtual-machines-windows/single-vm.md) +### [Load balanced VMs](./virtual-machines-windows/multi-vm.md) +### [N-tier application](./virtual-machines-windows/n-tier.md) +### 
[Multi-region application](./virtual-machines-windows/multi-region-application.md) + +## [Web application with managed services](./managed-web-app/index.md) +### [Basic web application](./managed-web-app/basic-web-app.md) +### [Web-queue-worker](./managed-web-app/web-queue-worker.md.md) +### [Multi-region deployment](./managed-web-app/multi-region-web-app.md) + diff --git a/docs/blueprints/virtual-machines-linux/images/multi-region-application-diagram.png b/docs/blueprints/virtual-machines-linux/images/multi-region-application-diagram.png new file mode 100644 index 00000000000..3536cfcdd79 Binary files /dev/null and b/docs/blueprints/virtual-machines-linux/images/multi-region-application-diagram.png differ diff --git a/docs/blueprints/virtual-machines-linux/images/multi-region-application.svg b/docs/blueprints/virtual-machines-linux/images/multi-region-application.svg new file mode 100644 index 00000000000..aa5e7f7d23a --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/images/multi-region-application.svg @@ -0,0 +1,1743 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-linux/images/multi-vm-diagram.png 
b/docs/blueprints/virtual-machines-linux/images/multi-vm-diagram.png new file mode 100644 index 00000000000..b980eb0e0a3 Binary files /dev/null and b/docs/blueprints/virtual-machines-linux/images/multi-vm-diagram.png differ diff --git a/docs/blueprints/virtual-machines-linux/images/multi-vm.svg b/docs/blueprints/virtual-machines-linux/images/multi-vm.svg new file mode 100644 index 00000000000..6dddb9af199 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/images/multi-vm.svg @@ -0,0 +1,465 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-linux/images/n-tier-diagram.png b/docs/blueprints/virtual-machines-linux/images/n-tier-diagram.png new file mode 100644 index 00000000000..6b74ae47067 Binary files /dev/null and b/docs/blueprints/virtual-machines-linux/images/n-tier-diagram.png differ diff --git a/docs/blueprints/virtual-machines-linux/images/n-tier.svg b/docs/blueprints/virtual-machines-linux/images/n-tier.svg new file mode 100644 index 00000000000..dcc1d79da56 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/images/n-tier.svg @@ -0,0 +1,1142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-linux/images/single-vm-diagram.png 
b/docs/blueprints/virtual-machines-linux/images/single-vm-diagram.png new file mode 100644 index 00000000000..63637827dac Binary files /dev/null and b/docs/blueprints/virtual-machines-linux/images/single-vm-diagram.png differ diff --git a/docs/blueprints/virtual-machines-linux/images/single-vm.svg b/docs/blueprints/virtual-machines-linux/images/single-vm.svg new file mode 100644 index 00000000000..3bfe9edf6c7 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/images/single-vm.svg @@ -0,0 +1,283 @@ + + + +image/svg+xmlrect4743rect5174g5181path3path5path7path9circle11g6636path6638path6640path6642path6644path6646path6648path6650path6652path6654path6656path4865-4XMLID_1_path12 \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-linux/index.md b/docs/blueprints/virtual-machines-linux/index.md new file mode 100644 index 00000000000..265be0e6b82 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/index.md @@ -0,0 +1,84 @@ +--- +title: Linux VM workloads | Architectural Blueprints +description: Explains some common architectures for deploying VMs that host enterprise-scale applications in Azure. +layout: LandingPage +pnp.series.title: Linux VM workloads +pnp.series.next: single-vm +--- + + +# Series overview + +Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions. + + + diff --git a/docs/blueprints/virtual-machines-linux/multi-region-application.md b/docs/blueprints/virtual-machines-linux/multi-region-application.md new file mode 100644 index 00000000000..34b8417ec97 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/multi-region-application.md @@ -0,0 +1,166 @@ +--- +title: Run Linux VMs in multiple Azure regions for high availability +description: >- + How to deploy VMs in multiple regions on Azure for high availability and + resiliency. 
+services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Linux VM workloads +pnp.series.prev: n-tier +ms.assetid: 11b968f7-3a7f-4b43-bffe-8549d7a772cb +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: Multi-region application +--- +# Run Linux VMs in multiple regions for high availability + +This reference architecture shows a set of proven practices for running an N-tier application in multiple Azure regions, in order to achieve availability and a robust disaster recovery infrastructure. + +![[0]][0] + +## Architecture + +This architecture builds on the one shown in [Run Linux VMs for an N-tier application](n-tier.md). + +* **Primary and secondary regions**. Use two regions to achieve higher availability. One is the primary region. The other region is for failover. +* **Azure Traffic Manager**. [Traffic Manager][traffic-manager] routes incoming requests to one of the regions. During normal operations, it routes requests to the primary region. If that region becomes unavailable, Traffic Manager fails over to the secondary region. For more information, see the section [Traffic Manager configuration](#traffic-manager-configuration). +* **Resource groups**. Create separate [resource groups][resource groups] for the primary region, the secondary region, and for Traffic Manager. This gives you the flexibility to manage each region as a single collection of resources. For example, you could redeploy one region, without taking down the other one. [Link the resource groups][resource-group-links], so that you can run a query to list all the resources for the application. +* **VNets**. Create a separate VNet for each region. Make sure the address spaces do not overlap. +* **Apache Cassandra**. Deploy Cassandra in data centers across Azure regions for high availability. 
Within each region, nodes are configured in rack-aware mode with fault and upgrade domains, for resiliency inside the region. + +## Recommendations + +A multi-region architecture can provide higher availability than deploying to a single region. If a regional outage affects the primary region, you can use [Traffic Manager][traffic-manager] to fail over to the secondary region. This architecture can also help if an individual subsystem of the application fails. + +There are several general approaches to achieving high availability across regions: + +* Active/passive with hot standby. Traffic goes to one region, while the other waits on hot standby. Hot standby means the VMs in the secondary region are allocated and running at all times. +* Active/passive with cold standby. Traffic goes to one region, while the other waits on cold standby. Cold standby means the VMs in the secondary region are not allocated until needed for failover. This approach costs less to run, but will generally take longer to come online during a failure. +* Active/active. Both regions are active, and requests are load balanced between them. If one region becomes unavailable, it is taken out of rotation. + +This architecture focuses on active/passive with hot standby, using Traffic Manager for failover. Note that you could deploy a small number of VMs for hot standby and then scale out as needed. + + +### Regional pairing + +Each Azure region is paired with another region within the same geography. In general, choose regions from the same regional pair (for example, East US 2 and US Central). Benefits of doing so include: + +* If there is a broad outage, recovery of at least one region out of every pair is prioritized. +* Planned Azure system updates are rolled out to paired regions sequentially, to minimize possible downtime. +* Pairs reside within the same geography, to meet data residency requirements. 
+ +However, make sure that both regions support all of the Azure services needed for your application (see [Services by region][services-by-region]). For more information about regional pairs, see [Business continuity and disaster recovery (BCDR): Azure Paired Regions][regional-pairs]. + +### Traffic Manager configuration + +Consider the following points when configuring Traffic Manager: + +* **Routing**. Traffic Manager supports several [routing algorithms][tm-routing]. For the scenario described in this article, use *priority* routing (formerly called *failover* routing). With this setting, Traffic Manager sends all requests to the primary region, unless the primary region becomes unreachable. At that point, it automatically fails over to the secondary region. See [Configure Failover routing method][tm-configure-failover]. +* **Health probe**. Traffic Manager uses an HTTP (or HTTPS) [probe][tm-monitoring] to monitor the availability of each region. The probe checks for an HTTP 200 response for a specified URL path. As a best practice, create an endpoint that reports the overall health of the application, and use this endpoint for the health probe. Otherwise, the probe might report a healthy endpoint when critical parts of the application are actually failing. For more information, see [Health Endpoint Monitoring Pattern][health-endpoint-monitoring-pattern]. + +When Traffic Manager fails over there is a period of time when clients cannot reach the application. The duration is affected by the following factors: + +* The health probe must detect that the primary region has become unreachable. +* DNS servers must update the cached DNS records for the IP address, which depends on the DNS time-to-live (TTL). The default TTL is 300 seconds (5 minutes), but you can configure this value when you create the Traffic Manager profile. + +For details, see [About Traffic Manager Monitoring][tm-monitoring]. 
+ +If Traffic Manager fails over, we recommend performing a manual failback rather than implementing an automatic failback. Otherwise, you can create a situation where the application flips back and forth between regions. Verify that all application subsystems are healthy before failing back. + +Note that Traffic Manager automatically fails back by default. To prevent this, manually lower the priority of the primary region after a failover event. For example, suppose the primary region is priority 1 and the secondary is priority 2. After a failover, set the primary region to priority 3, to prevent automatic failback. When you are ready to switch back, update the priority to 1. + +The following [Azure CLI][install-azure-cli] command updates the priority: + +```bat +azure network traffic-manager endpoint set --resource-group --profile-name + --name --type AzureEndpoints --priority 3 +``` + +Another approach is to temporarily disable the endpoint until you are ready to fail back: + +```bat +azure network traffic-manager endpoint set --resource-group --profile-name + --name --type AzureEndpoints --status Disabled +``` + +Depending on the cause of a failover, you might need to redeploy the resources within a region. Before failing back, perform an operational readiness test. The test should verify things like: + +* VMs are configured correctly. (All required software is installed, IIS is running, and so on.) +* Application subsystems are healthy. +* Functional testing. (For example, the database tier is reachable from the web tier.) + +### Cassandra deployment across multiple regions + +Cassandra data centers are a group of related data nodes that are configured together within a cluster for replication and workload segregation. + +We recommend [DataStax Enterprise][datastax] for production use. For more information on running DataStax in Azure, see [DataStax Enterprise Deployment Guide for Azure][cassandra-in-azure]. 
The following general recommendations apply to any Cassandra edition: + +* Assign a public IP address to each node. This enables the clusters to communicate across regions using the Azure backbone infrastructure, providing high throughput at low cost. +* Secure nodes using the appropriate firewall and network security group (NSG) configurations, allowing traffic only to and from known hosts, including clients and other cluster nodes. Note that Cassandra uses different ports for communication, OpsCenter, Spark, and so forth. For port usage in Cassandra, see [Configuring firewall port access][cassandra-ports]. +* Use SSL encryption for all [client-to-node][ssl-client-node] and [node-to-node][ssl-node-node] communications. +* Within a region, follow the guidelines in [Cassandra recommendations](n-tier.md#cassandra). + +## Availability considerations + +With a complex N-tier app, you may not need to replicate the entire application in the secondary region. Instead, you might just replicate a critical subsystem that is needed to support business continuity. + +Traffic Manager is a possible failure point in the system. If the Traffic Manager service fails, clients cannot access your application during the downtime. Review the [Traffic Manager SLA][tm-sla], and determine whether using Traffic Manager alone meets your business requirements for high availability. If not, consider adding another traffic management solution as a fallback. If the Azure Traffic Manager service fails, change your CNAME records in DNS to point to the other traffic management service. (This step must be performed manually, and your application will be unavailable until the DNS changes are propagated.) + +For the Cassandra cluster, the failover scenarios to consider depend on the consistency levels used by the application, as well as the number of replicas used. 
For consistency levels and usage in Cassandra, see [Configuring data consistency][cassandra-consistency] and [Cassandra: How many nodes are talked to with Quorum?][cassandra-consistency-usage] Data availability in Cassandra is determined by the consistency level used by the application and the replication mechanism. For replication in Cassandra, see [Data Replication in NoSQL Databases Explained][cassandra-replication]. + +## Manageability considerations + +When you update your deployment, update one region at a time to reduce the chance of a global failure from an incorrect configuration or an error in the application. + +Test the resiliency of the system to failures. Here are some common failure scenarios to test: + +* Shut down VM instances. +* Pressure resources such as CPU and memory. +* Disconnect/delay network. +* Crash processes. +* Expire certificates. +* Simulate hardware faults. +* Shut down the DNS service on the domain controllers. + +Measure the recovery times and verify they meet your business requirements. Test combinations of failure modes, as well. 
+ + + +[hybrid-vpn]: ../hybrid-networking/vpn.md + +[azure-sla]: https://azure.microsoft.com/support/legal/sla/ +[cassandra-in-azure]: https://academy.datastax.com/resources/deployment-guide-azure +[cassandra-consistency]: http://docs.datastax.com/en/cassandra/2.0/cassandra/dml/dml_config_consistency_c.html +[cassandra-replication]: http://www.planetcassandra.org/data-replication-in-nosql-databases-explained/ +[cassandra-consistency-usage]: https://medium.com/@foundev/cassandra-how-many-nodes-are-talked-to-with-quorum-also-should-i-use-it-98074e75d7d5#.b4pb4alb2 +[cassandra-ports]: https://docs.datastax.com/en/datastax_enterprise/5.0/datastax_enterprise/sec/configFirewallPorts.html +[datastax]: https://www.datastax.com/products/datastax-enterprise +[health-endpoint-monitoring-pattern]: https://msdn.microsoft.com/library/dn589789.aspx +[install-azure-cli]: /azure/xplat-cli-install +[regional-pairs]: /azure/best-practices-availability-paired-regions +[resource groups]: /azure/azure-resource-manager/resource-group-overview +[resource-group-links]: /azure/resource-group-link-resources +[services-by-region]: https://azure.microsoft.com/regions/#services +[ssl-client-node]: http://docs.datastax.com/en/cassandra/2.0/cassandra/security/secureSSLClientToNode_t.html +[ssl-node-node]: http://docs.datastax.com/en/cassandra/2.0/cassandra/security/secureSSLNodeToNode_t.html +[tablediff]: https://msdn.microsoft.com/library/ms162843.aspx +[tm-configure-failover]: /azure/traffic-manager/traffic-manager-configure-failover-routing-method +[tm-monitoring]: /azure/traffic-manager/traffic-manager-monitoring +[tm-routing]: /azure/traffic-manager/traffic-manager-routing-methods +[tm-sla]: https://azure.microsoft.com/support/legal/sla/traffic-manager/v1_0/ +[traffic-manager]: https://azure.microsoft.com/services/traffic-manager/ +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vnet-dns]: 
/azure/virtual-network/virtual-networks-manage-dns-in-vnet +[vnet-to-vnet]: /azure/vpn-gateway/vpn-gateway-vnet-vnet-rm-ps +[vpn-gateway]: /azure/vpn-gateway/vpn-gateway-about-vpngateways +[wsfc]: https://msdn.microsoft.com/library/hh270278.aspx +[0]: ./images/multi-region-application-diagram.png "Highly available network architecture for Azure N-tier applications" diff --git a/docs/blueprints/virtual-machines-linux/multi-vm.md b/docs/blueprints/virtual-machines-linux/multi-vm.md new file mode 100644 index 00000000000..5cf71fcd831 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/multi-vm.md @@ -0,0 +1,198 @@ +--- +title: Run load-balanced VMs on Azure for scalability and availability +description: >- + How to run multiple Linux VMs on Azure for scalability and availability. +services: '' +documentationcenter: na +author: MikeWasson +manager: christb +editor: '' +tags: '' +pnp.series.title: Linux VM workloads +pnp.series.next: n-tier +pnp.series.prev: single-vm +ms.assetid: 6fa4764e-4d97-4b96-ac25-8adf602baf27 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: Load balanced VMs +--- +# Run load-balanced VMs for scalability and availability + +This reference architecture shows a set of proven practices for running several Linux virtual machines (VMs) behind a load balancer, to improve availability and scalability. This architecture can be used for any stateless workload, such as a web server, and is a building block for deploying N-tier applications. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +This architecture builds on the one shown in [Run a Linux VM on Azure][single vm]. The recommendations there also apply to this architecture. + +In this architecture, a workload is distributed across several VM instances. There is a single public IP address, and Internet traffic is distributed to the VMs using a load balancer. 
This architecture can be used for a single-tier application, such as a stateless web application or storage cluster. It's also a building block for N-tier applications. + +The architecture has the following components: + +* **Availability set**. The [availability set][availability set] contains the VMs. This makes the VMs eligible for the [availability service level agreement (SLA) for Azure VMs][vm-sla]. For the SLA to apply, you need a minimum of two VMs in the same availability set. +* **Virtual network (VNet) and subnet.** Every VM in Azure is deployed into a VNet that is further divided into subnets. +* **Azure Load Balancer**. The [load balancer] distributes incoming Internet requests to the VM instances. The load balancer includes some related resources: + * **Public IP address**. A public IP address is needed for the load balancer to receive Internet traffic. + * **Front-end configuration**. Associates the public IP address with the load balancer. + * **Back-end address pool**. Contains the network interfaces (NICs) for the VMs that will receive the incoming traffic. +* **Load balancer rules**. Used to distribute network traffic among all the VMs in the back-end address pool. +* **Network address translation (NAT) rules**. Used to route traffic to a specific VM. For example, to enable remote desktop protocol (RDP) to the VMs, create a separate NAT rule for each VM. +* **Network interfaces (NICs)**. Each VM has a NIC to connect to the network. +* **Storage**. Storage accounts hold the VM images and other file-related resources, such as VM diagnostic data captured by Azure. + + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + + +## Recommendations + +Your requirements might differ from the architecture described here. Use these recommendations as a starting point. 
+ + +### Availability set recommendations + +Create at least two VMs in the availability set, to support the [availability SLA for Azure VMs][vm-sla]. The Azure load balancer also requires that load-balanced VMs belong to the same availability set. + +Each Azure subscription has default limits in place, including a maximum number of VMs per region. You can increase the limit by filing a support request. For more information, see [Azure subscription and service limits, quotas, and constraints][subscription-limits]. + +### Network recommendations + +Place the VMs within the same subnet. Do not expose the VMs directly to the Internet, but instead give each VM a private IP address. Clients connect using the public IP address of the load balancer. + +### Load balancer recommendations + +Add all VMs in the availability set to the back-end address pool of the load balancer. + +Define load balancer rules to direct network traffic to the VMs. For example, to enable HTTP traffic, create a rule that maps port 80 from the front-end configuration to port 80 on the back-end address pool. When a client sends an HTTP request to port 80, the load balancer selects a back-end IP address by using a [hashing algorithm][load balancer hashing] that includes the source IP address. In that way, client requests are distributed across all the VMs. + +To route traffic to a specific VM, use NAT rules. For example, to enable RDP to the VMs, create a separate NAT rule for each VM. Each rule should map a distinct port number to port 3389, the default port for RDP. For example, use port 50001 for "VM1," port 50002 for "VM2," and so on. Assign the NAT rules to the NICs on the VMs. + +### Storage account recommendations + +Create separate Azure storage accounts for each VM to hold the virtual hard disks (VHDs), in order to avoid hitting the input/output operations per second [(IOPS) limits][vm-disk-limits] for storage accounts. + +Create one storage account for diagnostic logs. 
This storage account can be shared by all the VMs. + +## Scalability considerations + +To scale out, provision additional VMs and put them in the load balancer's back-end address pool. + +> [!TIP] +> When you add a new VM to an availability set, make sure to create a NIC for the VM, and add the NIC to the back-end address pool on the load balancer. Otherwise, Internet traffic won't be routed to the new VM. +> +> + +### VM scale sets + +Another option for scaling is to use a [virtual machine scale set][vmss]. VM scale sets help you to deploy and manage a set of identical VMs. Scale sets support autoscaling based on performance metrics. As the load on the VMs increases, additional VMs are automatically added to the load balancer. Consider scale sets if you need to quickly scale out VMs, or need to autoscale. + +Currently, scale sets do not support data disks. The options for storing data are Azure File storage, the OS drive, the temp drive, or an external store such as Azure Storage. + +By default, scale sets use "overprovisioning," which means the scale set initially provisions more VMs than you ask for, then deletes the extra VMs. This improves the overall success rate when provisioning the VMs. We recommend no more than 20 VMs per storage account with overprovisioning enabled, or no more than 40 VMs with overprovisioning disabled. + +There are two basic ways to configure VMs deployed in a scale set: + +- Use extensions to configure the VM after it is provisioned. With this approach, new VM instances may take longer to start up than a VM with no extensions. + +- Create a custom image. This option may be quicker to deploy. However, it requires you to keep the image up to date. A scale set built on a custom image must create all OS disk VHDs within one storage account. + +For additional considerations, see [Designing VM Scale Sets For Scale][vmss-design]. + +> [!TIP] +> When using any autoscale solution, test it with production-level workloads well in advance. 
+> +> + +## Availability considerations + +The availability set makes your application more resilient to both planned and unplanned maintenance events. + +* *Planned maintenance* occurs when Microsoft updates the underlying platform, sometimes causing VMs to be restarted. Azure makes sure the VMs in an availability set are not all restarted at the same time. At least one is kept running while others are restarting. +* *Unplanned maintenance* happens if there is a hardware failure. Azure makes sure that VMs in an availability set are provisioned across more than one server rack. This helps to reduce the impact of hardware failures, network outages, power interruptions, and so on. + +For more information, see [Manage the availability of virtual machines][availability set]. The following video also has a good overview of availability sets: [How Do I Configure an Availability Set to Scale VMs][availability set ch9]. + +> [!WARNING] +> Make sure to configure the availability set when you provision the VM. Currently, there is no way to add a Resource Manager VM to an availability set after the VM is provisioned. +> +> + +The load balancer uses [health probes] to monitor the availability of VM instances. If a probe cannot reach an instance within a timeout period, the load balancer stops sending traffic to that VM. However, the load balancer will continue to probe, and if the VM becomes available again, the load balancer resumes sending traffic to that VM. + +Here are some recommendations on load balancer health probes: + +* Probes can test either HTTP or TCP. If your VMs run an HTTP server, create an HTTP probe. Otherwise create a TCP probe. +* For an HTTP probe, specify the path to an HTTP endpoint. The probe checks for an HTTP 200 response from this path. This can be the root path ("/"), or a health-monitoring endpoint that implements some custom logic to check the health of the application. The endpoint must allow anonymous HTTP requests. 
+* The probe is sent from a [known][health-probe-ip] IP address, 168.63.129.16. Make sure you don't block traffic to or from this IP in any firewall policies or network security group (NSG) rules. +* Use [health probe logs][health probe log] to view the status of the health probes. Enable logging in the Azure portal for each load balancer. Logs are written to Azure Blob storage. The logs show how many VMs on the back end are not receiving network traffic due to failed probe responses. + +## Manageability considerations + +With multiple VMs, it is important to automate processes so they are reliable and repeatable. You can use [Azure Automation][azure-automation] to automate deployment, OS patching, and other tasks. [Azure Automation][azure-automation] is an automation service based on Windows PowerShell that can be used for this. Example automation scripts are available from the [Runbook Gallery] on TechNet. + +## Security considerations + +Virtual networks are a traffic isolation boundary in Azure. VMs in one VNet cannot communicate directly to VMs in a different VNet. VMs within the same VNet can communicate, unless you create [network security groups][nsg] (NSGs) to restrict traffic. For more information, see [Microsoft cloud services and network security][network-security]. + +For incoming Internet traffic, the load balancer rules define which traffic can reach the back end. However, load balancer rules don't support IP safe lists, so if you want to add certain public IP addresses to a safe list, add an NSG to the subnet. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. It includes a VNet, NSG, load balancer, and two VMs. It can be deployed with either Windows or Linux VMs. To deploy the architecture, follow these steps: + +1. 
Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-multi-vm%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create new** and enter `ra-multi-vm-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select either **windows** or **linux** in the **Os Type** drop down box. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include a hard-coded administrator user name and password, and it is strongly recommended that you immediately change both. Click the VM named `ra-multi-vm1` in the Azure portal. Then, click **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. Click the **Update** button to save the new user name and password. Repeat for the VM named `ra-multi-vm2`. 
+ + + + + +[n-tier-linux]: n-tier.md +[single vm]: single-vm.md + +[naming conventions]: /azure/guidance/guidance-naming-conventions + +[availability set]: /azure/virtual-machines/virtual-machines-windows-manage-availability +[availability set ch9]: https://channel9.msdn.com/Series/Microsoft-Azure-Fundamentals-Virtual-Machines/08 +[azure-automation]: https://azure.microsoft.com/documentation/services/automation/ +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-automation]: /azure/automation/automation-intro +[bastion host]: https://en.wikipedia.org/wiki/Bastion_host +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-multi-vm +[health probe log]: /azure/load-balancer/load-balancer-monitor-log +[health probes]: /azure/load-balancer/load-balancer-overview#load-balancer-features +[health-probe-ip]: /azure/virtual-network/virtual-networks-nsg#special-rules +[load balancer]: /azure/load-balancer/load-balancer-get-started-internet-arm-cli +[load balancer hashing]: /azure/load-balancer/load-balancer-overview#load-balancer-features +[network-security]: /azure/best-practices-network-security +[nsg]: /azure/virtual-network/virtual-networks-nsg +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[Runbook Gallery]: /azure/automation/automation-runbook-gallery#runbooks-in-runbook-gallery +[subscription-limits]: /azure/azure-subscription-service-limits +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vm-disk-limits]: /azure/azure-subscription-service-limits#virtual-machine-disk-limits +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines/v1_2/ +[vmss]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview +[vmss-design]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-design-overview +[vmss-quickstart]: https://azure.microsoft.com/documentation/templates/?term=scale+set 
+[VM-sizes]: https://azure.microsoft.com/documentation/articles/virtual-machines-windows-sizes/ +[0]: ./images/multi-vm-diagram.png "Architecture of a multi-VM solution on Azure comprising an availability set with two VMs and a load balancer" \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-linux/n-tier.md b/docs/blueprints/virtual-machines-linux/n-tier.md new file mode 100644 index 00000000000..d12076989a3 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/n-tier.md @@ -0,0 +1,183 @@ +--- +title: Run Linux VMs for an N-tier application on Azure +description: How to run Linux VMs for an N-tier architecture in Microsoft Azure. +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Linux VM workloads +pnp.series.next: multi-region-application +pnp.series.prev: multi-vm +ms.assetid: d9d8a7bc-763a-4f07-a34a-c3cf91b4d16e +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: N-tier application +--- +# Run Linux VMs for an N-tier application + +This reference architecture shows a set of proven practices for running Linux virtual machines (VMs) for an N-tier application. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +There are many ways to implement an N-tier architecture. The diagram shows a typical 3-tier web application. This architecture builds on [Run load-balanced VMs for scalability and availability][multi-vm]. The web and business tiers use load-balanced VMs. + +* **Availability sets.** Create an [availability set][azure-availability-sets] for each tier, and provision at least two VMs in each tier. This makes the VMs eligible for a higher [service level agreement (SLA)][vm-sla] for VMs. +* **Subnets.** Create a separate subnet for each tier. Specify the address range and subnet mask using [CIDR] notation. 
+* **Load balancers.** Use an [Internet-facing load balancer][load-balancer-external] to distribute incoming Internet traffic to the web tier, and an [internal load balancer][load-balancer-internal] to distribute network traffic from the web tier to the business tier. +* **Jumpbox.** Also called a [bastion host]. A secure VM on the network that administrators use to connect to the other VMs. The jumpbox has an NSG that allows remote traffic only from public IP addresses on a safe list. The NSG should permit secure shell (SSH) traffic. +* **Monitoring.** Monitoring software such as [Nagios], [Zabbix], or [Icinga] can give you insight into response time, VM uptime, and the overall health of your system. Install the monitoring software on a VM that's placed in a separate management subnet. +* **NSGs.** Use [network security groups][nsg] (NSGs) to restrict network traffic within the VNet. For example, in the 3-tier architecture shown here, the database tier does not accept traffic from the web front end, only from the business tier and the management subnet. +* **Apache Cassandra database**. Provides high availability at the data tier, by enabling replication and failover. + + + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +Your requirements might differ from the architecture described here. Use these recommendations as a starting point. + +### VNet / Subnets + +When you create the VNet, determine how many IP addresses your resources in each subnet require. Specify a subnet mask and a VNet address range large enough for the required IP addresses using [CIDR] notation. Use an address space that falls within the standard [private IP address blocks][private-ip-space], which are 10.0.0.0/8, 172.16.0.0/12, and 192.168.0.0/16. 
+ +Choose an address range that does not overlap with your on-premises network, in case you need to set up a gateway between the VNet and your on-premises network later. Once you create the VNet, you can't change the address range. + +Design subnets with functionality and security requirements in mind. All VMs within the same tier or role should go into the same subnet, which can be a security boundary. For more information about designing VNets and subnets, see [Plan and design Azure Virtual Networks][plan-network]. + +For each subnet, specify the address space for the subnet in CIDR notation. For example, '10.0.0.0/24' creates a range of 256 IP addresses. VMs can use 251 of these; five are reserved. Make sure the address ranges don't overlap across subnets. See the [Virtual Network FAQ][vnet faq]. + +### Network security groups + +Use NSG rules to restrict traffic between tiers. For example, in the 3-tier architecture shown above, the web tier does not communicate directly with the database tier. To enforce this, the database tier should block incoming traffic from the web tier subnet. + +1. Create an NSG and associate it to the database tier subnet. +2. Add a rule that denies all inbound traffic from the VNet. (Use the `VIRTUAL_NETWORK` tag in the rule.) +3. Add a rule with a higher priority that allows inbound traffic from the business tier subnet. This rule overrides the previous rule, and allows the business tier to talk to the database tier. +4. Add a rule that allows inbound traffic from within the database tier subnet itself. This rule allows communication between VMs in the database tier, which is needed for database replication and failover. +5. Add a rule that allows SSH traffic from the jumpbox subnet. This rule lets administrators connect to the database tier from the jumpbox. + + > [!NOTE] + > An NSG has [default rules][nsg-rules] that allow any inbound traffic from within the VNet. 
These rules can't be deleted, but you can override them by creating higher-priority rules. + > + > + +### Load balancers + +The external load balancer distributes Internet traffic to the web tier. Create a public IP address for this load balancer. See [Creating an Internet-facing load balancer][lb-external-create]. + +The internal load balancer distributes network traffic from the web tier to the business tier. To give this load balancer a private IP address, create a frontend IP configuration and associate it with the subnet for the business tier. See [Get started creating an Internal load balancer][lb-internal-create]. + +### Cassandra + +We recommend [DataStax Enterprise][datastax] for production use, but these recommendations apply to any Cassandra edition. For more information on running DataStax in Azure, see [DataStax Enterprise Deployment Guide for Azure][cassandra-in-azure]. + +Put the VMs for a Cassandra cluster in an availability set to ensure that the Cassandra replicas are distributed across multiple fault domains and upgrade domains. For more information about fault domains and upgrade domains, see [Manage the availability of virtual machines][availability-sets-manage]. + +Configure three fault domains (the maximum) per availability set and 18 upgrade domains per availability set. This provides the maximum number of upgrade domains that can still be distributed evenly across the fault domains. + +Configure nodes in rack-aware mode. Map fault domains to racks in the `cassandra-rackdc.properties` file. + +You don't need a load balancer in front of the cluster. The client connects directly to a node in the cluster. + +### Jumpbox + +The jumpbox will have minimal performance requirements, so select a small VM size for the jumpbox such as Standard A1. + +Create a [public IP address] for the jumpbox. Place the jumpbox in the same VNet as the other VMs, but in a separate management subnet. 
+ +Do not allow SSH access from the public Internet to the VMs that run the application workload. Instead, all SSH access to these VMs must come through the jumpbox. An administrator logs into the jumpbox, and then logs into the other VM from the jumpbox. The jumpbox allows SSH traffic from the Internet, but only from known, safe IP addresses. + +To secure the jumpbox, create an NSG and apply it to the jumpbox subnet. Add an NSG rule that allows SSH connections only from a safe set of public IP addresses. The NSG can be attached either to the subnet or to the jumpbox NIC. In this case, we recommend attaching it to the NIC, so SSH traffic is permitted only to the jumpbox, even if you add other VMs to the same subnet. + +Configure the NSGs for the other subnets to allow SSH traffic from the management subnet. + +## Availability considerations + +Put each tier or VM role into a separate availability set. + +At the database tier, having multiple VMs does not automatically translate into a highly available database. For a relational database, you will typically need to use replication and failover to achieve high availability. + +If you need higher availability than the [Azure SLA for VMs][vm-sla] provides, replicate the application across two regions and use Azure Traffic Manager for failover. For more information, see [Run Linux VMs in multiple regions for high availability][multi-dc]. + +## Security considerations + +Consider adding a network virtual appliance (NVA) to create a DMZ between the public Internet and the Azure virtual network. NVA is a generic term for a virtual appliance that can perform network-related tasks such as firewall, packet inspection, auditing, and custom routing. For more information, see [Implementing a DMZ between Azure and the Internet][dmz]. + +## Scalability considerations + +The load balancers distribute network traffic to the web and business tiers. Scale horizontally by adding new VM instances. 
Note that you can scale the web and business tiers independently, based on load. To reduce possible complications caused by the need to maintain client affinity, the VMs in the web tier should be stateless. The VMs hosting the business logic should also be stateless. + +## Manageability considerations + +Simplify management of the entire system by using centralized administration tools such as [Azure Automation][azure-administration], [Microsoft Operations Management Suite][operations-management-suite], [Chef][chef], or [Puppet][puppet]. These tools can consolidate diagnostic and health information captured from multiple VMs to provide an overall view of the system. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. The architecture is deployed in three stages. To deploy the architecture, follow these steps: + +1. Right-click the button below and select "open in new tab" or "open in new window." + [!["Deploy To Azure"][1]][2] +2. Once the link has opened in the Azure portal, enter the following values: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-ntier-sql-network-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +3. Check the Azure portal notifications for a message that the deployment is complete. +4. The parameter files include hard-coded administrator user names and passwords, and it is strongly recommended that you immediately change both on all the VMs. Click on each VM in the Azure portal then click on **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. 
Click the **Update** button to persist the new user name and password. + + +[multi-dc]: multi-region-application.md +[dmz]: ../dmz/secure-vnet-dmz.md +[multi-vm]: ./multi-vm.md +[naming conventions]: /azure/guidance/guidance-naming-conventions + +[azure-administration]: /azure/automation/automation-intro +[azure-availability-sets]: /azure/virtual-machines/virtual-machines-windows-manage-availability#configure-each-application-tier-into-separate-availability-sets +[availability-sets-manage]: /azure/virtual-machines/virtual-machines-windows-manage-availability +[bastion host]: https://en.wikipedia.org/wiki/Bastion_host +[cassandra-consistency]: http://docs.datastax.com/en/cassandra/2.0/cassandra/dml/dml_config_consistency_c.html +[cassandra-consistency-usage]: http://medium.com/@foundev/cassandra-how-many-nodes-are-talked-to-with-quorum-also-should-i-use-it-98074e75d7d5#.b4pb4alb2 +[cassandra-in-azure]: https://docs.datastax.com/en/datastax_enterprise/4.5/datastax_enterprise/install/installAzure.html +[cassandra-replication]: http://www.planetcassandra.org/data-replication-in-nosql-databases-explained/ +[cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing +[chef]: https://www.chef.io/solutions/azure/ +[datastax]: http://www.datastax.com/products/datastax-enterprise +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier +[lb-external-create]: /azure/load-balancer/load-balancer-get-started-internet-portal +[lb-internal-create]: /azure/load-balancer/load-balancer-get-started-ilb-arm-portal +[load-balancer-external]: /azure/load-balancer/load-balancer-internet-overview +[load-balancer-internal]: /azure/load-balancer/load-balancer-internal-overview +[nsg]: /azure/virtual-network/virtual-networks-nsg +[nsg-rules]: /azure/azure-resource-manager/best-practices-resource-manager-security#network-security-groups +[operations-management-suite]: 
https://www.microsoft.com/server-cloud/operations-management-suite/overview.aspx +[plan-network]: /azure/virtual-network/virtual-network-vnet-plan-design-arm +[private-ip-space]: https://en.wikipedia.org/wiki/Private_network#Private_IPv4_address_spaces +[public IP address]: /azure/virtual-network/virtual-network-ip-addresses-overview-arm +[puppet]: https://puppetlabs.com/blog/managing-azure-virtual-machines-puppet +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines +[vnet faq]: /azure/virtual-network/virtual-networks-faq +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[Nagios]: https://www.nagios.org/ +[Zabbix]: http://www.zabbix.com/ +[Icinga]: http://www.icinga.org/ +[0]: ./images/n-tier-diagram.png "N-tier architecture using Microsoft Azure" +[1]: ../_images/blueprints/deploybutton.png +[2]: https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-n-tier%2Fazuredeploy.json + + diff --git a/docs/blueprints/virtual-machines-linux/series.yml b/docs/blueprints/virtual-machines-linux/series.yml new file mode 100644 index 00000000000..033876c466d --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/series.yml @@ -0,0 +1,5 @@ +--- +columns: 2 +summary: Explains some common architectures for deploying VMs that host enterprise-scale applications in Azure. +description: Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions. 
+--- \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-linux/single-vm.md b/docs/blueprints/virtual-machines-linux/single-vm.md new file mode 100644 index 00000000000..e5b9b12ffb7 --- /dev/null +++ b/docs/blueprints/virtual-machines-linux/single-vm.md @@ -0,0 +1,238 @@ +--- +title: Run a Linux VM on Azure +description: >- + How to run a Linux VM on Azure, paying attention to scalability, resiliency, + manageability, and security. +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Linux VM workloads +pnp.series.next: multi-vm +ms.assetid: 9a77b10a-c364-46d7-b28a-1852cd2016f0 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +pnp.series.prev: ./index +cardTitle: Single VM +--- +# Run a Linux VM on Azure + +This reference architecture shows a set of proven practices for running a Linux virtual machine (VM) on Azure. It includes recommendations for provisioning the VM along with networking and storage components. This architecture can be used to run a single instance, and is the basis for more complex architectures such as N-tier applications. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + + +Provisioning a VM in Azure involves more moving parts than just the VM itself. There are compute, networking, and storage elements that you need to consider. + +* **Resource group.** A [*resource group*][resource-manager-overview] is a container that holds related resources. Create a resource group to hold the resources for this VM. +* **VM**. Azure supports running various popular Linux distributions, including CentOS, Debian, Red Hat Enterprise, Ubuntu, and FreeBSD. For more information, see [Azure and Linux][azure-linux]. You can provision a VM from a list of published images or from a virtual hard disk (VHD) file that you upload to Azure Blob storage. 
+* **OS disk.** The OS disk is a VHD stored in [Azure Storage][azure-storage]. That means it persists even if the host machine goes down. The OS disk is `/dev/sda1`. +* **Temporary disk.** The VM is created with a temporary disk. This disk is stored on a physical drive on the host machine. It is *not* saved in Azure Storage, and might be deleted during reboots and other VM lifecycle events. Use this disk only for temporary data, such as page or swap files. The temporary disk is `/dev/sdb1` and is mounted at `/mnt/resource` or `/mnt`. +* **Data disks.** A [data disk][data-disk] is a persistent VHD used for application data. Data disks are stored in Azure Storage, like the OS disk. +* **Virtual network (VNet) and subnet.** Every VM in Azure is deployed into a VNet that is further divided into subnets. +* **Public IP address.** A public IP address is needed to communicate with the VM—for example over SSH. +* **Network interface (NIC)**. The NIC enables the VM to communicate with the virtual network. +* **Network security group (NSG)**. The [NSG][nsg] is used to allow/deny network traffic to the subnet. You can associate an NSG with an individual NIC or with a subnet. If you associate it with a subnet, the NSG rules apply to all VMs in that subnet. +* **Diagnostics.** Diagnostic logging is crucial for managing and troubleshooting the VM. + + + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +This architecture shows the baseline recommendations for running a Linux VM in Azure. However, we don't recommend using a single VM for mission critical workloads, because it creates a single point of failure. For higher availability, deploy multiple VMs in an [availability set][availability-set]. For more information, see [Running multiple VMs on Azure][multi-vm]. 
+ +### VM recommendations + +Azure offers many different virtual machine sizes, but we recommend the DS- and GS-series because these machine sizes support [Premium Storage][premium-storage]. Select one of these machine sizes unless you have a specialized workload such as high-performance computing. For details, see [virtual machine sizes][virtual-machine-sizes]. + +If you are moving an existing workload to Azure, start with the VM size that's the closest match to your on-premises servers. Then measure the performance of your actual workload with respect to CPU, memory, and disk input/output operations per second (IOPS), and adjust the size if needed. If you require multiple NICs for your VM, be aware that the maximum number of NICs is a function of the [VM size][vm-size-tables]. + +When you provision the VM and other resources, you must specify a region. Generally, choose a region closest to your internal users or customers. However, not all VM sizes may be available in all regions. For details, see [Services by region][services-by-region]. To list the VM sizes available in a given region, run the following Azure command-line interface (CLI) command: + +``` +azure vm sizes --location <location> +``` + +For information about choosing a published VM image, see [Select Linux VM images with the Azure CLI][select-vm-image]. + +### Disk and storage recommendations + +For best disk I/O performance, we recommend [Premium Storage][premium-storage], which stores data on solid-state drives (SSDs). Cost is based on the size of the provisioned disk. IOPS and throughput (that is, data transfer rate) also depend on disk size, so when you provision a disk, consider all three factors (capacity, IOPS, and throughput). + +Create separate Azure storage accounts for each VM to hold the virtual hard disks (VHDs) in order to avoid hitting the IOPS limits for storage accounts. + +Add one or more data disks. When you create a VHD, it is unformatted. Log in to the VM to format the disk. 
In the Linux shell, data disks are displayed as `/dev/sdc`, `/dev/sdd`, and so on. You can run `lsblk` to list the block devices, including the disks. To use a data disk, create a partition and file system, and mount the disk. For example: + +```bash +# Create a partition. +sudo fdisk /dev/sdc # Enter 'n' to partition, 'w' to write the change. + +# Create a file system. +sudo mkfs -t ext3 /dev/sdc1 + +# Mount the drive. +sudo mkdir /data1 +sudo mount /dev/sdc1 /data1 +``` + +If you have a large number of data disks, be aware of the total I/O limits of the storage account. For more information, see [virtual machine disk limits][vm-disk-limits]. + +When you add a data disk, a logical unit number (LUN) ID is assigned to the disk. Optionally, you can specify the LUN ID — for example, if you're replacing a disk and want to retain the same LUN ID, or you have an application that looks for a specific LUN ID. However, remember that LUN IDs must be unique for each disk. + +You may want to change the I/O scheduler to optimize for performance on SSDs, because the disks for VMs with premium storage accounts are SSDs. A common recommendation is to use the NOOP scheduler for SSDs, but you should use a tool such as [iostat] to monitor disk I/O performance for your particular workload. + +For best performance, create a separate storage account to hold diagnostic logs. A standard locally redundant storage (LRS) account is sufficient for diagnostic logs. + +### Network recommendations + +The public IP address can be dynamic or static. The default is dynamic. + +* Reserve a [static IP address][static-ip] if you need a fixed IP address that won't change — for example, if you need to create an A record in DNS, or need the IP address to be added to a safe list. +* You can also create a fully qualified domain name (FQDN) for the IP address. You can then register a [CNAME record][cname-record] in DNS that points to the FQDN. 
For more information, see [create a fully qualified domain name in the Azure portal][fqdn]. + +All NSGs contain a set of [default rules][nsg-default-rules], including a rule that blocks all inbound Internet traffic. The default rules cannot be deleted, but other rules can override them. To enable Internet traffic, create rules that allow inbound traffic to specific ports — for example, port 80 for HTTP. + +To enable SSH, add a rule to the NSG that allows inbound traffic to TCP port 22. + +## Scalability considerations + +To scale up or down, [change the VM size][vm-resize]. + +To scale out horizontally, put two or more VMs into an availability set behind a load balancer. For details, see [running multiple VMs on Azure][multi-vm]. + +## Availability considerations + +For higher availability, deploy multiple VMs in an availability set. This also provides a higher [service level agreement][vm-sla] (SLA). + +Your VM may be affected by [planned maintenance][planned-maintenance] or [unplanned maintenance][manage-vm-availability]. You can use [VM reboot logs][reboot-logs] to determine whether a VM reboot was caused by planned maintenance. + +VHDs are stored in [Azure storage][azure-storage], and Azure storage is replicated for durability and availability. + +To protect against accidental data loss during normal operations (for example, because of user error), you should also implement point-in-time backups, using [blob snapshots][blob-snapshot] or another tool. + +## Manageability considerations + +**Resource groups.** Put tightly coupled resources that share the same life cycle into the same [resource group][resource-manager-overview]. Resource groups allow you to deploy and monitor resources as a group, and roll up billing costs by resource group. You can also delete resources as a set, which is very useful for test deployments. Give resources meaningful names. That makes it easier to locate a specific resource and understand its role. 
See [Recommended Naming Conventions for Azure Resources][naming conventions]. + +**SSH**. Before you create a Linux VM, generate a 2048-bit RSA public-private key pair. Use the public key file when you create the VM. For more information, see [How to Use SSH with Linux and Mac on Azure][ssh-linux]. + +**VM diagnostics.** Enable monitoring and diagnostics, including basic health metrics, diagnostics infrastructure logs, and [boot diagnostics][boot-diagnostics]. Boot diagnostics can help you diagnose boot failure if your VM gets into a nonbootable state. For more information, see [Enable monitoring and diagnostics][enable-monitoring]. + +The following CLI command enables diagnostics: + +``` +azure vm enable-diag +``` + +**Stopping a VM.** Azure makes a distinction between "stopped" and "deallocated" states. You are charged when the VM status is stopped, but not when the VM is deallocated. + +Use the following CLI command to deallocate a VM: + +``` +azure vm deallocate +``` + +In the Azure portal, the **Stop** button deallocates the VM. However, if you shut down through the OS while logged in, the VM is stopped but *not* deallocated, so you will still be charged. + +**Deleting a VM.** If you delete a VM, the VHDs are not deleted. That means you can safely delete the VM without losing data. However, you will still be charged for storage. To delete the VHD, delete the file from [Blob storage][blob-storage]. + +To prevent accidental deletion, use a [resource lock][resource-lock] to lock the entire resource group or lock individual resources, such as the VM. + +## Security considerations + +Automate OS updates by using the [OSPatching] VM extension. Install this extension when you provision the VM. You can specify how often to install patches and whether to reboot after patching. + +Use [role-based access control][rbac] (RBAC) to control access to the Azure resources that you deploy. RBAC lets you assign authorization roles to members of your DevOps team. 
For example, the Reader role can view Azure resources but not create, manage, or delete them. Some roles are specific to particular Azure resource types. For example, the Virtual Machine Contributor role can restart or deallocate a VM, reset the administrator password, create a VM, and so forth. Other [built-in RBAC roles][rbac-roles] that might be useful for this architecture include [DevTest Labs User][rbac-devtest] and [Network Contributor][rbac-network]. + +A user can be assigned to multiple roles, and you can create custom roles for even more fine-grained permissions. + +> [!NOTE] +> RBAC does not limit the actions that a user logged into a VM can perform. Those permissions are determined by the account type on the guest OS. +> +> + +Use [audit logs][audit-logs] to see provisioning actions and other VM events. + +Consider [Azure Disk Encryption][disk-encryption] if you need to encrypt the OS and data disks. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. It includes a VNet, NSG, and a single VM. To deploy the architecture, follow these steps: + +1. Right click the button below and select either "Open link in new tab" or "Open link in new window." + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-single-vm%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-single-vm-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select **linux** in the **Os Type** drop down box. 
+ * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include a hard-coded administrator user name and password, and it is strongly recommended that you immediately change both. Click on the VM named `ra-single-vm0` in the Azure portal. Then, click on **Reset password** in the **Support + troubleshooting** section. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. Click the **Update** button to persist the new user name and password. + + + +[multi-vm]: ../virtual-machines-windows/multi-vm.md?toc=%2fazure%2farchitecture%24virtual-machines-linux%2f/toc.json +[naming conventions]: ../../best-practices/naming-conventions.md +[audit-logs]: https://azure.microsoft.com/blog/analyze-azure-audit-logs-in-powerbi-more/ +[availability-set]: /azure/virtual-machines/virtual-machines-windows-create-availability-set +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-linux]: /azure/virtual-machines/virtual-machines-linux-azure-overview +[azure-storage]: /azure/storage/storage-introduction +[blob-snapshot]: /azure/storage/storage-blob-snapshots +[blob-storage]: /azure/storage/storage-introduction +[boot-diagnostics]: https://azure.microsoft.com/blog/boot-diagnostics-for-virtual-machines-v2/ +[cname-record]: https://en.wikipedia.org/wiki/CNAME_record +[data-disk]: /azure/virtual-machines/virtual-machines-linux-about-disks-vhds +[disk-encryption]: /azure/security/azure-security-disk-encryption +[enable-monitoring]: /azure/monitoring-and-diagnostics/insights-how-to-use-diagnostics +[fqdn]: /azure/virtual-machines/virtual-machines-linux-portal-create-fqdn +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-single-vm/ +[iostat]: https://en.wikipedia.org/wiki/Iostat +[manage-vm-availability]: 
/azure/virtual-machines/virtual-machines-linux-manage-availability +[nsg]: /azure/virtual-network/virtual-networks-nsg +[nsg-default-rules]: /azure/virtual-network/virtual-networks-nsg#default-rules +[OSPatching]: https://github.com/Azure/azure-linux-extensions/tree/master/OSPatching +[planned-maintenance]: /azure/virtual-machines/virtual-machines-linux-planned-maintenance +[premium-storage]: /azure/storage/storage-premium-storage +[rbac]: /azure/active-directory/role-based-access-control-what-is +[rbac-roles]: /azure/active-directory/role-based-access-built-in-roles +[rbac-devtest]: /azure/active-directory/role-based-access-built-in-roles#devtest-labs-user +[rbac-network]: /azure/active-directory/role-based-access-built-in-roles#network-contributor +[reboot-logs]: https://azure.microsoft.com/blog/viewing-vm-reboot-logs/ +[Resize-VHD]: https://technet.microsoft.com/library/hh848535.aspx +[Resize virtual machines]: https://azure.microsoft.com/blog/resize-virtual-machines/ +[resource-lock]: /azure/resource-group-lock-resources +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[select-vm-image]: /azure/virtual-machines/virtual-machines-linux-cli-ps-findimage +[services-by-region]: https://azure.microsoft.com/regions/#services +[ssh-linux]: /azure/virtual-machines/virtual-machines-linux-mac-create-ssh-keys +[static-ip]: /azure/virtual-network/virtual-networks-reserved-public-ip +[storage-account-limits]: /azure/azure-subscription-service-limits#storage-limits +[storage-price]: https://azure.microsoft.com/pricing/details/storage/ +[virtual-machine-sizes]: /azure/virtual-machines/virtual-machines-linux-sizes +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vm-disk-limits]: /azure/azure-subscription-service-limits#virtual-machine-disk-limits +[vm-resize]: /azure/virtual-machines/virtual-machines-linux-change-vm-size +[vm-size-tables]: 
/azure/virtual-machines/virtual-machines-windows-sizes#size-tables +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines/v1_0/ +[readme]: https://github.com/mspnp/reference-architectures/blob/master/guidance-compute-single-vm +[blocks]: https://github.com/mspnp/template-building-blocks +[0]: ./images/single-vm-diagram.png "Single Linux VM architecture in Azure" + diff --git a/docs/blueprints/virtual-machines-windows/images/multi-region-application-diagram.png b/docs/blueprints/virtual-machines-windows/images/multi-region-application-diagram.png new file mode 100644 index 00000000000..88f46e16bca Binary files /dev/null and b/docs/blueprints/virtual-machines-windows/images/multi-region-application-diagram.png differ diff --git a/docs/blueprints/virtual-machines-windows/images/multi-region-application.svg b/docs/blueprints/virtual-machines-windows/images/multi-region-application.svg new file mode 100644 index 00000000000..71284318acd --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/images/multi-region-application.svg @@ -0,0 +1,1392 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + Sheet.1071 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-windows/images/multi-vm-diagram.png b/docs/blueprints/virtual-machines-windows/images/multi-vm-diagram.png new file mode 100644 index 00000000000..b980eb0e0a3 Binary files /dev/null and 
b/docs/blueprints/virtual-machines-windows/images/multi-vm-diagram.png differ diff --git a/docs/blueprints/virtual-machines-windows/images/multi-vm.svg b/docs/blueprints/virtual-machines-windows/images/multi-vm.svg new file mode 100644 index 00000000000..6dddb9af199 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/images/multi-vm.svg @@ -0,0 +1,465 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-windows/images/n-tier-diagram.png b/docs/blueprints/virtual-machines-windows/images/n-tier-diagram.png new file mode 100644 index 00000000000..0952cf9d409 Binary files /dev/null and b/docs/blueprints/virtual-machines-windows/images/n-tier-diagram.png differ diff --git a/docs/blueprints/virtual-machines-windows/images/n-tier.svg b/docs/blueprints/virtual-machines-windows/images/n-tier.svg new file mode 100644 index 00000000000..4d0f2c0b464 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/images/n-tier.svg @@ -0,0 +1,949 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/blueprints/virtual-machines-windows/images/single-vm-diagram.png b/docs/blueprints/virtual-machines-windows/images/single-vm-diagram.png new file mode 100644 index 00000000000..63637827dac Binary files /dev/null and b/docs/blueprints/virtual-machines-windows/images/single-vm-diagram.png 
differ diff --git a/docs/blueprints/virtual-machines-windows/images/single-vm.svg b/docs/blueprints/virtual-machines-windows/images/single-vm.svg new file mode 100644 index 00000000000..3bfe9edf6c7 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/images/single-vm.svg @@ -0,0 +1,283 @@ + + + +image/svg+xmlrect4743rect5174g5181path3path5path7path9circle11g6636path6638path6640path6642path6644path6646path6648path6650path6652path6654path6656path4865-4XMLID_1_path12 \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-windows/index.md b/docs/blueprints/virtual-machines-windows/index.md new file mode 100644 index 00000000000..e4ed1977191 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/index.md @@ -0,0 +1,85 @@ +--- +title: Windows VM workloads | Architectural Blueprints +description: Explains some common architectures for deploying VMs that host enterprise-scale applications in Azure. +layout: LandingPage +pnp.series.title: Windows VM workloads +pnp.series.next: single-vm +--- + + +# Series overview +[!INCLUDE [header](../../_includes/header.md)] + +Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions. + + + diff --git a/docs/blueprints/virtual-machines-windows/multi-region-application.md b/docs/blueprints/virtual-machines-windows/multi-region-application.md new file mode 100644 index 00000000000..d21368e80f9 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/multi-region-application.md @@ -0,0 +1,198 @@ +--- +title: Run Windows VMs in multiple Azure regions for high availability +description: >- + How to deploy VMs in multiple regions on Azure for high availability and + resiliency. 
+services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Windows VM workloads +pnp.series.prev: n-tier +ms.assetid: e711c233-81eb-4813-8c61-ff685bd9e5c7 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: Multi-region application +--- +# Run Windows VMs in multiple regions for high availability + +This reference architecture shows a set of proven practices for running an N-tier application in multiple Azure regions, in order to achieve availability and a robust disaster recovery infrastructure. + +[![0]][0] + +## Architecture + +This architecture builds on the one shown in [Run Windows VMs for an N-tier application](n-tier.md). + +* **Primary and secondary regions**. Use two regions to achieve higher availability. One is the primary region. The other region is for failover. +* **Azure Traffic Manager**. [Traffic Manager][traffic-manager] routes incoming requests to one of the regions. During normal operations, it routes requests to the primary region. If that region becomes unavailable, Traffic Manager fails over to the secondary region. For more information, see the section [Traffic Manager configuration](#traffic-manager-configuration). +* **Resource groups**. Create separate [resource groups][resource groups] for the primary region, the secondary region, and for Traffic Manager. This gives you the flexibility to manage each region as a single collection of resources. For example, you could redeploy one region, without taking down the other one. [Link the resource groups][resource-group-links], so that you can run a query to list all the resources for the application. +* **VNets**. Create a separate VNet for each region. Make sure the address spaces do not overlap. +* **SQL Server Always On Availability Group**. 
If you are using SQL Server, we recommend [SQL Always On Availability Groups][sql-always-on] for high availability. Create a single availability group that includes the SQL Server instances in both regions. + + > [!NOTE] + > Also consider [Azure SQL Database][azure-sql-db], which provides a relational database as a cloud service. With SQL Database, you don't need to configure an availability group or manage failover. + > + > + +* **VPN Gateways**. Create a [VPN gateway][vpn-gateway] in each VNet, and configure a [VNet-to-VNet connection][vnet-to-vnet], to enable network traffic between the two VNets. This is required for the SQL Always On Availability Group. + +## Recommendations + +A multi-region architecture can provide higher availability than deploying to a single region. If a regional outage affects the primary region, you can use [Traffic Manager][traffic-manager] to fail over to the secondary region. This architecture can also help if an individual subsystem of the application fails. + +There are several general approaches to achieving high availability across regions: + +* Active/passive with hot standby. Traffic goes to one region, while the other waits on hot standby. Hot standby means the VMs in the secondary region are allocated and running at all times. +* Active/passive with cold standby. Traffic goes to one region, while the other waits on cold standby. Cold standby means the VMs in the secondary region are not allocated until needed for failover. This approach costs less to run, but will generally take longer to come online during a failure. +* Active/active. Both regions are active, and requests are load balanced between them. If one region becomes unavailable, it is taken out of rotation. + +This reference architecture focuses on active/passive with hot standby, using Traffic Manager for failover. Note that you could deploy a small number of VMs for hot standby and then scale out as needed. 
+ +### Regional pairing + +Each Azure region is paired with another region within the same geography. In general, choose regions from the same regional pair (for example, East US 2 and Central US). Benefits of doing so include: + +* If there is a broad outage, recovery of at least one region out of every pair is prioritized. +* Planned Azure system updates are rolled out to paired regions sequentially, to minimize possible downtime. +* Pairs reside within the same geography, to meet data residency requirements. + +However, make sure that both regions support all of the Azure services needed for your application (see [Services by region][services-by-region]). For more information about regional pairs, see [Business continuity and disaster recovery (BCDR): Azure Paired Regions][regional-pairs]. + +### Traffic Manager configuration + +Consider the following points when configuring Traffic Manager: + +* **Routing**. Traffic Manager supports several [routing algorithms][tm-routing]. For the scenario described in this article, use *priority* routing (formerly called *failover* routing). With this setting, Traffic Manager sends all requests to the primary region, unless the primary region becomes unreachable. At that point, it automatically fails over to the secondary region. See [Configure Failover routing method][tm-configure-failover]. +* **Health probe**. Traffic Manager uses an HTTP (or HTTPS) [probe][tm-monitoring] to monitor the availability of each region. The probe checks for an HTTP 200 response for a specified URL path. As a best practice, create an endpoint that reports the overall health of the application, and use this endpoint for the health probe. Otherwise, the probe might report a healthy endpoint when critical parts of the application are actually failing. For more information, see [Health Endpoint Monitoring Pattern][health-endpoint-monitoring-pattern]. 
+ +When Traffic Manager fails over there is a period of time when clients cannot reach the application. The duration is affected by the following factors: + +* The health probe must detect that the primary region has become unreachable. +* DNS servers must update the cached DNS records for the IP address, which depends on the DNS time-to-live (TTL). The default TTL is 300 seconds (5 minutes), but you can configure this value when you create the Traffic Manager profile. + +For details, see [About Traffic Manager Monitoring][tm-monitoring]. + +If Traffic Manager fails over, we recommend performing a manual failback rather than implementing an automatic failback. Otherwise, you can create a situation where the application flips back and forth between regions. Verify that all application subsystems are healthy before failing back. + +Note that Traffic Manager automatically fails back by default. To prevent this, manually lower the priority of the primary region after a failover event. For example, suppose the primary region is priority 1 and the secondary is priority 2. After a failover, set the primary region to priority 3, to prevent automatic failback. When you are ready to switch back, update the priority to 1. + +The following [Azure CLI][install-azure-cli] command updates the priority: + +```bat +azure network traffic-manager endpoint set --resource-group --profile-name + --name --type AzureEndpoints --priority 3 +``` + +Another approach is to temporarily disable the endpoint until you are ready to fail back: + +```bat +azure network traffic-manager endpoint set --resource-group --profile-name + --name --type AzureEndpoints --status Disabled +``` + +Depending on the cause of a failover, you might need to redeploy the resources within a region. Before failing back, perform an operational readiness test. The test should verify things like: + +* VMs are configured correctly. (All required software is installed, IIS is running, and so on.) 
+* Application subsystems are healthy. +* Functional testing. (For example, the database tier is reachable from the web tier.) + +### SQL Server Always On configuration + +Prior to Windows Server 2016, SQL Server Always On Availability Groups require a domain controller, and all nodes in the availability group must be in the same Active Directory (AD) domain. + +To configure the availability group: + +* At a minimum, place two domain controllers in each region. +* Give each domain controller a static IP address. +* Create a VNet-to-VNet connection to enable communication between the VNets. +* For each VNet, add the IP addresses of the domain controllers (from both regions) to the DNS server list. You can use the following CLI command. For more information, see [Manage DNS servers used by a virtual network (VNet)][vnet-dns]. + + ```bat + azure network vnet set --resource-group dc01-rg --name dc01-vnet --dns-servers "10.0.0.4,10.0.0.6,172.16.0.4,172.16.0.6" + ``` + +* Create a [Windows Server Failover Clustering][wsfc] (WSFC) cluster that includes the SQL Server instances in both regions. +* Create a SQL Server Always On Availability Group that includes the SQL Server instances in both the primary and secondary regions. See [Extending Always On Availability Group to Remote Azure Datacenter (PowerShell)](https://blogs.msdn.microsoft.com/sqlcat/2014/09/22/extending-alwayson-availability-group-to-remote-azure-datacenter-powershell/) for the steps. + + * Put the primary replica in the primary region. + * Put one or more secondary replicas in the primary region. Configure these to use synchronous commit with automatic failover. + * Put one or more secondary replicas in the secondary region. Configure these to use *asynchronous* commit, for performance reasons. (Otherwise, all SQL transactions have to wait on a round trip over the network to the secondary region.) + + > [!NOTE] + > Asynchronous commit replicas do not support automatic failover. 
+ > + > + +For more information, see [Running Windows VMs for an N-tier architecture on Azure](n-tier.md). + +## Availability considerations + +With a complex N-tier app, you may not need to replicate the entire application in the secondary region. Instead, you might just replicate a critical subsystem that is needed to support business continuity. + +Traffic Manager is a possible failure point in the system. If the Traffic Manager service fails, clients cannot access your application during the downtime. Review the [Traffic Manager SLA][tm-sla], and determine whether using Traffic Manager alone meets your business requirements for high availability. If not, consider adding another traffic management solution as a fallback. If the Azure Traffic Manager service fails, change your CNAME records in DNS to point to the other traffic management service. (This step must be performed manually, and your application will be unavailable until the DNS changes are propagated.) + +For the SQL Server cluster, there are two failover scenarios to consider: + +- All of the SQL replicas in the primary region fail. For example, this could happen during a regional outage. In that case, you must manually fail over the SQL availability group, even though Traffic Manager automatically fails over on the front end. Follow the steps in [Perform a Forced Manual Failover of a SQL Server Availability Group](https://msdn.microsoft.com/library/ff877957.aspx), which describes how to perform a forced failover by using SQL Server Management Studio, Transact-SQL, or PowerShell in SQL Server 2016. + + > [!WARNING] + > With forced failover, there is a risk of data loss. Once the primary region is back online, take a snapshot of the database and use [tablediff] to find the differences. + > + > +- Traffic Manager fails over to the secondary region, but the primary SQL replica is still available. For example, the front-end tier might fail, without affecting the SQL VMs. 
In that case, Internet traffic is routed to the secondary region, and that region can still connect to the primary SQL replica. However, there will be increased latency, because the SQL connections are going across regions. In this situation, you should perform a manual failover as follows: + + 1. Temporarily switch a SQL replica in the secondary region to *synchronous* commit. This ensures there won't be data loss during the failover. + 2. Fail over to that SQL replica. + 3. When you fail back to the primary region, restore the asynchronous commit setting. + +## Manageability considerations + +When you update your deployment, update one region at a time to reduce the chance of a global failure from an incorrect configuration or an error in the application. + +Test the resiliency of the system to failures. Here are some common failure scenarios to test: + +* Shut down VM instances. +* Pressure resources such as CPU and memory. +* Disconnect/delay network. +* Crash processes. +* Expire certificates. +* Simulate hardware faults. +* Shut down the DNS service on the domain controllers. + +Measure the recovery times and verify they meet your business requirements. Test combinations of failure modes, as well. 
+ + + + +[hybrid-vpn]: ../hybrid-networking/vpn.md + +[azure-sla]: https://azure.microsoft.com/support/legal/sla/ +[azure-sql-db]: https://azure.microsoft.com/documentation/services/sql-database/ +[health-endpoint-monitoring-pattern]: https://msdn.microsoft.com/library/dn589789.aspx +[install-azure-cli]: /azure/xplat-cli-install +[regional-pairs]: /azure/best-practices-availability-paired-regions +[resource groups]: /azure/azure-resource-manager/resource-group-overview +[resource-group-links]: /azure/resource-group-link-resources +[services-by-region]: https://azure.microsoft.com/regions/#services +[sql-always-on]: https://msdn.microsoft.com/library/hh510230.aspx +[tablediff]: https://msdn.microsoft.com/library/ms162843.aspx +[tm-configure-failover]: /azure/traffic-manager/traffic-manager-configure-failover-routing-method +[tm-monitoring]: /azure/traffic-manager/traffic-manager-monitoring +[tm-routing]: /azure/traffic-manager/traffic-manager-routing-methods +[tm-sla]: https://azure.microsoft.com/support/legal/sla/traffic-manager/v1_0/ +[traffic-manager]: https://azure.microsoft.com/services/traffic-manager/ +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vnet-dns]: /azure/virtual-network/virtual-networks-manage-dns-in-vnet +[vnet-to-vnet]: /azure/vpn-gateway/vpn-gateway-vnet-vnet-rm-ps +[vpn-gateway]: /azure/vpn-gateway/vpn-gateway-about-vpngateways +[wsfc]: https://msdn.microsoft.com/library/hh270278.aspx + +[0]: ./images/multi-region-application-diagram.png "Highly available network architecture for Azure N-tier applications" diff --git a/docs/blueprints/virtual-machines-windows/multi-vm.md b/docs/blueprints/virtual-machines-windows/multi-vm.md new file mode 100644 index 00000000000..dced70ec819 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/multi-vm.md @@ -0,0 +1,200 @@ +--- +title: Run load-balanced VMs on Azure for scalability and availability +description: >- + How to run multiple 
Windows VMs on Azure for scalability and availability. +services: '' +documentationcenter: na +author: MikeWasson +manager: christb +editor: '' +tags: '' +pnp.series.title: Windows VM workloads +pnp.series.next: n-tier +pnp.series.prev: single-vm +pnp.series.github: >- + https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-multi-vm +ms.assetid: 6fa4764e-4d97-4b96-ac25-8adf602baf27 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: Load balanced VMs +--- +# Run load-balanced VMs for scalability and availability + +This reference architecture shows a set of proven practices for running several Windows virtual machines (VMs) behind a load balancer, to improve availability and scalability. This architecture can be used for any stateless workload, such as a web server, and is a building block for deploying N-tier applications. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +This architecture builds on the one shown in [Run a Windows VM on Azure][single vm]. The recommendations there also apply to this architecture. + +In this architecture, a workload is distributed across several VM instances. There is a single public IP address, and Internet traffic is distributed to the VMs using a load balancer. This architecture can be used for a single-tier application, such as a stateless web application or storage cluster. It's also a building block for N-tier applications. + +The architecture has the following components: + +* **Availability set**. The [availability set][availability set] contains the VMs. This makes the VMs eligible for the [availability service level agreement (SLA) for Azure VMs][vm-sla]. For the SLA to apply, you need a minimum of two VMs in the same availability set. +* **Virtual network (VNet) and subnet.** Every VM in Azure is deployed into a VNet that is further divided into subnets. 
+* **Azure Load Balancer**. The [load balancer] distributes incoming Internet requests to the VM instances. The load balancer includes some related resources: + * **Public IP address**. A public IP address is needed for the load balancer to receive Internet traffic. + * **Front-end configuration**. Associates the public IP address with the load balancer. + * **Back-end address pool**. Contains the network interfaces (NICs) for the VMs that will receive the incoming traffic. +* **Load balancer rules**. Used to distribute network traffic among all the VMs in the back-end address pool. +* **Network address translation (NAT) rules**. Used to route traffic to a specific VM. For example, to enable remote desktop protocol (RDP) to the VMs, create a separate NAT rule for each VM. +* **Network interfaces (NICs)**. Each VM has a NIC to connect to the network. +* **Storage**. Storage accounts hold the VM images and other file-related resources, such as VM diagnostic data captured by Azure. + + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + + +## Recommendations + +Your requirements might differ from the architecture described here. Use these recommendations as a starting point. + + +### Availability set recommendations + +Create at least two VMs in the availability set, to support the [availability SLA for Azure VMs][vm-sla]. The Azure load balancer also requires that load-balanced VMs belong to the same availability set. + +Each Azure subscription has default limits in place, including a maximum number of VMs per region. You can increase the limit by filing a support request. For more information, see [Azure subscription and service limits, quotas, and constraints][subscription-limits]. + +### Network recommendations + +Place the VMs within the same subnet. 
Do not expose the VMs directly to the Internet, but instead give each VM a private IP address. Clients connect using the public IP address of the load balancer. + +### Load balancer recommendations + +Add all VMs in the availability set to the back-end address pool of the load balancer. + +Define load balancer rules to direct network traffic to the VMs. For example, to enable HTTP traffic, create a rule that maps port 80 from the front-end configuration to port 80 on the back-end address pool. When a client sends an HTTP request to port 80, the load balancer selects a back-end IP address by using a [hashing algorithm][load balancer hashing] that includes the source IP address. In that way, client requests are distributed across all the VMs. + +To route traffic to a specific VM, use NAT rules. For example, to enable RDP to the VMs, create a separate NAT rule for each VM. Each rule should map a distinct port number to port 3389, the default port for RDP. For example, use port 50001 for "VM1," port 50002 for "VM2," and so on. Assign the NAT rules to the NICs on the VMs. + +### Storage account recommendations + +Create separate Azure storage accounts for each VM to hold the virtual hard disks (VHDs), in order to avoid hitting the input/output operations per second [(IOPS) limits][vm-disk-limits] for storage accounts. + +Create one storage account for diagnostic logs. This storage account can be shared by all the VMs. + +## Scalability considerations + +To scale out, provision additional VMs and put them in the load balancer's back-end address pool. + +> [!TIP] +> When you add a new VM to an availability set, make sure to create a NIC for the VM, and add the NIC to the back-end address pool on the load balancer. Otherwise, Internet traffic won't be routed to the new VM. +> +> + +### VM scale sets + +Another option for scaling is to use a [virtual machine scale set][vmss]. VM scale sets help you to deploy and manage a set of identical VMs. 
Scale sets support autoscaling based on performance metrics. As the load on the VMs increases, additional VMs are automatically added to the load balancer. Consider scale sets if you need to quickly scale out VMs, or need to autoscale. + +Currently, scale sets do not support data disks. The options for storing data are Azure File storage, the OS drive, the temp drive, or an external store such as Azure Storage. + +By default, scale sets use "overprovisioning," which means the scale set initially provisions more VMs than you ask for, then deletes the extra VMs. This improves the overall success rate when provisioning the VMs. We recommend no more than 20 VMs per storage account with overprovisioning enabled, or no more than 40 VMs with overprovisioning disabled. + +There are two basic ways to configure VMs deployed in a scale set: + +- Use extensions to configure the VM after it is provisioned. With this approach, new VM instances may take longer to start up than a VM with no extensions. + +- Create a custom image. This option may be quicker to deploy. However, it requires you to keep the image up to date. A scale set built on a custom image must create all OS disk VHDs within one storage account. + +For additional considerations, see [Designing VM Scale Sets For Scale][vmss-design]. + +> [!TIP] +> When using any autoscale solution, test it with production-level workloads well in advance. +> +> + +## Availability considerations + +The availability set makes your application more resilient to both planned and unplanned maintenance events. + +* *Planned maintenance* occurs when Microsoft updates the underlying platform, sometimes causing VMs to be restarted. Azure makes sure the VMs in an availability set are not all restarted at the same time. At least one is kept running while others are restarting. +* *Unplanned maintenance* happens if there is a hardware failure. Azure makes sure that VMs in an availability set are provisioned across more than one server rack.
This helps to reduce the impact of hardware failures, network outages, power interruptions, and so on. + +For more information, see [Manage the availability of virtual machines][availability set]. The following video also has a good overview of availability sets: [How Do I Configure an Availability Set to Scale VMs][availability set ch9]. + +> [!WARNING] +> Make sure to configure the availability set when you provision the VM. Currently, there is no way to add a Resource Manager VM to an availability set after the VM is provisioned. +> +> + +The load balancer uses [health probes] to monitor the availability of VM instances. If a probe cannot reach an instance within a timeout period, the load balancer stops sending traffic to that VM. However, the load balancer will continue to probe, and if the VM becomes available again, the load balancer resumes sending traffic to that VM. + +Here are some recommendations on load balancer health probes: + +* Probes can test either HTTP or TCP. If your VMs run an HTTP server, create an HTTP probe. Otherwise create a TCP probe. +* For an HTTP probe, specify the path to an HTTP endpoint. The probe checks for an HTTP 200 response from this path. This can be the root path ("/"), or a health-monitoring endpoint that implements some custom logic to check the health of the application. The endpoint must allow anonymous HTTP requests. +* The probe is sent from a [known][health-probe-ip] IP address, 168.63.129.16. Make sure you don't block traffic to or from this IP in any firewall policies or network security group (NSG) rules. +* Use [health probe logs][health probe log] to view the status of the health probes. Enable logging in the Azure portal for each load balancer. Logs are written to Azure Blob storage. The logs show how many VMs on the back end are not receiving network traffic due to failed probe responses. 
+ +## Manageability considerations + +With multiple VMs, it is important to automate processes so they are reliable and repeatable. You can use [Azure Automation][azure-automation] to automate deployment, OS patching, and other tasks. [Azure Automation][azure-automation] is an automation service based on Windows PowerShell that can be used for this. Example automation scripts are available from the [Runbook Gallery] on TechNet. + +## Security considerations + +Virtual networks are a traffic isolation boundary in Azure. VMs in one VNet cannot communicate directly with VMs in a different VNet. VMs within the same VNet can communicate, unless you create [network security groups][nsg] (NSGs) to restrict traffic. For more information, see [Microsoft cloud services and network security][network-security]. + +For incoming Internet traffic, the load balancer rules define which traffic can reach the back end. However, load balancer rules don't support IP safe lists, so if you want to add certain public IP addresses to a safe list, add an NSG to the subnet. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. It includes a VNet, NSG, load balancer, and two VMs. It can be deployed with either Windows or Linux VMs. To deploy the architecture, follow these steps: + +1. Right-click the button below and select either "Open link in new tab" or "Open link in new window": + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-multi-vm%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create new** and enter `ra-multi-vm-rg` in the text box. + * Select the region from the **Location** drop down box.
+ * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select either **windows** or **linux** in the **Os Type** drop down box. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include a hard-coded administrator user name and password, and it is strongly recommended that you immediately change both. Click the VM named `ra-multi-vm1` in the Azure portal. Then, click **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. Click the **Update** button to save the new user name and password. Repeat for the VM named `ra-multi-vm2`. + + + + +[n-tier-linux]: ../virtual-machines-linux/n-tier.md +[n-tier-windows]: n-tier.md +[single vm]: single-vm.md + +[naming conventions]: /azure/guidance/guidance-naming-conventions + +[availability set]: /azure/virtual-machines/virtual-machines-windows-manage-availability +[availability set ch9]: https://channel9.msdn.com/Series/Microsoft-Azure-Fundamentals-Virtual-Machines/08 +[azure-automation]: https://azure.microsoft.com/documentation/services/automation/ +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-automation]: /azure/automation/automation-intro +[bastion host]: https://en.wikipedia.org/wiki/Bastion_host +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-multi-vm +[health probe log]: /azure/load-balancer/load-balancer-monitor-log +[health probes]: /azure/load-balancer/load-balancer-overview#load-balancer-features +[health-probe-ip]: /azure/virtual-network/virtual-networks-nsg#special-rules +[load balancer]: /azure/load-balancer/load-balancer-get-started-internet-arm-cli +[load balancer hashing]: /azure/load-balancer/load-balancer-overview#load-balancer-features 
+[network-security]: /azure/best-practices-network-security +[nsg]: /azure/virtual-network/virtual-networks-nsg +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[Runbook Gallery]: /azure/automation/automation-runbook-gallery#runbooks-in-runbook-gallery +[subscription-limits]: /azure/azure-subscription-service-limits +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vm-disk-limits]: /azure/azure-subscription-service-limits#virtual-machine-disk-limits +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines/v1_2/ +[vmss]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview +[vmss-design]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-design-overview +[vmss-quickstart]: https://azure.microsoft.com/documentation/templates/?term=scale+set +[VM-sizes]: https://azure.microsoft.com/documentation/articles/virtual-machines-windows-sizes/ +[0]: ./images/multi-vm-diagram.png "Architecture of a multi-VM solution on Azure comprising an availability set with two VMs and a load balancer" \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-windows/n-tier.md b/docs/blueprints/virtual-machines-windows/n-tier.md new file mode 100644 index 00000000000..cbc84e1eec7 --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/n-tier.md @@ -0,0 +1,244 @@ +--- +title: Running Windows VMs for an N-tier architecture +description: >- + How to implement a multi-tier architecture on Azure, paying particular + attention to availability, security, scalability, and manageability.
+services: '' +documentationcenter: na +author: MikeWasson +manager: christb +editor: '' +tags: '' +pnp.series.title: Windows VM workloads +pnp.series.next: multi-region-application +pnp.series.prev: multi-vm +pnp.series.github: >- + https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier-sql +ms.assetid: 79046c65-b024-4a24-ad56-66ed8b1b7800 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +cardTitle: N-tier application +--- +# Run Windows VMs for an N-tier application + +This reference architecture shows a set of proven practices for running Windows virtual machines (VMs) for an N-tier application. [**Deploy this solution**.](#deploy-the-solution) + +![[0]][0] + +## Architecture + +There are many ways to implement an N-tier architecture. The diagram shows a typical 3-tier web application. This architecture builds on [Run load-balanced VMs for scalability and availability][multi-vm]. The web and business tiers use load-balanced VMs. + +* **Availability sets.** Create an [availability set][azure-availability-sets] for each tier, and provision at least two VMs in each tier. This makes the VMs eligible for a higher [service level agreement (SLA)][vm-sla] for VMs. +* **Subnets.** Create a separate subnet for each tier. Specify the address range and subnet mask using [CIDR] notation. +* **Load balancers.** Use an [Internet-facing load balancer][load-balancer-external] to distribute incoming Internet traffic to the web tier, and an [internal load balancer][load-balancer-internal] to distribute network traffic from the web tier to the business tier. +* **Jumpbox.** Also called a [bastion host]. A secure VM on the network that administrators use to connect to the other VMs. The jumpbox has an NSG that allows remote traffic only from public IP addresses on a safe list. The NSG should permit remote desktop (RDP) traffic.
+* **Monitoring.** Monitoring software such as [Nagios], [Zabbix], or [Icinga] can give you insight into response time, VM uptime, and the overall health of your system. Install the monitoring software on a VM that's placed in a separate management subnet. +* **NSGs.** Use [network security groups][nsg] (NSGs) to restrict network traffic within the VNet. For example, in the 3-tier architecture shown here, the database tier does not accept traffic from the web front end, only from the business tier and the management subnet. +* **SQL Server Always On Availability Group.** Provides high availability at the data tier, by enabling replication and failover. +* **Active Directory Domain Services (AD DS) Servers**. Prior to Windows Server 2016, SQL Server Always On Availability Groups must be joined to a domain. This is because Availability Groups depend on Windows Server Failover Cluster (WSFC) technology. Windows Server 2016 introduces the ability to create a Failover Cluster without Active Directory, in which case the AD DS servers are not required for this architecture. For more information, see [What's new in Failover Clustering in Windows Server 2016][wsfc-whats-new]. + +> [!NOTE] +> Azure has two different deployment models: [Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +Your requirements might differ from the architecture described here. Use these recommendations as a starting point. + +### VNet / Subnets + +When you create the VNet, determine how many IP addresses your resources in each subnet require. Specify a subnet mask and a VNet address range large enough for the required IP addresses, using [CIDR] notation. Use an address space that falls within the standard [private IP address blocks][private-ip-space], which are 10.0.0.0/8, 172.16.0.0/12, and 192.168.0.0/16. 
+ +Choose an address range that does not overlap with your on-premises network, in case you need to set up a gateway between the VNet and your on-premises network later. Once you create the VNet, you can't change the address range. + +Design subnets with functionality and security requirements in mind. All VMs within the same tier or role should go into the same subnet, which can be a security boundary. For more information about designing VNets and subnets, see [Plan and design Azure Virtual Networks][plan-network]. + +For each subnet, specify the address space for the subnet in CIDR notation. For example, '10.0.0.0/24' creates a range of 256 IP addresses. VMs can use 251 of these; five are reserved. Make sure the address ranges don't overlap across subnets. See the [Virtual Network FAQ][vnet faq]. + +### Network security groups + +Use NSG rules to restrict traffic between tiers. For example, in the 3-tier architecture shown above, the web tier does not communicate directly with the database tier. To enforce this, the database tier should block incoming traffic from the web tier subnet. + +1. Create an NSG and associate it to the database tier subnet. +2. Add a rule that denies all inbound traffic from the VNet. (Use the `VIRTUAL_NETWORK` tag in the rule.) +3. Add a rule with a higher priority that allows inbound traffic from the business tier subnet. This rule overrides the previous rule, and allows the business tier to talk to the database tier. +4. Add a rule that allows inbound traffic from within the database tier subnet itself. This rule allows communication between VMs in the database tier, which is needed for database replication and failover. +5. Add a rule that allows RDP traffic from the jumpbox subnet. This rule lets administrators connect to the database tier from the jumpbox. + + > [!NOTE] + > An NSG has default rules that allow any inbound traffic from within the VNet.
These rules can't be deleted, but you can override them by creating higher priority rules. + > + > + +### Load balancers + +The external load balancer distributes Internet traffic to the web tier. Create a public IP address for this load balancer. See [Creating an Internet-facing load balancer][lb-external-create]. + +The internal load balancer distributes network traffic from the web tier to the business tier. To give this load balancer a private IP address, create a frontend IP configuration and associate it with the subnet for the business tier. See [Get started creating an Internal load balancer][lb-internal-create]. + +### SQL Server Always On Availability Groups + +We recommend [Always On Availability Groups][sql-alwayson] for SQL Server high availability. Prior to Windows Server 2016, Always On Availability Groups require a domain controller, and all nodes in the availability group must be in the same AD domain. + +Other tiers connect to the database through an [availability group listener][sql-alwayson-listeners]. The listener enables a SQL client to connect without knowing the name of the physical instance of SQL Server. VMs that access the database must be joined to the domain. The client (in this case, another tier) uses DNS to resolve the listener's virtual network name into IP addresses. + +Configure the SQL Server Always On Availability Group as follows: + +1. Create a Windows Server Failover Clustering (WSFC) cluster, a SQL Server Always On Availability Group, and a primary replica. For more information, see [Getting Started with Always On Availability Groups][sql-alwayson-getting-started]. +2. Create an internal load balancer with a static private IP address. +3. Create an availability group listener, and map the listener's DNS name to the IP address of an internal load balancer. +4. Create a load balancer rule for the SQL Server listening port (TCP port 1433 by default). 
The load balancer rule must enable *floating IP*, also called Direct Server Return. This causes the VM to reply directly to the client, which enables a direct connection to the primary replica. + + > [!NOTE] + > When floating IP is enabled, the front-end port number must be the same as the back-end port number in the load balancer rule. + > + > + +When a SQL client tries to connect, the load balancer routes the connection request to the primary replica. If there is a failover to another replica, the load balancer automatically routes subsequent requests to a new primary replica. For more information, see [Configure an ILB listener for SQL Server Always On Availability Groups][sql-alwayson-ilb]. + +During a failover, existing client connections are closed. After the failover completes, new connections will be routed to the new primary replica. + +If your application makes significantly more reads than writes, you can offload some of the read-only queries to a secondary replica. See [Using a Listener to Connect to a Read-Only Secondary Replica (Read-Only Routing)][sql-alwayson-read-only-routing]. + +Test your deployment by [forcing a manual failover][sql-alwayson-force-failover] of the availability group. + +### Jumpbox + +The jumpbox will have minimal performance requirements, so select a small VM size for the jumpbox such as Standard A1. + +Create a [public IP address] for the jumpbox. Place the jumpbox in the same VNet as the other VMs, but in a separate management subnet. + +Do not allow RDP access from the public Internet to the VMs that run the application workload. Instead, all RDP access to these VMs must come through the jumpbox. An administrator logs into the jumpbox, and then logs into the other VM from the jumpbox. The jumpbox allows RDP traffic from the Internet, but only from known, safe IP addresses. + +To secure the jumpbox, create an NSG and apply it to the jumpbox subnet. 
Add an NSG rule that allows RDP connections only from a safe set of public IP addresses. The NSG can be attached either to the subnet or to the jumpbox NIC. In this case, we recommend attaching it to the NIC, so RDP traffic is permitted only to the jumpbox, even if you add other VMs to the same subnet. + +Configure the NSGs for the other subnets to allow RDP traffic from the management subnet. + +## Availability considerations + +At the database tier, having multiple VMs does not automatically translate into a highly available database. For a relational database, you will typically need to use replication and failover to achieve high availability. For SQL Server, we recommend using [Always On Availability Groups][sql-alwayson]. + +If you need higher availability than the [Azure SLA for VMs][vm-sla] provides, replicate the application across two regions and use Azure Traffic Manager for failover. For more information, see [Run Windows VMs in multiple regions for high availability][multi-dc]. + +## Security considerations + +Encrypt sensitive data at rest and use [Azure Key Vault][azure-key-vault] to manage the database encryption keys. Key Vault can store encryption keys in hardware security modules (HSMs). For more information, see [Configure Azure Key Vault Integration for SQL Server on Azure VMs][sql-keyvault]. It's also recommended to store application secrets, such as database connection strings, in Key Vault. + +Consider adding a network virtual appliance (NVA) to create a DMZ between the Internet and the Azure virtual network. NVA is a generic term for a virtual appliance that can perform network-related tasks, such as firewall, packet inspection, auditing, and custom routing. For more information, see [Implementing a DMZ between Azure and the Internet][dmz]. + +## Scalability considerations + +The load balancers distribute network traffic to the web and business tiers. Scale horizontally by adding new VM instances.
Note that you can scale the web and business tiers independently, based on load. To reduce possible complications caused by the need to maintain client affinity, the VMs in the web tier should be stateless. The VMs hosting the business logic should also be stateless. + +## Manageability considerations + +Simplify management of the entire system by using centralized administration tools such as [Azure Automation][azure-administration], [Microsoft Operations Management Suite][operations-management-suite], [Chef][chef], or [Puppet][puppet]. These tools can consolidate diagnostic and health information captured from multiple VMs to provide an overall view of the system. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. The architecture is deployed in three stages. To deploy the architecture, follow these steps: + +1. Right click the button below and select "open in new tab" or "open in new window" to begin the first stage of the deployment. + [!["Deploy To Azure"][1]][2] +2. Once the link has opened in the Azure portal, enter the following values: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-ntier-sql-network-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +3. Check Azure portal notification for a message that the first stage of the deployment is complete. +4. Right click the button below and select "open in new tab" or "open in new window" to begin the second stage of the deployment. + [!["Deploy To Azure"][1]][3] +5.
Once the link has opened in the Azure portal, enter the following values: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-ntier-sql-workload-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +6. Check Azure portal notification for a message that the second stage of deployment is complete. +7. Right click the button below and select "open in new tab" or "open in new window" to begin the third stage of the deployment. + [!["Deploy To Azure"][1]][4] +8. Once the link has opened in the Azure portal, enter the following values: + + * The **Resource group** name is already defined in the parameter file, so select **Use Existing** and enter `ra-ntier-sql-network-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +9. Check Azure portal notification for a message that the third stage of the deployment is complete. +10. The parameter files include hard-coded administrator user names and passwords, and it is strongly recommended that you immediately change both on all the VMs. Click on each VM in the Azure portal then click on **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. Click the **Update** button to save the new user name and password. 
+ + + +[dmz]: ../dmz/secure-vnet-dmz.md +[multi-dc]: multi-region-application.md +[multi-vm]: multi-vm.md +[n-tier]: n-tier.md + +[naming conventions]: /azure/guidance/guidance-naming-conventions + +[arm-templates]: https://azure.microsoft.com/documentation/articles/resource-group-authoring-templates/ +[azure-administration]: /azure/automation/automation-intro +[azure-audit-logs]: /azure/resource-group-audit +[azure-availability-sets]: /azure/virtual-machines/virtual-machines-windows-manage-availability#configure-each-application-tier-into-separate-availability-sets +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-key-vault]: https://azure.microsoft.com/services/key-vault +[azure-load-balancer]: /azure/load-balancer/load-balancer-overview +[bastion host]: https://en.wikipedia.org/wiki/Bastion_host +[cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing +[chef]: https://www.chef.io/solutions/azure/ +[github-folder]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier-sql +[lb-external-create]: /azure/load-balancer/load-balancer-get-started-internet-portal +[lb-internal-create]: /azure/load-balancer/load-balancer-get-started-ilb-arm-portal +[load-balancer-external]: /azure/load-balancer/load-balancer-internet-overview +[load-balancer-internal]: /azure/load-balancer/load-balancer-internal-overview +[nsg]: /azure/virtual-network/virtual-networks-nsg +[operations-management-suite]: https://www.microsoft.com/server-cloud/operations-management-suite/overview.aspx +[plan-network]: /azure/virtual-network/virtual-network-vnet-plan-design-arm +[private-ip-space]: https://en.wikipedia.org/wiki/Private_network#Private_IPv4_address_spaces +[public IP address]: /azure/virtual-network/virtual-network-ip-addresses-overview-arm +[puppet]: https://puppetlabs.com/blog/managing-azure-virtual-machines-puppet +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[sql-alwayson]: 
https://msdn.microsoft.com/library/hh510230.aspx +[sql-alwayson-force-failover]: https://msdn.microsoft.com/library/ff877957.aspx +[sql-alwayson-getting-started]: https://msdn.microsoft.com/library/gg509118.aspx +[sql-alwayson-ilb]: /azure/virtual-machines/windows/sql/virtual-machines-windows-portal-sql-alwayson-int-listener +[sql-alwayson-listeners]: https://msdn.microsoft.com/library/hh213417.aspx +[sql-alwayson-read-only-routing]: https://technet.microsoft.com/library/hh213417.aspx#ConnectToSecondary +[sql-keyvault]: /azure/virtual-machines/virtual-machines-windows-ps-sql-keyvault +[vm-planned-maintenance]: /azure/virtual-machines/virtual-machines-windows-planned-maintenance +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines +[vnet faq]: /azure/virtual-network/virtual-networks-faq +[wsfc-whats-new]: https://technet.microsoft.com/windows-server-docs/failover-clustering/whats-new-in-failover-clustering +[Nagios]: https://www.nagios.org/ +[Zabbix]: http://www.zabbix.com/ +[Icinga]: http://www.icinga.org/ +[VM-sizes]: https://azure.microsoft.com/documentation/articles/virtual-machines-windows-sizes/ +[solution-script]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/Deploy-ReferenceArchitecture.ps1 +[solution-script-bash]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/deploy-reference-architecture.sh +[vnet-parameters-windows]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/windows/virtualNetwork.parameters.json +[vnet-parameters-linux]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/linux/virtualNetwork.parameters.json +[nsg-parameters-windows]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/windows/networkSecurityGroups.parameters.json +[nsg-parameters-linux]: 
https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/linux/networkSecurityGroups.parameters.json +[webtier-parameters-windows]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/windows/webTier.parameters.json +[webtier-parameters-linux]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/linux/webTier.parameters.json +[businesstier-parameters-windows]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/windows/businessTier.parameters.json +[businesstier-parameters-linux]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/linux/businessTier.parameters.json +[datatier-parameters-windows]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/windows/dataTier.parameters.json +[datatier-parameters-linux]: https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-n-tier/parameters/linux/dataTier.parameters.json +[azure-powershell-download]: https://azure.microsoft.com/documentation/articles/powershell-install-configure/ +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[0]: ./images/n-tier-diagram.png "N-tier architecture using Microsoft Azure" +[1]: ../_images/blueprints/deploybutton.png +[2]: https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-n-tier-sql%2FvirtualNetwork.azuredeploy.json +[3]: https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-n-tier-sql%2Fworkload.azuredeploy.json +[4]: 
https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-n-tier-sql%2Fsecurity.azuredeploy.json diff --git a/docs/blueprints/virtual-machines-windows/series.yml b/docs/blueprints/virtual-machines-windows/series.yml new file mode 100644 index 00000000000..033876c466d --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/series.yml @@ -0,0 +1,5 @@ +--- +columns: 2 +summary: Explains some common architectures for deploying VMs that host enterprise-scale applications in Azure. +description: Running a virtual machine (VM) in Azure involves more moving parts than just the VM itself. Other considerations include networking, load balancers, network security groups (NSGs), and redundancy within a region or across multiple regions. +--- \ No newline at end of file diff --git a/docs/blueprints/virtual-machines-windows/single-vm.md b/docs/blueprints/virtual-machines-windows/single-vm.md new file mode 100644 index 00000000000..63fa8383a2e --- /dev/null +++ b/docs/blueprints/virtual-machines-windows/single-vm.md @@ -0,0 +1,233 @@ +--- +title: Run a Windows VM on Azure +description: >- + How to run a VM on Azure, paying attention to scalability, resiliency, + manageability, and security. +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Windows VM workloads +pnp.series.next: multi-vm +pnp.series.github: >- + https://github.com/mspnp/reference-architectures/tree/master/guidance-compute-single-vm +ms.assetid: 111649ea-4417-4a8e-8054-5bbe1902da87 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 11/22/2016 +ms.author: mwasson +pnp.series.prev: ./index +cardTitle: Single VM +--- +# Run a Windows VM on Azure + +This reference architecture shows a set of proven practices for running a Windows virtual machine (VM) on Azure. 
It includes recommendations for provisioning the VM along with networking and storage components. This architecture can be used to run a single instance, and is the basis for more complex architectures such as N-tier applications. [**Deploy this solution**.](#deploy-the-solution) + + +![[0]][0] + +## Architecture + +Provisioning a VM in Azure involves more moving parts than just the VM itself. There are compute, networking, and storage elements. + + +* **Resource group.** A [*resource group*][resource-manager-overview] is a container that holds related resources. Create a resource group to hold the resources for this VM. +* **VM**. You can provision a VM from a list of published images or from a virtual hard disk (VHD) file that you upload to Azure Blob storage. +* **OS disk.** The OS disk is a VHD stored in [Azure Storage][azure-storage]. That means it persists even if the host machine goes down. +* **Temporary disk.** The VM is created with a temporary disk (the `D:` drive on Windows). This disk is stored on a physical drive on the host machine. It is *not* saved in Azure Storage, and might be deleted during reboots and other VM lifecycle events. Use this disk only for temporary data, such as page or swap files. +* **Data disks.** A [data disk][data-disk] is a persistent VHD used for application data. Data disks are stored in Azure Storage, like the OS disk. +* **Virtual network (VNet) and subnet.** Every VM in Azure is deployed into a VNet that is further divided into subnets. +* **Public IP address.** A public IP address is needed to communicate with the VM—for example over remote desktop (RDP). +* **Network interface (NIC)**. The NIC enables the VM to communicate with the virtual network. +* **Network security group (NSG)**. The [NSG][nsg] is used to allow/deny network traffic to the subnet. You can associate an NSG with an individual NIC or with a subnet. If you associate it with a subnet, the NSG rules apply to all VMs in that subnet. 
+* **Diagnostics.** Diagnostic logging is crucial for managing and troubleshooting the VM. + +> [!NOTE] +> Azure has two different deployment models: [Azure Resource Manager][resource-manager-overview] and classic. This article uses Resource Manager, which Microsoft recommends for new deployments. +> +> + +## Recommendations + +This architecture shows the baseline recommendations for running a Windows VM in Azure. However, we don't recommend using a single VM for mission critical workloads, because it creates a single point of failure. For higher availability, deploy multiple VMs in an [availability set][availability-set]. For more information, see [Running multiple VMs on Azure][multi-vm]. + +### VM recommendations + +Azure offers many different virtual machine sizes, but we recommend the DS- and GS-series because these machine sizes support [Premium Storage][premium-storage]. Select one of these machine sizes unless you have a specialized workload such as high-performance computing. For details, see [virtual machine sizes][virtual-machine-sizes]. + +If you are moving an existing workload to Azure, start with the VM size that's the closest match to your on-premises servers. Then measure the performance of your actual workload with respect to CPU, memory, and disk input/output operations per second (IOPS), and adjust the size if needed. If you require multiple NICs for your VM, be aware that the maximum number of NICs is a function of the [VM size][vm-size-tables]. + +When you provision the VM and other resources, you must specify a region. Generally, choose a region closest to your internal users or customers. However, not all VM sizes may be available in all regions. For details, see [services by region][services-by-region]. 
To see a list of the VM sizes available in a given region, run the following Azure command-line interface (CLI) command: + +``` +azure vm sizes --location +``` + +For information about choosing a published VM image, see [Navigate and select Windows virtual machine images in Azure with Powershell or CLI][select-vm-image]. + +### Disk and storage recommendations + +For best disk I/O performance, we recommend [Premium Storage][premium-storage], which stores data on solid state drives (SSDs). Cost is based on the size of the provisioned disk. IOPS and throughput also depend on disk size, so when you provision a disk, consider all three factors (capacity, IOPS, and throughput). + +Create separate Azure storage accounts for each VM to hold the virtual hard disks (VHDs) in order to avoid hitting the IOPS limits for storage accounts. + +Add one or more data disks. When you create a new VHD, it is unformatted. Log into the VM to format the disk. If you have a large number of data disks, be aware of the total I/O limits of the storage account. For more information, see [virtual machine disk limits][vm-disk-limits]. + +When possible, install applications on a data disk, not the OS disk. However, some legacy applications might need to install components on the C: drive. In that case, you can [resize the OS disk][resize-os-disk] using PowerShell. + +For best performance, create a separate storage account to hold diagnostic logs. A standard locally redundant storage (LRS) account is sufficient for diagnostic logs. + +### Network recommendations + +The public IP address can be dynamic or static. The default is dynamic. + +* Reserve a [static IP address][static-ip] if you need a fixed IP address that won't change — for example, if you need to create an A record in DNS, or need the IP address to be added to a safe list. +* You can also create a fully qualified domain name (FQDN) for the IP address. You can then register a [CNAME record][cname-record] in DNS that points to the FQDN. 
For more information, see [create a fully qualified domain name in the Azure portal][fqdn]. + +All NSGs contain a set of [default rules][nsg-default-rules], including a rule that blocks all inbound Internet traffic. The default rules cannot be deleted, but other rules can override them. To enable Internet traffic, create rules that allow inbound traffic to specific ports — for example, port 80 for HTTP. + +To enable RDP, add an NSG rule that allows inbound traffic to TCP port 3389. + +## Scalability considerations + +You can scale a VM up or down by [changing the VM size][vm-resize]. To scale out horizontally, put two or more VMs into an availability set behind a load balancer. For details, see [Running multiple VMs on Azure for scalability and availability][multi-vm]. + +## Availability considerations + +For higher availability, deploy multiple VMs in an availability set. This also provides a higher [service level agreement][vm-sla] (SLA). + +Your VM may be affected by [planned maintenance][planned-maintenance] or [unplanned maintenance][manage-vm-availability]. You can use [VM reboot logs][reboot-logs] to determine whether a VM reboot was caused by planned maintenance. + +VHDs are stored in [Azure storage][azure-storage], and Azure storage is replicated for durability and availability. + +To protect against accidental data loss during normal operations (for example, because of user error), you should also implement point-in-time backups, using [blob snapshots][blob-snapshot] or another tool. + +## Manageability considerations + +**Resource groups.** Put tightly-coupled resources that share the same life cycle into the same [resource group][resource-manager-overview]. Resource groups allow you to deploy and monitor resources as a group and roll up billing costs by resource group. You can also delete resources as a set, which is very useful for test deployments. Give resources meaningful names. 
That makes it easier to locate a specific resource and understand its role. See [Recommended Naming Conventions for Azure Resources][naming conventions]. + +**VM diagnostics.** Enable monitoring and diagnostics, including basic health metrics, diagnostics infrastructure logs, and [boot diagnostics][boot-diagnostics]. Boot diagnostics can help you diagnose a boot failure if your VM gets into a nonbootable state. For more information, see [Enable monitoring and diagnostics][enable-monitoring]. Use the [Azure Log Collection][log-collector] extension to collect Azure platform logs and upload them to Azure storage. + +The following CLI command enables diagnostics: + +``` +azure vm enable-diag +``` + +**Stopping a VM.** Azure makes a distinction between "stopped" and "deallocated" states. You are charged when the VM status is stopped, but not when the VM is deallocated. + +Use the following CLI command to deallocate a VM: + +``` +azure vm deallocate +``` + +In the Azure portal, the **Stop** button deallocates the VM. However, if you shut down through the OS while logged in, the VM is stopped but *not* deallocated, so you will still be charged. + +**Deleting a VM.** If you delete a VM, the VHDs are not deleted. That means you can safely delete the VM without losing data. However, you will still be charged for storage. To delete the VHD, delete the file from [Blob storage][blob-storage]. + +To prevent accidental deletion, use a [resource lock][resource-lock] to lock the entire resource group or lock individual resources, such as the VM. + +## Security considerations + +Use [Azure Security Center][security-center] to get a central view of the security state of your Azure resources. Security Center monitors potential security issues and provides a comprehensive picture of the security health of your deployment. Security Center is configured per Azure subscription. Enable security data collection as described in [Use Security Center]. 
When data collection is enabled, Security Center automatically scans any VMs created under that subscription. + +**Patch management.** If enabled, Security Center checks whether security and critical updates are missing. Use [Group Policy settings][group-policy] on the VM to enable automatic system updates. + +**Antimalware.** If enabled, Security Center checks whether antimalware software is installed. You can also use Security Center to install antimalware software from inside the Azure portal. + +**Operations.** Use [role-based access control][rbac] (RBAC) to control access to the Azure resources that you deploy. RBAC lets you assign authorization roles to members of your DevOps team. For example, the Reader role can view Azure resources but not create, manage, or delete them. Some roles are specific to particular Azure resource types. For example, the Virtual Machine Contributor role can restart or deallocate a VM, reset the administrator password, create a new VM, and so forth. Other [built-in RBAC roles][rbac-roles] that might be useful for this architecture include [DevTest Labs User][rbac-devtest] and [Network Contributor][rbac-network]. A user can be assigned to multiple roles, and you can create custom roles for even more fine-grained permissions. + +> [!NOTE] +> RBAC does not limit the actions that a user logged into a VM can perform. Those permissions are determined by the account type on the guest OS. +> +> + +To reset the local administrator password, run the `vm reset-access` Azure CLI command. + +``` +azure vm reset-access -u -p +``` + +Use [audit logs][audit-logs] to see provisioning actions and other VM events. + +**Data encryption.** Consider [Azure Disk Encryption][disk-encryption] if you need to encrypt the OS and data disks. + +## Deploy the solution + +A deployment for this architecture is available on [GitHub][github-folder]. It includes a VNet, NSG, and a single VM. To deploy the architecture, follow these steps: + +1. 
Right click the button below and select either "Open link in new tab" or "Open link in new window." + [![Deploy to Azure](../_images/blueprints/deploybutton.png)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmspnp%2Freference-architectures%2Fmaster%2Fguidance-compute-single-vm%2Fazuredeploy.json) +2. Once the link has opened in the Azure portal, you must enter values for some of the settings: + + * The **Resource group** name is already defined in the parameter file, so select **Create New** and enter `ra-single-vm-rg` in the text box. + * Select the region from the **Location** drop down box. + * Do not edit the **Template Root Uri** or the **Parameter Root Uri** text boxes. + * Select **windows** in the **Os Type** drop down box. + * Review the terms and conditions, then click the **I agree to the terms and conditions stated above** checkbox. + * Click on the **Purchase** button. +3. Wait for the deployment to complete. +4. The parameter files include a hard-coded administrator user name and password, and it is strongly recommended that you immediately change both. Click on the VM named `ra-single-vm0` in the Azure portal. Then, click on **Reset password** in the **Support + troubleshooting** blade. Select **Reset password** in the **Mode** dropdown box, then select a new **User name** and **Password**. Click the **Update** button to persist the new user name and password. + +For information on additional ways to deploy this architecture, see the readme file in the [guidance-single-vm][github-folder] GitHub folder. + +If you need to change the deployment to match your needs, follow the instructions in the [readme][github-folder]. 
+ + + + +[audit-logs]: https://azure.microsoft.com/blog/analyze-azure-audit-logs-in-powerbi-more/ +[availability-set]: /azure/virtual-machines/virtual-machines-windows-create-availability-set +[azure-cli]: /azure/virtual-machines-command-line-tools +[azure-storage]: /azure/storage/storage-introduction +[blob-snapshot]: /azure/storage/storage-blob-snapshots +[blob-storage]: /azure/storage/storage-introduction +[boot-diagnostics]: https://azure.microsoft.com/blog/boot-diagnostics-for-virtual-machines-v2/ +[cname-record]: https://en.wikipedia.org/wiki/CNAME_record +[data-disk]: /azure/virtual-machines/virtual-machines-windows-about-disks-vhds +[disk-encryption]: /azure/security/azure-security-disk-encryption +[enable-monitoring]: /azure/monitoring-and-diagnostics/insights-how-to-use-diagnostics +[fqdn]: /azure/virtual-machines/virtual-machines-windows-portal-create-fqdn +[github-folder]: http://github.com/mspnp/reference-architectures/tree/master/guidance-compute-single-vm +[group-policy]: https://technet.microsoft.com/en-us/library/dn595129.aspx +[log-collector]: https://azure.microsoft.com/blog/simplifying-virtual-machine-troubleshooting-using-azure-log-collector/ +[manage-vm-availability]: /azure/virtual-machines/virtual-machines-windows-manage-availability +[multi-vm]: multi-vm.md +[naming conventions]: ../../best-practices/naming-conventions.md +[nsg]: /azure/virtual-network/virtual-networks-nsg +[nsg-default-rules]: /azure/virtual-network/virtual-networks-nsg#default-rules +[planned-maintenance]: /azure/virtual-machines/virtual-machines-windows-planned-maintenance +[premium-storage]: /azure/storage/storage-premium-storage +[rbac]: /azure/active-directory/role-based-access-control-what-is +[rbac-roles]: /azure/active-directory/role-based-access-built-in-roles +[rbac-devtest]: /azure/active-directory/role-based-access-built-in-roles#devtest-labs-user +[rbac-network]: /azure/active-directory/role-based-access-built-in-roles#network-contributor +[reboot-logs]: 
https://azure.microsoft.com/blog/viewing-vm-reboot-logs/ +[resize-os-disk]: /azure/virtual-machines/virtual-machines-windows-expand-os-disk +[Resize-VHD]: https://technet.microsoft.com/en-us/library/hh848535.aspx +[Resize virtual machines]: https://azure.microsoft.com/blog/resize-virtual-machines/ +[resource-lock]: /azure/resource-group-lock-resources +[resource-manager-overview]: /azure/azure-resource-manager/resource-group-overview +[security-center]: https://azure.microsoft.com/services/security-center/ +[select-vm-image]: /azure/virtual-machines/virtual-machines-windows-cli-ps-findimage +[services-by-region]: https://azure.microsoft.com/regions/#services +[static-ip]: /azure/virtual-network/virtual-networks-reserved-public-ip +[storage-account-limits]: /azure/azure-subscription-service-limits#storage-limits +[storage-price]: https://azure.microsoft.com/pricing/details/storage/ +[Use Security Center]: /azure/security-center/security-center-get-started#use-security-center +[virtual-machine-sizes]: /azure/virtual-machines/virtual-machines-windows-sizes +[visio-download]: http://download.microsoft.com/download/1/5/6/1569703C-0A82-4A9C-8334-F13D0DF2F472/RAs.vsdx +[vm-disk-limits]: /azure/azure-subscription-service-limits#virtual-machine-disk-limits +[vm-resize]: /azure/virtual-machines/virtual-machines-linux-change-vm-size +[vm-sla]: https://azure.microsoft.com/support/legal/sla/virtual-machines +[vm-size-tables]: /azure/virtual-machines/virtual-machines-windows-sizes#size-tables +[0]: ./images/single-vm-diagram.png "Single Windows VM architecture in Azure" +[readme]: https://github.com/mspnp/reference-architectures/blob/master/guidance-compute-single-vm +[blocks]: https://github.com/mspnp/template-building-blocks + diff --git a/docs/checklist/availability.md b/docs/checklist/availability.md new file mode 100644 index 00000000000..7d144a49968 --- /dev/null +++ b/docs/checklist/availability.md @@ -0,0 +1,67 @@ +--- +title: Availability checklist +description: 
Checklist that provides guidance for availability concerns during design. +services: '' +documentationcenter: na +author: dragon119 +manager: masimms +editor: '' +tags: '' + +ms.assetid: bc6be15e-b454-4f53-8761-71f0810ce549 +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Availability checklist +[!INCLUDE [header](../_includes/header.md)] + +## Application design +* **Avoid any single point of failure.** All components, services, resources, and compute instances should be deployed as multiple instances to prevent a single point of failure from affecting availability. This includes authentication mechanisms. Design the application to be configurable to use multiple instances, and to automatically detect failures and redirect requests to non-failed instances where the platform does not do this automatically. +* **Decompose workload per different service-level agreement.** If a service is composed of critical and less-critical workloads, manage them differently and specify the service features and number of instances to meet their availability requirements. +* **Minimize and understand service dependencies.** Minimize the number of different services used where possible, and ensure you understand all of the feature and service dependencies that exist in the system. This includes the nature of these dependencies, and the impact of failure or reduced performance in each one on the overall application. Microsoft guarantees at least 99.9 percent availability for most services, but this means that every additional service an application relies on potentially reduces the overall availability SLA of your system by 0.1 percent. +* **Design tasks and messages to be idempotent (safely repeatable) where possible**, so that duplicated requests will not cause problems. 
For example, a service can act as a consumer that handles messages sent as requests by other parts of the system that act as producers. If the consumer fails after processing the message, but before acknowledging that it has been processed, a producer might submit a repeat request which could be handled by another instance of the consumer. For this reason, consumers and the operations they carry out should be idempotent so that repeating a previously executed operation does not render the results invalid. This may mean detecting duplicated messages, or ensuring consistency by using an optimistic approach to handling conflicts. +* **Use a message broker that implements high availability for critical transactions.** Many scenarios for initiating tasks or accessing remote services use messaging to pass instructions between the application and the target service. For best performance, the application should be able to send the message and then return to process more requests, without needing to wait for a reply. To guarantee delivery of messages, the messaging system should provide high availability. Azure Service Bus message queues implement *at least once* semantics. This means that each message posted to a queue will not be lost, although duplicate copies may be delivered under certain circumstances. If message processing is idempotent (see the previous item), repeated delivery should not be a problem. +* **Design applications to gracefully degrade** when reaching resource limits, and take appropriate action to minimize the impact for the user. In some cases, the load on the application may exceed the capacity of one or more parts, causing reduced availability and failed connections. Scaling can help to alleviate this, but it may reach a limit imposed by other factors, such as resource availability or cost. Design the application so that, in this situation, it can automatically degrade gracefully. 
For example, in an ecommerce system, if the order-processing subsystem is under strain (or has even failed completely), it can be temporarily disabled while allowing other functionality (such as browsing the product catalog) to continue. It might be appropriate to postpone requests to a failing subsystem, for example still enabling customers to submit orders but saving them for later processing, when the orders subsystem is available again. +* **Gracefully handle rapid burst events.** Most applications need to handle varying workloads over time, such as peaks first thing in the morning in a business application or when a new product is released in an ecommerce site. Auto-scaling can help to handle the load, but it may take some time for additional instances to come online and handle requests. Prevent sudden and unexpected bursts of activity from overwhelming the application: design it to queue requests to the services it uses and degrade gracefully when queues are near to full capacity. Ensure that there is sufficient performance and capacity available under non-burst conditions to drain the queues and handle outstanding requests. For more information, see the [Queue-Based Load Leveling Pattern](https://msdn.microsoft.com/library/dn589783.aspx). + +## Deployment and maintenance +* **Deploy multiple instances of roles for each service.** Microsoft makes availability guarantees for services that you create and deploy, but these guarantees are only valid if you deploy at least two instances of each role in the service. This enables one role to be unavailable while the other remains active. This is especially important if you need to deploy updates to a live system without interrupting clients' activities; instances can be taken down and upgraded individually while the others continue online. 
+* **Host applications in multiple datacenters.** Although extremely unlikely, it is possible for an entire datacenter to go offline through an event such as a natural disaster or Internet failure. Vital business applications should be hosted in more than one datacenter to provide maximum availability. This can also reduce latency for local users, and provide additional opportunities for flexibility when updating applications. +* **Automate and test deployment and maintenance tasks.** Distributed applications consist of multiple parts that must work together. Deployment should therefore be automated, using tested and proven mechanisms such as scripts and deployment applications. These can update and validate configuration, and automate the deployment process. Automated techniques should also be used to perform updates of all or parts of applications. It is vital to test all of these processes fully to ensure that errors do not cause additional downtime. All deployment tools must have suitable security restrictions to protect the deployed application; define and enforce deployment policies carefully and minimize the need for human intervention. +* **Consider using staging and production features of the platform** where these are available. For example, using Azure Cloud Services staging and production environments allows applications to be switched from one to another instantly through a virtual IP address swap (VIP Swap). However, if you prefer to stage on-premises, or deploy different versions of the application concurrently and gradually migrate users, you may not be able to use a VIP Swap operation. +* **Apply configuration changes without recycling** the instance when possible. In many cases, the configuration settings for an Azure application or service can be changed without requiring the role to be restarted. Roles expose events that can be handled to detect configuration changes and apply them to components within the application.
However, some changes to the core platform settings do require a role to be restarted. When building components and services, maximize availability and minimize downtime by designing them to accept changes to configuration settings without requiring the application as a whole to be restarted. +* **Use upgrade domains for zero downtime during updates.** Azure compute units such as web and worker roles are allocated to upgrade domains. Upgrade domains group role instances together so that, when a rolling update takes place, each role in the upgrade domain is stopped, updated, and restarted in turn. This minimizes the impact on application availability. You can specify how many upgrade domains should be created for a service when the service is deployed. + + > [!NOTE] + > Roles are also distributed across fault domains, each of which is reasonably independent from other fault domains in terms of server rack, power, and cooling provision, in order to minimize the chance of a failure affecting all role instances. This distribution occurs automatically, and you cannot control it. + > + > +* **Configure availability sets for Azure virtual machines.** Placing two or more virtual machines in the same availability set guarantees that these virtual machines will not be deployed to the same fault domain. To maximize availability, you should create multiple instances of each critical virtual machine used by your system and place these instances in the same availability set. If you are running multiple virtual machines that serve different purposes, create an availability set for each virtual machine. Add instances of each virtual machine to each availability set. For example, if you have created separate virtual machines to act as a web server and a reporting server, create an availability set for the web server and another availability set for the reporting server. 
Add instances of the web server virtual machine to the web server availability set, and add instances of the reporting server virtual machine to the reporting server availability set. + +## Data management +* **Take advantage of data replication** through both local and geographical redundancy. Data in Azure storage is automatically replicated to protect against loss in case of infrastructure failure, and some aspects of this replication can be configured. For example, read-only copies of data may be replicated in more than one geographical region (referred to as read-access geo-redundant storage, or RA-GRS). Note that using RA-GRS incurs additional charges. For details, see [Azure Storage Pricing](https://azure.microsoft.com/pricing/details/storage/). +* **Use optimistic concurrency and eventual consistency** where possible. Transactions that block access to resources through locking (pessimistic concurrency) can cause poor performance and considerably reduce availability. These problems can become especially acute in distributed systems. In many cases, careful design and techniques such as partitioning can minimize the chances of conflicting updates occurring. Where data is replicated, or is read from a separately updated store, the data will only be eventually consistent. But the advantages usually far outweigh the impact on availability of using transactions to ensure immediate consistency. +* **Use periodic backup and point-in-time restore**, and ensure it meets the Recovery Point Objective (RPO). Regularly and automatically back up data that is not preserved elsewhere, and verify you can reliably restore both the data and the application itself should a failure occur. Data replication is not a backup feature because errors and inconsistencies introduced through failure, error, or malicious operations will be replicated across all stores. The backup process must be secure to protect the data in transit and in storage.
Databases or parts of a data store can usually be recovered to a previous point in time by using transaction logs. Microsoft Azure provides a backup facility for data stored in Azure SQL Database. The data is exported to a backup package on Azure blob storage, and can be downloaded to a secure on-premises location for storage. +* **Enable the high availability option to maintain a secondary copy of an Azure Redis cache.** When using Azure Redis Cache, choose the standard option to maintain a secondary copy of the contents. For more information, see [Create a cache in Azure Redis Cache](https://msdn.microsoft.com/library/dn690516.aspx). + +## Errors and failures +* **Introduce the concept of a timeout.** Services and resources may become unavailable, causing requests to fail. Ensure that the timeouts you apply are appropriate for each service or resource as well as the client that is accessing them. (In some cases, it may be appropriate to allow a longer timeout for a particular instance of a client, depending on the context and other actions that the client is performing.) Very short timeouts may cause excessive retry operations for services and resources that have considerable latency. Very long timeouts can cause blocking if a large number of requests are queued, waiting for a service or resource to respond. +* **Retry failed operations caused by transient faults.** Design a retry strategy for access to all services and resources where they do not inherently support automatic connection retry. Use a strategy that includes an increasing delay between retries as the number of failures increases, to prevent overloading of the resource and to allow it to gracefully recover and handle queued requests. Continual retries with very short delays are likely to exacerbate the problem. +* **Stop sending requests to avoid cascading failures** when remote services are unavailable. 
There may be situations in which transient or other faults, ranging in severity from a partial loss of connectivity to the complete failure of a service, take much longer than expected to return to normal. Additionally, if a service is very busy, failure in one part of the system may lead to cascading failures, and result in many operations becoming blocked while holding onto critical system resources such as memory, threads, and database connections. Instead of continually retrying an operation that is unlikely to succeed, the application should quickly accept that the operation has failed, and gracefully handle this failure. You can use the circuit breaker pattern to reject requests for specific operations for defined periods. For more details, see [Circuit Breaker Pattern](https://msdn.microsoft.com/library/dn589784.aspx). +* **Compose or fall back to multiple components** to mitigate the impact of a specific service being offline or unavailable. Design applications to take advantage of multiple instances without affecting operation and existing connections where possible. Use multiple instances and distribute requests between them, and detect and avoid sending requests to failed instances, in order to maximize availability. +* **Fall back to a different service or workflow** where possible. For example, if writing to SQL Database fails, temporarily store data in blob storage. Provide a facility to replay the writes in blob storage to SQL Database when the service becomes available. In some cases, a failed operation may have an alternative action that allows the application to continue to work even when a component or service fails. If possible, detect failures and redirect requests to other services that can offer a suitable alternative functionality, or to backup or reduced-functionality instances that can maintain core operations while the primary service is offline.
+ +## Monitoring and disaster recovery +* **Provide rich instrumentation for likely failures and failure events** to report the situation to operations staff. For failures that are likely but have not yet occurred, provide sufficient data to enable operations staff to determine the cause, mitigate the situation, and ensure that the system remains available. For failures that have already occurred, the application should return an appropriate error message to the user but attempt to continue running, albeit with reduced functionality. In all cases, the monitoring system should capture comprehensive details to enable operations staff to effect a quick recovery, and if necessary, for designers and developers to modify the system to prevent the situation from arising again. +* **Monitor system health by implementing checking functions.** The health and performance of an application can degrade over time, without being noticeable until it fails. Implement probes or check functions that are executed regularly from outside the application. These checks can be as simple as measuring response time for the application as a whole, for individual parts of the application, for individual services that the application uses, or for individual components. Check functions can execute processes to ensure they produce valid results, measure latency and check availability, and extract information from the system. +* **Regularly test all failover and fallback systems** to ensure they are available and operate as expected. Changes to systems and operations may affect failover and fallback functions, but the impact may not be detected until the main system fails or becomes overloaded. Test it before it is required to compensate for a live problem at runtime. +* **Test the monitoring systems.** Automated failover and fallback systems, and manual visualization of system health and performance by using dashboards, all depend on monitoring and instrumentation functioning correctly. 
If these elements fail, miss critical information, or report inaccurate data, an operator might not realize that the system is unhealthy or failing. +* **Track the progress of long-running workflows** and retry on failure. Long-running workflows are often composed of multiple steps. Ensure that each step is independent and can be retried to minimize the chance that the entire workflow will need to be rolled back, or that multiple compensating transactions need to be executed. Monitor and manage the progress of long-running workflows by implementing a pattern such as [Scheduler Agent Supervisor Pattern](https://msdn.microsoft.com/library/dn589780.aspx). +* **Plan for disaster recovery.** Ensure there is a documented, accepted, and fully tested plan for recovery from any type of failure that may render part or all of the main system unavailable. Test the procedures regularly, and ensure that all operations staff are familiar with the process. + diff --git a/docs/checklist/index.md b/docs/checklist/index.md new file mode 100644 index 00000000000..2c1761f25e4 --- /dev/null +++ b/docs/checklist/index.md @@ -0,0 +1,3 @@ +--- +redirect_url: /azure/architecture/checklist/availability +--- \ No newline at end of file diff --git a/docs/checklist/resiliency.md b/docs/checklist/resiliency.md new file mode 100644 index 00000000000..05f46cb09c9 --- /dev/null +++ b/docs/checklist/resiliency.md @@ -0,0 +1,210 @@ +--- +title: Resiliency checklist +description: Checklist that provides guidance for resiliency concerns during design. 
+services: '' +documentationcenter: na +author: petertaylor9999 +manager: christb +editor: '' +tags: '' + +ms.assetid: 1b24134e-2279-4bf6-9e24-cd05aa0de89f +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 10/24/2016 +ms.author: petertay + +--- +# Resiliency checklist +[!INCLUDE [header](../_includes/header.md)] + +Designing your application for resiliency requires planning for and mitigating a variety of failure modes that could occur. Review the items in this checklist against your application design to make it more resilient. + +## Requirements +* **Define your customer's availability requirements.** Your customer will have availability requirements for the components in your application and this will affect your application's design. Get agreement from your customer for the availability targets of each piece of your application, otherwise your design may not meet the customer's expectations. For more information, see the [Defining your resiliency requirements](../resiliency/index.md#defining-your-resiliency-requirements) section of the [Designing resilient applications for Azure](../resiliency/index.md) document. + +## Failure Mode Analysis +* **Perform a failure mode analysis (FMA) for your application.** FMA is a process for building resiliency into an application early in the design stage. The goals of an FMA include: + + * Identify what types of failures an application might experience. + * Capture the potential effects and impact of each type of failure on the application. + * Identify recovery strategies. + + For more information, see [Designing resilient applications for Azure: Failure mode analysis][fma]. + +## Application +* **Deploy multiple instances of services.** Services will inevitably fail, and if your application depends on a single instance of a service it will inevitably fail also. 
To provision multiple instances for [Azure App Service](/azure/app-service/app-service-value-prop-what-is/), select an [App Service Plan](/azure/app-service/azure-web-sites-web-hosting-plans-in-depth-overview/) that offers multiple instances. For Azure Cloud Services, configure each of your roles to use [multiple instances](/azure/cloud-services/cloud-services-choose-me/#scaling-and-management). For [Azure Virtual Machines (VMs)](/azure/virtual-machines/virtual-machines-windows-about/?toc=%2fazure%2fvirtual-machines%2fwindows%2ftoc.json), ensure that your VM architecture includes more than one VM and that each VM is included in an [availability set][availability-sets]. +* **Use a load balancer to distribute requests.** A load balancer distributes your application's requests to healthy service instances by removing unhealthy instances from rotation. If your service uses Azure App Service or Azure Cloud Services, it is already load balanced for you. However, if your application uses Azure VMs, you will need to provision a load balancer. See the [Azure Load Balancer](/azure/load-balancer/load-balancer-overview/) overview for more details. +* **Configure Azure Application Gateways to use multiple instances.** Depending on your application's requirements, an [Azure Application Gateway](/azure/application-gateway/application-gateway-introduction/) may be better suited to distributing requests to your application's services. However, single instances of the Application Gateway service are not guaranteed by an SLA so it's possible that your application could fail if the Application Gateway instance fails. Provision more than one medium or larger Application Gateway instance to guarantee availability of the service under the terms of the [SLA](https://azure.microsoft.com/support/legal/sla/application-gateway/v1_0/). +* **Use Availability Sets for each application tier**. 
Placing your instances in an [availability set][availability-sets] provides a higher [SLA](https://azure.microsoft.com/support/legal/sla/virtual-machines/). +* **Consider deploying your application across multiple regions.** If your application is deployed to a single region, in the rare event the entire region becomes unavailable, your application will also be unavailable. This may be unacceptable under the terms of your application's SLA. If so, consider deploying your application and its services across multiple regions. A multi-region deployment can use an active-active pattern (distributing requests across multiple active instances) or an active-passive pattern (keeping a "warm" instance in reserve, in case the primary instance fails). We recommend that you deploy multiple instances of your application's services across regional pairs. For more information, see [Business continuity and disaster recovery (BCDR): Azure Paired Regions](/azure/best-practices-availability-paired-regions). +* **Implement resiliency patterns for remote operations where appropriate.** If your application depends on communication between remote services, the communication path will inevitably fail. If there are multiple failures, the remaining healthy instances of your application's services could be overwhelmed with requests. There are several patterns useful for dealing with common failures including the timeout pattern, the [retry pattern][retry-pattern], the [circuit breaker][circuit-breaker] pattern, and others. For more information, see [Designing resilient applications for Azure](../resiliency/index.md#resiliency-strategies). +* **Use autoscaling to respond to increases in load.** If your application is not configured to scale out automatically as load increases, it's possible that your application's services will fail if they become saturated with user requests. 
For more details, see the following: + + * General: [Scalability checklist](./scalability.md) + * Azure App Service: [Scale instance count manually or automatically][app-service-autoscale] + * Cloud Services: [How to auto scale a cloud service][cloud-service-autoscale] + * Virtual Machines: [Automatic scaling and virtual machine scale sets][vmss-autoscale] +* **Implement asynchronous operations whenever possible.** Synchronous operations can monopolize resources and block other operations while the caller waits for the process to complete. Design each part of your application to allow for asynchronous operations whenever possible. For more information on how to implement asynchronous programming in C#, see [Asynchronous Programming with async and await][asynchronous-c-sharp]. +* **Use Azure Traffic Manager to route your application's traffic to different regions.** [Azure Traffic Manager][traffic-manager] performs load balancing at the DNS level and can route traffic to different regions based on the [traffic routing][traffic-manager-routing] method you specify and the health of your application's endpoints. +* **Configure and test health probes for your load balancers and traffic managers.** Ensure that your health logic checks the critical parts of the system and responds appropriately to health probes. + + * The health probes for [Azure Traffic Manager][traffic-manager] and [Azure Load Balancer][load-balancer] serve a specific function. For Traffic Manager, the health probe determines whether to fail over to another region. For a load balancer, it determines whether to remove a VM from rotation. + * For a Traffic Manager probe, your health endpoint should check any critical dependencies that are deployed within the same region, and whose failure should trigger a failover to another region. + * For a load balancer, the health endpoint should report the health of the VM. Don't include other tiers or external services. 
Otherwise, a failure that occurs outside the VM will cause the load balancer to remove the VM from rotation. + * For guidance on implementing health monitoring in your application, see [Health Endpoint Monitoring Pattern](https://msdn.microsoft.com/library/dn589789.aspx). +* **Monitor third-party services.** If your application has dependencies on third-party services, identify where and how these third-party services can fail and what effect those failures will have on your application. A third-party service may not include monitoring and diagnostics, so it's important to log your invocations of them and correlate them with your application's health and diagnostic logging using a unique identifier. For more information on best practices for monitoring and diagnostics, see the [Monitoring and Diagnostics guidance][monitoring-and-diagnostics-guidance] document. +* **Ensure that any third-party service you consume provides an SLA.** If your application depends on a third-party service, but the third party provides no guarantee of availability in the form of an SLA, your application's availability also cannot be guaranteed. Your SLA is only as good as the least available component of your application. + +## Data management +* **Understand the replication methods for your application's data sources.** Your application data will be stored in different data sources and have different availability requirements. Evaluate the replication methods for each type of data storage in Azure, including [Azure Storage Replication](/azure/storage/storage-redundancy/) and [SQL Database Active Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/) to ensure that your application's data requirements are satisfied. +* **Ensure that no single user account has access to both production and backup data.** Your data backups are compromised if one single user account has permission to write to both production and backup sources. 
A malicious user could purposely delete all your data, while a regular user could accidentally delete it. Design your application to limit the permissions of each user account so that only the users that require write access have write access and it's only to either production or backup, but not both. +* **Document your data source fail over and fail back process and test it.** In the case where your data source fails catastrophically, a human operator will have to follow a set of documented instructions to fail over to a new data source. If the documented steps have errors, an operator will not be able to successfully follow them and fail over the resource. Regularly test the instruction steps to verify that an operator following them is able to successfully fail over and fail back the data source. +* **Validate your data backups.** Regularly verify that your backup data is what you expect by running a script to validate data integrity, schema, and queries. There's no point having a backup if it's not useful to restore your data sources. Log and report any inconsistencies so the backup service can be repaired. +* **Consider using a storage account type that is geo-redundant.** Data stored in an Azure Storage account is always replicated locally. However, there are multiple replication strategies to choose from when a Storage Account is provisioned. Select [Azure Read-Access Geo Redundant Storage (RA-GRS)](/azure/storage/storage-redundancy/#read-access-geo-redundant-storage) to protect your application data against the rare case when an entire region becomes unavailable. + + > [!NOTE] + > For VMs, do not rely on RA-GRS replication to restore the VM disks (VHD files). Instead, use [Azure Backup][azure-backup]. + > + > + +## Operations +* **Implement monitoring and alerting best practices in your application.** Without proper monitoring, diagnostics, and alerting, there is no way to detect failures in your application and alert an operator to fix them. 
For more information on best practices, see the [Monitoring and Diagnostics guidance][monitoring-and-diagnostics-guidance] document. +* **Measure remote call statistics and make the information available to the application team.** If you don't track and report remote call statistics in real time and provide an easy way to review this information, the operations team will not have an instantaneous view into the health of your application. And if you only measure average remote call time, you will not have enough information to reveal issues in the services. Summarize remote call metrics such as latency, throughput, and errors at the 99th and 95th percentiles. Perform statistical analysis on the metrics to uncover errors that occur within each percentile. +* **Track the number of transient exceptions and retries over an appropriate timeframe.** If you don't track and monitor transient exceptions and retry attempts over time, it's possible that an issue or failure could be hidden by your application's retry logic. That is, if your monitoring and logging only shows success or failure of an operation, the fact that the operation had to be retried multiple times due to exceptions will be hidden. A trend of increasing exceptions over time indicates that the service is having an issue and may fail. For more information, see [Retry service specific guidance][retry-service-guidance]. +* **Implement an early warning system that alerts an operator.** Identify the key performance indicators of your application's health, such as transient exceptions and remote call latency, and set appropriate threshold values for each of them. Send an alert to operations when the threshold value is reached. Set these thresholds at levels that identify issues before they become critical and require a recovery response.
+* **Document the release process for your application.** Without detailed release process documentation, an operator might deploy a bad update or improperly configure settings for your application. Clearly define and document your release process, and ensure that it's available to the entire operations team. Best practices for resilient deployment of your application are detailed in the [resilient deployment][resilient-deployment] section of the Resiliency Guidance document. +* **Ensure that more than one person on the team is trained to monitor the application and perform any manual recovery steps.** If you only have a single operator on the team who can monitor the application and kick off recovery steps, that person becomes a single point of failure. Train multiple individuals on detection and recovery and make sure there is always at least one active at any time. +* **Automate your application's deployment process.** If your operations staff is required to manually deploy your application, human error can cause the deployment to fail. For more information on best practices for automating application deployment, see the [resilient deployment][resilient-deployment] section of the Resiliency Guidance document. +* **Design your release process to maximize application availability.** If your release process requires services to go offline during deployment, your application will be unavailable until they come back online. Use the [blue/green](http://martinfowler.com/bliki/BlueGreenDeployment.html) or [canary release](http://martinfowler.com/bliki/CanaryRelease.html) deployment technique to deploy your application to production. Both of these techniques involve deploying your release code alongside production code so users of release code can be redirected to production code in the event of a failure. For more information, see the [resilient deployment][resilient-deployment] section of the Resiliency Guidance document. 
+* **Log and audit your application's deployments.** If you use staged deployment techniques such as blue/green or canary releases there will be more than one version of your application running in production. If a problem should occur, it's critical to determine which version of your application is causing a problem. Implement a robust logging strategy to capture as much version-specific information as possible. +* **Ensure that your application does not run up against [Azure subscription limits](/azure/azure-subscription-service-limits/).** Azure subscriptions have limits on certain resource types, such as number of resource groups, number of cores, and number of storage accounts. If your application requirements exceed Azure subscription limits, create another Azure subscription and provision sufficient resources there. +* **Ensure that your application does not run up against [per-service limits](/azure/azure-subscription-service-limits/).** Individual Azure services have consumption limits — for example, limits on storage, throughput, number of connections, requests per second, and other metrics. Your application will fail if it attempts to use resources beyond these limits. This will result in service throttling and possible downtime for affected users. + + Depending on the specific service and your application requirements, you can often avoid these limits by scaling up (for example, choosing another pricing tier) or scaling out (adding new instances). +* **Design your application's storage requirements to fall within Azure storage scalability and performance targets.** Azure storage is designed to function within predefined scalability and performance targets, so design your application to utilize storage within those targets. If you exceed these targets your application will experience storage throttling. To fix this, provision additional Storage Accounts. 
If you run up against the Storage Account limit, provision additional Azure Subscriptions and then provision additional Storage Accounts there. For more information, see [Azure Storage Scalability and Performance Targets](/azure/storage/storage-scalability-targets/). +* **Select the right VM size for your application.** Measure the actual CPU, memory, disk, and I/O of your VMs in production and verify that the VM size you've selected is sufficient. If not, your application may experience capacity issues as the VMs approach their limits. VM sizes are described in detail in the [Sizes for virtual machines in Azure](/azure/virtual-machines/virtual-machines-windows-sizes/?toc=%2fazure%2fvirtual-machines%2fwindows%2ftoc.json) document. +* **Determine if your application's workload is stable or fluctuating over time.** If your workload fluctuates over time, use Azure VM scale sets to automatically scale the number of VM instances. Otherwise, you will have to manually increase or decrease the number of VMs. For more information, see the [Virtual Machine Scale Sets Overview](/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview/). +* **Select the right service tier for Azure SQL Database.** If your application uses Azure SQL Database, ensure that you have selected the appropriate service tier. If you select a tier that is not able to handle your application's database transaction unit (DTU) requirements, your data use will be throttled. For more information on selecting the correct service plan, see the [SQL Database options and performance: Understand what's available in each service tier](/azure/sql-database/sql-database-service-tiers/) document. +* **Have a rollback plan for deployment.** It's possible that your application deployment could fail and cause your application to become unavailable. Design a rollback process to go back to a last known good version and minimize downtime. 
See the [resilient deployment][resilient-deployment] section of the Resiliency Guidance document for more information. +* **Create a process for interacting with Azure support.** If the process for contacting [Azure support](https://azure.microsoft.com/support/plans/) is not set before the need to contact support arises, downtime will be prolonged as the support process is navigated for the first time. Include the process for contacting support and escalating issues as part of your application's resiliency from the outset. +* **Ensure that your application doesn't use more than the maximum number of storage accounts per subscription.** Azure allows a maximum of 200 storage accounts per subscription. If your application requires more storage accounts than are currently available in your subscription, you will have to create a new subscription and create additional storage accounts there. For more information, see [Azure subscription and service limits, quotas, and constraints](/azure/azure-subscription-service-limits/#storage-limits). +* **Ensure that your application doesn't exceed the scalability targets for virtual machine disks.** An Azure IaaS VM supports attaching a number of data disks depending on several factors, including the VM size and type of storage account. If your application exceeds the scalability targets for virtual machine disks, provision additional storage accounts and create the virtual machine disks there. For more information, see [Azure Storage Scalability and Performance Targets](/azure/storage/storage-scalability-targets/#scalability-targets-for-virtual-machine-disks) + +## Test +* **Perform failover and failback testing for your application.** If you haven't fully tested failover and failback, you can't be certain that the dependent services in your application come back up in a synchronized manner during disaster recovery. Ensure that your application's dependent services failover and fail back in the correct order. 
+* **Perform fault-injection testing for your application.** Your application can fail for many different reasons, such as certificate expiration, exhaustion of system resources in a VM, or storage failures. Test your application in an environment as close as possible to production, by simulating or triggering real failures. For example, delete certificates, artificially consume system resources, or delete a storage source. Verify your application's ability to recover from all types of faults, alone and in combination. Check that failures are not propagating or cascading through your system. +* **Run tests in production using both synthetic and real user data.** Test and production are rarely identical, so it's important to use blue/green or a canary deployment and test your application in production. This allows you to test your application in production under real load and ensure it will function as expected when fully deployed. + +## Security +* **Implement application-level protection against distributed denial of service (DDoS) attacks.** Azure services are protected against DDoS attacks at the network layer. However, Azure cannot protect against application-layer attacks, because it is difficult to distinguish true user requests from malicious user requests. For more information on how to protect against application-layer DDoS attacks, see the "Protecting against DDoS" section of [Microsoft Azure Network Security](http://download.microsoft.com/download/C/A/3/CA3FC5C0-ECE0-4F87-BF4B-D74064A00846/AzureNetworkSecurity_v3_Feb2015.pdf) (PDF download). +* **Implement the principle of least privilege for access to the application's resources.** The default for access to the application's resources should be as restrictive as possible. Grant higher level permissions on an approval basis. Granting overly permissive access to your application's resources by default can result in someone purposely or accidentally deleting resources. 
Azure provides [role-based access control](/azure/active-directory/role-based-access-built-in-roles/) to manage user privileges, but it's important to verify least privilege permissions for other resources that have their own permissions systems such as SQL Server. + +## Telemetry +* **Log telemetry data while the application is running in the production environment.** Capture robust telemetry information while the application is running in the production environment or you will not have sufficient information to diagnose the cause of issues while it's actively serving users. More information on logging best practices is available in the [Monitoring and Diagnostics guidance][monitoring-and-diagnostics-guidance] document. +* **Implement logging using an asynchronous pattern.** If logging operations are synchronous, they might block your application code. Ensure that your logging operations are implemented as asynchronous operations. +* **Correlate log data across service boundaries.** In a typical n-tier application, a user request may traverse several service boundaries. For example, a user request typically originates in the web tier and is passed to the business tier and finally persisted in the data tier. In more complex scenarios, a user request may be distributed to many different services and data stores. Ensure that your logging system correlates calls across service boundaries so you can track the request throughout your application. + +## Azure Resources +* **Use Azure Resource Manager templates to provision resources.** Resource Manager templates make it easier to automate deployments via PowerShell or the Azure CLI, which leads to a more reliable deployment process. For more information, see [Azure Resource Manager overview][resource-manager]. +* **Give resources meaningful names.** Giving resources meaningful names makes it easier to locate a specific resource and understand its role. 
For more information, see [Naming conventions for Azure resources](../best-practices/naming-conventions.md) +* **Use role-based access control (RBAC)**. Use RBAC to control access to the Azure resources that you deploy. RBAC lets you assign authorization roles to members of your DevOps team, to prevent accidental deletion or changes to deployed resources. For more information, see [Get started with access management in the Azure portal](/azure/active-directory/role-based-access-control-what-is/) +* **Use resource locks for critical resources, such as VMs.** Resource locks prevent an operator from accidentally deleting a resource. For more information, see [Lock resources with Azure Resource Manager](/azure/azure-resource-manager/resource-group-lock-resources/) +* **Regional pairs.** When deploying to two regions, choose regions from the same regional pair. In the event of a broad outage, recovery of one region is prioritized out of every pair. Some services such as Geo-Redundant Storage provide automatic replication to the paired region. For more information, see [Business continuity and disaster recovery (BCDR): Azure Paired Regions](/azure/best-practices-availability-paired-regions) +* **Organize resource groups by function and lifecycle**. In general, a resource group should contain resources that share the same lifecycle. This makes it easier to manage deployments, delete test deployments, and assign access rights, reducing the chance that a production deployment is accidentally deleted or modified. Create separate resource groups for production, development, and test environments. In a multi-region deployment, put resources for each region into separate resource groups. This makes it easier to redeploy one region without affecting the other region(s). + +## Azure Services +The following checklist items apply to specific services in Azure. + +### App Service +* **Use Standard or Premium tier.** These tiers support staging slots and automated backups. 
For more information, see [Azure App Service plans in-depth overview](/azure/app-service/azure-web-sites-web-hosting-plans-in-depth-overview/) +* **Avoid scaling up or down.** Instead, select a tier and instance size that meet your performance requirements under typical load, and then [scale out](/azure/app-service-web/web-sites-scale/) the instances to handle changes in traffic volume. Scaling up and down may trigger an application restart. +* **Store configuration as app settings.** Use app settings to hold configuration settings. Define the settings in your Resource Manager templates, or using PowerShell, so that you can apply them as part of an automated deployment / update process, which is more reliable. For more information, see [Configure web apps in Azure App Service](/azure/app-service-web/web-sites-configure/). +* **Create separate App Service plans for production and test.** Don't use slots on your production deployment for testing. All apps within the same App Service plan share the same VM instances. If you put production and test deployments in the same plan, it can negatively affect the production deployment. For example, load tests might degrade the live production site. By putting test deployments into a separate plan, you isolate them from the production version. +* **Separate web apps from web APIs**. If your solution has both a web front-end and a web API, consider decomposing them into separate App Service apps. This design makes it easier to decompose the solution by workload. You can run the web app and the API in separate App Service plans, so they can be scaled independently. If you don't need that level of scalability at first, you can deploy the apps into the same plan, and move them into separate plans later, if needed. +* **Avoid using the App Service backup feature to back up Azure SQL databases.** Instead, use [SQL Database automated backups][sql-backup]. 
App Service backup exports the database to a SQL .bacpac file, which costs DTUs. +* **Deploy to a staging slot.** Create a deployment slot for staging. Deploy application updates to the staging slot, and verify the deployment before swapping it into production. This reduces the chance of a bad update in production. It also ensures that all instances are warmed up before being swapped into production. Many applications have a significant warmup and cold-start time. For more information, see [Set up staging environments for web apps in Azure App Service](/azure/app-service-web/web-sites-staged-publishing/). +* **Create a deployment slot to hold the last-known-good (LKG) deployment.** When you deploy an update to production, move the previous production deployment into the LKG slot. This makes it easier to roll back a bad deployment. If you discover a problem later, you can quickly revert to the LKG version. For more information, see [Basic web application](../blueprints/managed-web-app/basic-web-app.md). +* **Enable diagnostics logging**, including application logging and web server logging. Logging is important for monitoring and diagnostics. See [Enable diagnostics logging for web apps in Azure App Service](/azure/app-service-web/web-sites-enable-diagnostic-log/) +* **Log to blob storage**. This makes it easier to collect and analyze the data. +* **Create a separate storage account for logs.** Don't use the same storage account for logs and application data. This helps to prevent logging from reducing application performance. +* **Monitor performance.** Use a performance monitoring service such as [New Relic](http://newrelic.com/) or [Application Insights](/azure/application-insights/app-insights-overview/) to monitor application performance and behavior under load. Performance monitoring gives you real-time insight into the application. It enables you to diagnose issues and perform root-cause analysis of failures. 
+ +### Application Gateway +* **Provision at least two instances.** Deploy Application Gateway with at least two instances. A single instance is a single point of failure. Use two or more instances for redundancy and scalability. In order to qualify for the [SLA](https://azure.microsoft.com/support/legal/sla/application-gateway/v1_0/), you must provision two or more medium or larger instances. + +### Azure Search +* **Provision more than one replica.** Use at least two replicas for read high-availability, or three for read-write high-availability. +* **Configure indexers for multi-region deployments.** If you have a multi-region deployment, consider your options for continuity in indexing. + + * If the data source is geo-replicated, point each indexer of each regional Azure Search service to its local data source replica. + * If the data source is not geo-replicated, point multiple indexers at the same data source, so that Azure Search services in multiple regions continuously and independently index from the data source. For more information, see [Azure Search performance and optimization considerations][search-optimization]. + +### Azure Storage +* **For application data, use read-access geo-redundant storage (RA-GRS).** RA-GRS storage replicates the data to a secondary region, and provides read-only access from the secondary region. If there is a storage outage in the primary region, the application can read the data from the secondary region. For more information, see [Azure Storage replication](/azure/storage/storage-redundancy/). +* **For VM disks, use Premium Storage** For more information, see [Premium Storage: High-Performance Storage for Azure Virtual Machine Workloads](/azure/storage/storage-premium-storage/). +* **For Queue storage, create a backup queue in another region.** For Queue storage, a read-only replica has limited use, because you can't queue or dequeue items. Instead, create a backup queue in a storage account in another region. 
If there is a storage outage, the application can use the backup queue, until the primary region becomes available again. That way, the application can still process new requests. + +### DocumentDB +* **Replicate the database across regions.** With a multi-region account, your DocumentDB database has one write region and multiple read regions. If there is a failure in the write region, you can read from another replica. The Client SDK handles this automatically. You can also fail over the write region to another region. For more information, see [Distribute data globally with DocumentDB](/azure/documentdb/documentdb-distribute-data-globally/). + +### SQL Database +* **Use Standard or Premium tier.** These tiers provide a longer point-in-time restore period (35 days). For more information, see [SQL Database options and performance](/azure/sql-database/sql-database-service-tiers/). +* **Enable SQL Database auditing.** Auditing can be used to diagnose malicious attacks or human error. For more information, see [Get started with SQL database auditing](/azure/sql-database/sql-database-auditing-get-started/). +* **Use Active Geo-Replication** Use Active Geo-Replication to create a readable secondary in a different region. If your primary database fails, or simply needs to be taken offline, perform a manual failover to the secondary database. Until you fail over, the secondary database remains read-only. For more information, see [SQL Database Active Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/). +* **Use sharding**. Consider using sharding to partition the database horizontally. Sharding can provide fault isolation. For more information, see [Scaling out with Azure SQL Database](/azure/sql-database/sql-database-elastic-scale-introduction/). +* **Use point-in-time restore to recover from human error.** Point-in-time restore returns your database to an earlier point in time. 
For more information, see [Recover an Azure SQL database using automated database backups][sql-restore]. +* **Use geo-restore to recover from a service outage.** Geo-restore restores a database from a geo-redundant backup. For more information, see [Recover an Azure SQL database using automated database backups][sql-restore]. + +### SQL Server (running in a VM) +* **Replicate the database.** Use SQL Server Always On Availability Groups to replicate the database. This provides high availability if one SQL Server instance fails. For more information, see the [N-tier reference architecture for Windows VMs](../blueprints/virtual-machines-windows/n-tier.md). +* **Back up the database**. If you are already using [Azure Backup](https://azure.microsoft.com/documentation/services/backup/) to back up your VMs, consider using [Azure Backup for SQL Server workloads using DPM](/azure/backup/backup-azure-backup-sql/). With this approach, there is one backup administrator role for the organization and a unified recovery procedure for VMs and SQL Server. Otherwise, use [SQL Server Managed Backup to Microsoft Azure](https://msdn.microsoft.com/library/dn449496.aspx). + +### Traffic Manager +* **Perform manual failback.** After a Traffic Manager failover, perform manual failback, rather than automatically failing back. Before failing back, verify that all application subsystems are healthy. Otherwise, you can create a situation where the application flips back and forth between data centers. For more information, see [Run VMs in multiple regions for high availability](../blueprints/virtual-machines-windows/multi-region-application.md). +* **Create a health probe endpoint**. Create a custom endpoint that reports on the overall health of the application. This enables Traffic Manager to fail over if any critical path fails, not just the front end. The endpoint should return an HTTP error code if any critical dependency is unhealthy or unreachable. Don't report errors for non-critical services, however. 
Otherwise, the health probe might trigger failover when it's not needed, creating false positives. For more information, see [Traffic Manager endpoint monitoring and failover](/azure/traffic-manager/traffic-manager-monitoring/). + +### Virtual Machines +* **Avoid running a production workload on a single VM.** A single VM deployment is not resilient to planned or unplanned maintenance. Instead, put multiple VMs in an availability set or [VM scale set](/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview/), with a load balancer in front. +* **Specify the availability set when you provision the VM.** Currently, there is no way to add a Resource Manager VM to an availability set after the VM is provisioned. When you add a new VM to an existing availability set, make sure to create a NIC for the VM, and add the NIC to the back-end address pool on the load balancer. Otherwise, the load balancer won't route network traffic to that VM. +* **Put each application tier into a separate Availability Set.** In an N-tier application, don't put VMs from different tiers into the same availability set. VMs in an availability set are placed across fault domains (FDs) and update domains (UD). However, to get the redundancy benefit of FDs and UDs, every VM in the availability set must be able to handle the same client requests. +* **Choose the right VM size based on performance requirements.** When moving an existing workload to Azure, start with the VM size that's the closest match to your on-premises servers. Then measure the performance of your actual workload with respect to CPU, memory, and disk IOPS, and adjust the size if needed. This helps to ensure the application behaves as expected in a cloud environment. Also, if you need multiple NICs, be aware of the NIC limit for each size. +* **Use premium storage for VHDs.** Azure Premium Storage provides high-performance, low-latency disk support. 
For more information, see [Premium Storage: High-Performance Storage for Azure Virtual Machine Workloads](/azure/storage/storage-premium-storage/). Choose a VM size that supports premium storage. +* **Create a separate storage account for each VM.** Place the VHDs for one VM into a separate storage account. This helps to avoid hitting the IOPS limits for storage accounts. For more information, see [Azure Storage Scalability and Performance Targets](/azure/storage/storage-scalability-targets/). However, if you are deploying a large number of VMs, be aware of the per-subscription limit for storage accounts. See [Storage limits](/azure/azure-subscription-service-limits/#storage-limits). +* **Create a separate storage account for diagnostic logs**. Don't write diagnostic logs to the same storage account as the VHDs, to avoid having the diagnostic logging affect the IOPS for the VM disks. A standard locally redundant storage (LRS) account is sufficient for diagnostic logs. +* **Install applications on a data disk, not the OS disk.** Otherwise, you may reach the disk size limit. +* **Use Azure Backup to back up VMs.** Backups protect against accidental data loss. For more information, see [Protect Azure VMs with a recovery services vault](/azure/backup/backup-azure-vms-first-look-arm/). +* **Enable diagnostic logs**, including basic health metrics, infrastructure logs, and [boot diagnostics][boot-diagnostics]. Boot diagnostics can help you diagnose a boot failure if your VM gets into a non-bootable state. For more information, see [Overview of Azure Diagnostic Logs][diagnostics-logs]. +* **Use the AzureLogCollector extension**. (Windows VMs only.) This extension aggregates Azure platform logs and uploads them to Azure storage, without the operator remotely logging into the VM. For more information, see [AzureLogCollector Extension](/azure/virtual-machines/virtual-machines-windows-log-collector-extension/?toc=%2fazure%2fvirtual-machines%2fwindows%2ftoc.json). 
+ +### Virtual Network +* **To whitelist or block public IP addresses, add an NSG to the subnet.** Block access from malicious users, or allow access only from users who have privilege to access the application. +* **Create a custom health probe.** Load Balancer Health Probes can test either HTTP or TCP. If a VM runs an HTTP server, the HTTP probe is a better indicator of health status than a TCP probe. For an HTTP probe, use a custom endpoint that reports the overall health of the application, including all critical dependencies. For more information, see [Azure Load Balancer overview](/azure/load-balancer/load-balancer-overview/). +* **Don't block the health probe.** The Load Balancer Health probe is sent from a known IP address, 168.63.129.16. Don't block traffic to or from this IP in any firewall policies or network security group (NSG) rules. Blocking the health probe would cause the load balancer to remove the VM from rotation. +* **Enable Load Balancer logging.** The logs show how many VMs on the back-end are not receiving network traffic due to failed probe responses. For more information, see [Log analytics for Azure Load Balancer](/azure/load-balancer/load-balancer-monitor-log/). 
+ + +[app-service-autoscale]: /azure/monitoring-and-diagnostics/insights-how-to-scale/ +[asynchronous-c-sharp]:https://msdn.microsoft.com/library/mt674882.aspx +[availability-sets]:/azure/virtual-machines/virtual-machines-windows-manage-availability/ +[azure-backup]: https://azure.microsoft.com/documentation/services/backup/ +[boot-diagnostics]: https://azure.microsoft.com/blog/boot-diagnostics-for-virtual-machines-v2/ +[circuit-breaker]: https://msdn.microsoft.com/library/dn589784.aspx +[cloud-service-autoscale]: /azure/cloud-services/cloud-services-how-to-scale/ +[diagnostics-logs]: /azure/monitoring-and-diagnostics/monitoring-overview-of-diagnostic-logs/ +[fma]: ../resiliency/failure-mode-analysis.md +[resilient-deployment]: ../resiliency/index.md#resilient-deployment +[load-balancer]: /azure/load-balancer/load-balancer-overview/ +[monitoring-and-diagnostics-guidance]: ../best-practices/monitoring.md +[resource-manager]: /azure/azure-resource-manager/resource-group-overview/ +[retry-pattern]: https://msdn.microsoft.com/library/dn589788.aspx +[retry-service-guidance]: ../best-practices/retry-service-specific.md +[search-optimization]: /azure/search/search-performance-optimization/ +[sql-backup]: /azure/sql-database/sql-database-automated-backups/ +[sql-restore]: /azure/sql-database/sql-database-recovery-using-backups/ +[traffic-manager]: /azure/traffic-manager/traffic-manager-overview/ +[traffic-manager-routing]: /azure/traffic-manager/traffic-manager-routing-methods/ +[vmss-autoscale]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-autoscale-overview/ diff --git a/docs/checklist/scalability.md b/docs/checklist/scalability.md new file mode 100644 index 00000000000..b84142f91ef --- /dev/null +++ b/docs/checklist/scalability.md @@ -0,0 +1,71 @@ +--- +title: Scalability checklist +description: Scalability checklist guidance for design concerns for Azure Autoscaling. 
+services: '' +documentationcenter: na +author: dragon119 +manager: christb +editor: '' +tags: '' + +ms.assetid: e505c665-a095-4013-a4b6-ccd79bcb2e1e +ms.service: best-practice +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 07/13/2016 +ms.author: masashin + +--- +# Scalability checklist +[!INCLUDE [header](../_includes/header.md)] + +## Service design +* **Partition the workload**. Design parts of the process to be discrete and decomposable. Minimize the size of each part, while following the usual rules for separation of concerns and the single responsibility principle. This allows the component parts to be distributed in a way that maximizes use of each compute unit (such as a role or database server). It also makes it easier to scale the application by adding instances of specific resources. For more information, see [Compute Partitioning Guidance](https://msdn.microsoft.com/library/dn589773.aspx). +* **Design for scaling**. Scaling allows applications to react to variable load by increasing and decreasing the number of instances of roles, queues, and other services they use. However, the application must be designed with this in mind. For example, the application and the services it uses must be stateless, to allow requests to be routed to any instance. This also prevents the addition or removal of specific instances from adversely impacting current users. You should also implement configuration or auto-detection of instances as they are added and removed, so that code in the application can perform the necessary routing. For example, a web application might use a set of queues in a round-robin approach to route requests to background services running in worker roles. The web application must be able to detect changes in the number of queues, to successfully route requests and balance the load on the application. +* **Scale as a unit**. Plan for additional resources to accommodate growth. 
For each resource, know the upper scaling limits, and use sharding or decomposition to go beyond these limits. Determine the scale units for the system in terms of well-defined sets of resources. This makes applying scale-out operations easier, and less prone to negative impact on the application through limitations imposed by lack of resources in some part of the overall system. For example, adding x number of web and worker roles might require y number of additional queues and z number of storage accounts to handle the additional workload generated by the roles. So a scale unit could consist of x web and worker roles, *y* queues, and *z* storage accounts. Design the application so that it's easily scaled by adding one or more scale units. +* **Avoid client affinity**. Where possible, ensure that the application does not require affinity. Requests can thus be routed to any instance, and the number of instances is irrelevant. This also avoids the overhead of storing, retrieving, and maintaining state information for each user. +* **Take advantage of platform autoscaling features**. Where the hosting platform supports an autoscaling capability, such as Azure Autoscale, prefer it to custom or third-party mechanisms unless the built-in mechanism can't fulfill your requirements. Use scheduled scaling rules where possible to ensure resources are available without a start-up delay, but add reactive autoscaling to the rules where appropriate to cope with unexpected changes in demand. You can use the autoscaling operations in the Service Management API to adjust autoscaling, and to add custom counters to rules. For more information, see [Auto-scaling guidance](../best-practices/auto-scaling.md). +* **Offload intensive CPU/IO tasks as background tasks**. If a request to a service is expected to take a long time to run or absorb considerable resources, offload the processing for this request to a separate task. 
Use worker roles or background jobs (depending on the hosting platform) to execute these tasks. This strategy enables the service to continue receiving further requests and remain responsive. For more information, see [Background jobs guidance](../best-practices/background-jobs.md). +* **Distribute the workload for background tasks**. Where there are many background tasks, or the tasks require considerable time or resources, spread the work across multiple compute units (such as worker roles or background jobs). For one possible solution, see the [Competing Consumers Pattern](https://msdn.microsoft.com/library/dn568101.aspx). +* **Consider moving towards a *shared-nothing* architecture**. A shared-nothing architecture uses independent, self-sufficient nodes that have no single point of contention (such as shared services or storage). In theory, such a system can scale almost indefinitely. While a fully shared-nothing approach is generally not practical for most applications, it may provide opportunities to design for better scalability. For example, avoiding server-side session state and client affinity, and partitioning data, all move an application towards a shared-nothing architecture. + +## Data management +* **Use data partitioning**. Divide the data across multiple databases and database servers, or design the application to use data storage services that can provide this partitioning transparently (examples include Azure SQL Database Elastic Database, and Azure Table storage). This approach can help to maximize performance and allow easier scaling. There are different partitioning techniques, such as horizontal, vertical, and functional. You can use a combination of these to achieve maximum benefit from increased query performance, simpler scalability, more flexible management, better availability, and to match the type of store to the data it will hold. 
Also, consider using different types of data store for different types of data, choosing the types based on how well they are optimized for the specific type of data. This may include using table storage, a document database, or a column-family data store, instead of, or as well as, a relational database. For more information, see [Data partitioning guidance](../best-practices/data-partitioning.md). +* **Design for eventual consistency**. Eventual consistency improves scalability by reducing or removing the time needed to synchronize related data partitioned across multiple stores. The cost is that data is not always consistent when it is read, and some write operations may cause conflicts. Eventual consistency is ideal for situations where the same data is read frequently but written infrequently. For more information, see the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). +* **Reduce chatty interactions between components and services**. Avoid designing interactions in which an application is required to make multiple calls to a service (each of which returns a small amount of data), rather than a single call that can return all of the data. Where possible, combine several related operations into a single request when the call is to a service or component that has noticeable latency. This makes it easier to monitor performance and optimize complex operations. For example, use stored procedures in databases to encapsulate complex logic, and reduce the number of round trips and resource locking. +* **Use queues to level the load for high velocity data writes**. Surges in demand for a service can overwhelm that service and cause escalating failures. To prevent this, consider implementing the [Queue-Based Load Leveling Pattern](https://msdn.microsoft.com/library/dn589783.aspx). Use a queue that acts as a buffer between a task and a service that it invokes. 
This can smooth intermittent heavy loads that may otherwise cause the service to fail or the task to time out. +* **Minimize the load on the data store**. The data store is commonly a processing bottleneck, a costly resource, and often not easy to scale out. Where possible, remove logic (such as processing XML documents or JSON objects) from the data store, and perform processing within the application. For example, instead of passing XML to the database (other than as an opaque string for storage), serialize or deserialize the XML within the application layer and pass it in a form that is native to the data store. It's typically much easier to scale out the application than the data store, so you should attempt to do as much of the compute-intensive processing as possible within the application. +* **Minimize the volume of data retrieved**. Retrieve only the data you require by specifying columns and using criteria to select rows. Make use of table value parameters and the appropriate isolation level. Use mechanisms like entity tags to avoid retrieving data unnecessarily. +* **Aggressively use caching**. Use caching wherever possible to reduce the load on resources and services that generate or deliver data. Caching is typically suited to data that is relatively static, or that requires considerable processing to obtain. Caching should occur at all levels where appropriate in each layer of the application, including data access and user interface generation. For more information, see the [Caching Guidance](../best-practices/caching.md). +* **Handle data growth and retention**. The amount of data stored by an application grows over time. This growth increases storage costs, and increases latency when accessing the data — which affects application throughput and performance. 
It may be possible to periodically archive some of the old data that is no longer accessed, or move data that is rarely accessed into long-term storage that is more cost efficient, even if the access latency is higher. +* **Optimize Data Transfer Objects (DTOs) using an efficient binary format**. DTOs are passed between the layers of an application many times. Minimizing the size reduces the load on resources and the network. However, balance the savings with the overhead of converting the data to the required format in each location where it is used. Adopt a format that has the maximum interoperability to enable easy reuse of a component. +* **Set cache control**. Design and configure the application to use output caching or fragment caching where possible, to minimize processing load. +* **Enable client side caching**. Web applications should enable cache settings on the content that can be cached. This is commonly disabled by default. Configure the server to deliver the appropriate cache control headers to enable caching of content on proxy servers and clients. +* **Use Azure blob storage and the Azure Content Delivery Network to reduce the load on the application**. Consider storing static or relatively static public content, such as images, resources, scripts, and style sheets, in blob storage. This approach relieves the application of the load caused by dynamically generating this content for each request. Additionally, consider using the Content Delivery Network to cache this content and deliver it to clients. Using the Content Delivery Network can improve performance at the client because the content is delivered from the geographically closest datacenter that contains a Content Delivery Network cache. For more information, see the [Content Delivery Network Guidance](../best-practices/cdn.md). +* **Optimize and tune SQL queries and indexes**. 
Some T-SQL statements or constructs may have an impact on performance that can be reduced by optimizing the code in a stored procedure. For example, avoid converting **datetime** types to a **varchar** before comparing with a **datetime** literal value. Use date/time comparison functions instead. Lack of appropriate indexes can also slow query execution. If you use an object/relational mapping framework, understand how it works and how it may affect performance of the data access layer. For more information, see [Query Tuning](https://technet.microsoft.com/library/ms176005.aspx). +* **Consider de-normalizing data**. Data normalization helps to avoid duplication and inconsistency. However, maintaining multiple indexes, checking for referential integrity, performing multiple accesses to small chunks of data, and joining tables to reassemble the data imposes an overhead that can affect performance. Consider if some additional storage volume and duplication is acceptable in order to reduce the load on the data store. Also, consider if the application itself (which is typically easier to scale) can be relied upon to take over tasks such as managing referential integrity in order to reduce the load on the data store. For more information, see [Data partitioning guidance](../best-practices/data-partitioning.md). + +## Service implementation +* **Use asynchronous calls**. Use asynchronous code wherever possible when accessing resources or services that may be limited by I/O or network bandwidth, or that have a noticeable latency, in order to avoid locking the calling thread. To implement asynchronous operations, use the [Task-based Asynchronous Pattern (TAP)](https://msdn.microsoft.com/library/hh873175.aspx). +* **Avoid locking resources, and use an optimistic approach instead**. Never lock access to resources such as storage or other services that have noticeable latency, because this is a primary cause of poor performance. 
Always use optimistic approaches to managing concurrent operations, such as writing to storage. Use features of the storage layer to manage conflicts. In distributed applications, data may be only eventually consistent. +* **Compress highly compressible data over high latency, low bandwidth networks**. In the majority of cases in a web application, the largest volume of data generated by the application and passed over the network is HTTP responses to client requests. HTTP compression can reduce this considerably, especially for static content. This can reduce cost as well as reducing the load on the network, though compressing dynamic content does apply a fractionally higher load on the server. In other, more generalized environments, data compression can reduce the volume of data transmitted and minimize transfer time and costs, but the compression and decompression processes incur overhead. As such, compression should only be used when there is a demonstrable gain in performance. Other serialization methods, such as JSON or binary encodings, may reduce the payload size while having less impact on performance, whereas XML is likely to increase it. +* **Minimize the time that connections and resources are in use**. Maintain connections and resources only for as long as you need to use them. For example, open connections as late as possible, and allow them to be returned to the connection pool as soon as possible. Acquire resources as late as possible, and dispose of them as soon as possible. +* **Minimize the number of connections required**. Service connections absorb resources. Limit the number that are required and ensure that existing connections are reused whenever possible. For example, after performing authentication, use impersonation where appropriate to run code as a specific identity. This can help to make best use of the connection pool by reusing connections. 
+ + > [!NOTE] + > APIs for some services automatically reuse connections, provided service-specific guidelines are followed. It's important that you understand the conditions that enable connection reuse for each service that your application uses. + > + > +* **Send requests in batches to optimize network use**. For example, send and read messages in batches when accessing a queue, and perform multiple reads or writes as a batch when accessing storage or a cache. This can help to maximize efficiency of the services and data stores by reducing the number of calls across the network. +* **Avoid a requirement to store server-side session state** where possible. Server-side session state management typically requires client affinity (that is, routing each request to the same server instance), which affects the ability of the system to scale. Ideally, you should design clients to be stateless with respect to the servers that they use. However, if the application must maintain session state, store sensitive data or large volumes of per-client data in a distributed server-side cache that all instances of the application can access. +* **Optimize table storage schemas**. When using table stores that require the table and column names to be passed and processed with every query, such as Azure table storage, consider using shorter names to reduce this overhead. However, do not sacrifice readability or manageability by using overly compact names. +* **Use the Task Parallel Library (TPL) to perform asynchronous operations**. The TPL makes it easy to write asynchronous code that performs I/O-bound operations. Use *ConfigureAwait(false)* wherever possible to eliminate the dependency of a continuation on a specific synchronization context. This reduces the chances of thread-deadlock occurring. +* **Create resource dependencies during deployment or at application startup**.
Avoid repeated calls to methods that test the existence of a resource and then create the resource if it does not exist. (Methods such as *CloudTable.CreateIfNotExists* and *CloudQueue.CreateIfNotExists* in the Azure Storage Client Library follow this pattern). These methods can impose considerable overhead if they are invoked before each access to a storage table or storage queue. Instead: + * Create the required resources when the application is deployed, or when it first starts (a single call to *CreateIfNotExists* for each resource in the startup code for a web or worker role is acceptable). However, be sure to handle exceptions that may arise if your code attempts to access a resource that doesn't exist. In these situations, you should log the exception, and possibly alert an operator that a resource is missing. + * Under some circumstances, it may be appropriate to create the missing resource as part of the exception handling code. But you should adopt this approach with caution as the non-existence of the resource might be indicative of a programming error (a misspelled resource name for example), or some other infrastructure-level issue. +* **Use lightweight frameworks**. Carefully choose the APIs and frameworks you use to minimize resource usage, execution time, and overall load on the application. For example, using Web API to handle service requests can reduce the application footprint and increase execution speed, but it may not be suitable for advanced scenarios where the additional capabilities of Windows Communication Foundation are required. +* **Consider minimizing the number of service accounts**. For example, use a specific account to access resources or services that impose a limit on connections, or perform better where fewer connections are maintained. This approach is common for services such as databases, but it can affect the ability to accurately audit operations due to the impersonation of the original user. 
+* **Carry out performance profiling and load testing** during development, as part of test routines, and before final release to ensure the application performs and scales as required. This testing should occur on the same type of hardware as the production platform, and with the same types and quantities of data and user load as it will encounter in production. For more information, see [Testing the performance of a cloud service](/azure/vs-azure-tools-performance-profiling-cloud-services/). + diff --git a/docs/checklist/toc.md b/docs/checklist/toc.md new file mode 100644 index 00000000000..19cec9ebb22 --- /dev/null +++ b/docs/checklist/toc.md @@ -0,0 +1,4 @@ +# Design Review Checklists +## [Availability](./availability.md) +## [Resiliency](./resiliency.md) +## [Scalability](./scalability.md) \ No newline at end of file diff --git a/docs/docfx.json b/docs/docfx.json new file mode 100644 index 00000000000..ced7d61b25f --- /dev/null +++ b/docs/docfx.json @@ -0,0 +1,64 @@ +{ + "build": { + "content": [ + { + "src": ".", + "dest": ".", + "files": [ + "**/*.md" + ], + "exclude": [ + "**/obj/**", + "**/*.liquid.md", + "**/_css/**", + "**/_includes/**", + "**/_bread/**", + "**/_themes/**" + ] + }, + { + "src": "_bread", + "dest": "bread", + "files": [ + "**/*.yml" + ] + } + ], + "resource": [ + { + "src": ".", + "dest": ".", + "files": [ + "**/*.css", + "**/*.png", + "**/*.jpg", + "**/*.jpeg", + "**/*.gif", + "**/*.svg" + ], + "exclude": [ + "**/obj/**", + "**/_themes/**" + ] + } + ], + "overwrite": [], + "externalReference": [], + "globalMetadata": { + "breadcrumb_path": "/azure/architecture/bread/toc.json", + "brand": "azure", + "searchScope": [ + "Azure" + ] + }, + "fileMetadata": { + "tocRel":{ + "blueprints/**.md":"../toc.json" + } + }, + "template": [ + "docs.html" + ], + "dest": "azure" + } +} \ No newline at end of file diff --git a/docs/elasticsearch/automated-performance-tests.md b/docs/elasticsearch/automated-performance-tests.md new file mode 100644 index 
00000000000..0739b42d123 --- /dev/null +++ b/docs/elasticsearch/automated-performance-tests.md @@ -0,0 +1,114 @@ +--- +title: Run the automated Elasticsearch performance tests +description: Description of how you can run the performance tests in your own environment. +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: automated-resilience-tests +ms.assetid: a83b6752-130e-4a3b-a67a-46b03d57ce74 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +pnp.series.next: automated-resilience-tests +--- +# Run automated performance tests +[!INCLUDE [header](../_includes/header.md)] + +The documents [Tuning data ingestion performance for Elasticsearch on Azure] and [Tuning data aggregation and query performance for Elasticsearch on Azure] describe a number of performance tests that were run against a sample Elasticsearch cluster. + +These tests were scripted to enable them to be run in an automated manner. This document describes how you can repeat the tests in your own environment. + +## Prerequisites +The automated tests require the following items: + +* An Elasticsearch cluster. +* A JMeter environment setup as described by the document [Creating a performance testing environment for Elasticsearch on Azure]. +* [Python 3.5.1](https://www.python.org/downloads/release/python-351/) installed on the JMeter master VM. + +## How the tests work +The tests are run using JMeter. A JMeter master server loads a test plan and passes it to a set of JMeter subordinate servers that actually run the tests. The JMeter master server coordinates the JMeter subordinate servers and accumulates the results. 
+ +The following test plans are provided: + +* [elasticsearchautotestplan3nodes.jmx](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/templates/elasticsearchautotestplan3nodes.jmx). Runs the ingestion test over a 3-node cluster. +* [elasticsearchautotestplan6nodes.jmx](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/templates/elasticsearchautotestplan6nodes.jmx). Runs the ingestion test over a 6-node cluster. +* [elasticsearchautotestplan6qnodes.jmx](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/templates/elasticsearchautotestplan6qnodes.jmx). Runs the ingestion and query test over a 6-node cluster. +* [elasticsearchautotestplan6nodesqueryonly.jmx](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/templates/elasticsearchautotestplan6nodesqueryonly.jmx). Runs the query-only test over a 6-node cluster. + +You can use these test plans as a basis for your own scenarios if you need fewer or more nodes. + +The test plans use a JUnit request sampler to generate and upload the test data. The JMeter test plan creates and runs this sampler, and monitors each of the Elasticsearch nodes for performance data. + +## Building and deploying the JUnit JAR and dependencies +Before running the performance tests you should download, compile, and deploy the JUnit tests located under the performance/junitcode folder. These tests are referenced by the JMeter test plan. For more information, see the procedure "Importing an existing JUnit test project into Eclipse" in the document [Deploying a JMeter JUnit sampler for testing Elasticsearch performance]. + +There are two versions of the JUnit tests: + +* [Elasticsearch1.73](https://github.com/mspnp/azure-guidance/tree/master/ingestion-and-query-tests/junitcode/elasticsearch1.73). Use this code for performing the ingestion tests. These tests use Elasticsearch 1.73. 
+* [Elasticsearch2](https://github.com/mspnp/azure-guidance/tree/master/ingestion-and-query-tests/junitcode/elasticsearch2). Use this code for performing the query tests. These tests use Elasticsearch 2.1 and later. + +Copy the appropriate Java archive (JAR) file along with the rest of the dependencies to your JMeter machines. The process is described in [Deploying a JMeter JUnit sampler for testing Elasticsearch performance][Deploying a JMeter JUnit sampler for testing Elasticsearch performance]. + +> **Important** After deploying a JUnit test, use JMeter to load and configure the test plans that reference this JUnit test and ensure that the BulkInsertLarge thread group references the correct JAR file, JUnit class name, and test method: +> +> ![JUnit Request UI](./images/performance-tests-image1.png) +> +> Save the updated test plans before running the tests. +> +> + +## Creating the test indexes +Each test performs ingestion and/or queries against a single index specified when the test is run. You should create the index using the schemas described in the appendices to the documents [Tuning data ingestion performance for Elasticsearch on Azure] and [Tuning data aggregation and query performance for Elasticsearch on Azure] and configure them according to your test scenario (doc values enabled/disabled, multiple replicas, and so on.) Note that the test plans assume that the index contains a single type named *ctip*. + +## Configuring the test script parameters +Copy the following test script parameter files to the JMeter server machine: + +* [run.properties](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/run.properties). 
This file specifies the number of JMeter test threads to use, the duration of the test (in seconds), the IP address of a node (or a load balancer) in the Elasticsearch cluster, and the name of the cluster: + + ```ini + nthreads=3 + duration=300 + elasticip= + clustername= + ``` + + Edit this file and specify the appropriate values for your test and cluster. +* [query-config-win.ini](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/query-config-win.ini) and [query-config-nix.ini](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/query-config-nix.ini). These two files contain the same information; the *win* file is formatted for Windows filenames and paths, and the *nix* file is formatted for Linux filenames and paths: + + ```ini + [DEFAULT] + debug=true #if true shows console logs. + + [RUN] + pathreports=C:\Users\administrator1\jmeter\test-results\ #path where tests results are saved. + jmx=C:\Users\administrator1\testplan.jmx #path to the JMeter test plan. + machines=10.0.0.1,10.0.0.2,10.0.0.3 #IPs of the Elasticsearch data nodes separated by commas. + reports=aggr,err,tps,waitio,cpu,network,disk,response,view #Name of the reports separated by commas. + tests=idx1,idx2 #Elasticsearch index(es) name(s) to test, comma delimited if more than one. + properties=run.properties #Name of the properties file. + ``` + + Edit this file to specify the locations of the test results, the name of the JMeter test plan to run, the IP addresses of the Elasticsearch data nodes you are collecting performance metrics from, the reports containing the raw performance data that will be generated, and the name (or comma-delimited names) of the index(es) under test. If you specify more than one index, the tests run one after another. If the run.properties file is located in a different folder or directory, specify the full path to this file.
+ +## Running the tests +* Copy the file [query-test.py](https://github.com/mspnp/azure-guidance/blob/master/ingestion-and-query-tests/query-test.py) to the JMeter server machine, in the same folder as the run.properties and query-config-win.ini (query-config-nix.ini) files. +* Ensure that jmeter.bat (Windows) or jmeter.sh (Linux) is on the executable path for your environment. +* Run the query-test.py script from the command line to perform the tests: + + ```cmd + py query-test.py + ``` +* When the test has completed, the results are stored as the set of comma-separated values (CSV) files specified in the query-config-win.ini (query-config-nix.ini) file. You can use Excel to analyze and graph this data. + +[Tuning Data Ingestion Performance for Elasticsearch on Azure]: data-ingestion-performance.md +[Tuning Data Aggregation and Query Performance for Elasticsearch on Azure]: data-aggregation-and-query-performance.md +[Creating a Performance Testing Environment for Elasticsearch on Azure]: performance-testing-environment.md +[Deploying a JMeter JUnit Sampler for Testing Elasticsearch Performance]: jmeter-junit-sampler.md diff --git a/docs/elasticsearch/automated-resilience-tests.md b/docs/elasticsearch/automated-resilience-tests.md new file mode 100644 index 00000000000..f5dd1ea2c3f --- /dev/null +++ b/docs/elasticsearch/automated-resilience-tests.md @@ -0,0 +1,167 @@ +--- +title: Run the automated Elasticsearch resiliency tests +description: Description of how you can run the resiliency tests in your own environment.
+services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: deploy-jmeter-junit-sampler +pnp.series.next: automated-performance-tests +ms.assetid: 5ee97393-0680-4b60-9674-d3708014f4f8 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Run automated resiliency tests +[!INCLUDE [header](../_includes/header.md)] + +In [Configuring resilience and recovery on Elasticsearch on Azure][elasticsearch-resilience-recovery], we described a series of tests that were performed against a sample Elasticsearch cluster to determine how well the system responded to some common forms of failure and how well it recovered. The tests were scripted to enable them to be run in an automated manner. This document describes how you can repeat the tests in your own environment. + +The following scenarios were tested: + +* **Node failure and restart with no data loss**. A data node is stopped and restarted after 5 minutes. + Elasticsearch was configured not to reallocate missing shards in this interval, so no additional I/O + is incurred in moving shards around. When the node restarts, the recovery process brings the shards + on that node back up to date. +* **Node failure with catastrophic data loss**. A data node is stopped and the data that it holds + is erased to simulate catastrophic disk failure. The node is then restarted (after 5 minutes), + effectively acting as a replacement for the original node. The recovery process requires + rebuilding the missing data for this node, and may involve relocating shards held on other nodes. +* **Node failure and restart with no data loss, but with shard reallocation**. A data node is + stopped and the shards that it holds are reallocated to other nodes. The node is then restarted + and more reallocation occurs to rebalance the cluster. +* **Rolling updates**. 
Each node in the cluster is stopped and restarted after a short interval + to simulate machines being rebooted after a software update. Only one node is stopped at any one time. + Shards are not reallocated while a node is down. + +## Prerequisites +The automated tests require the following items: + +* An Elasticsearch cluster. +* A JMeter environment setup as described by the [performance testing guidance]. +* The following additions installed on the JMeter master VM only. + + * Java Runtime 7. + * Nodejs 4.x.x or later. + * The Git command line tools. + +## How the scripts work +The test scripts are intended to run on the JMeter Master VM. When you select a test to run, the scripts +perform the following sequence of operations: + +1. Start a JMeter test plan passing the parameters that you have specified. +2. Copy a script that performs the operations required by the test to a specified VM in the cluster. This can be any VM that has a public IP address, or the *Jumpbox* VM if you have built the cluster using the [Azure Elasticsearch quickstart template](https://github.com/Azure/azure-quickstart-templates/tree/master/elasticsearch). +3. Run the script on the VM (or Jumpbox). + +The following image shows the structure of the test environment and Elasticsearch cluster. Note that the test scripts use secure shell (SSH) to connect to each node in the cluster to perform various Elasticsearch operations such as stopping or restarting a node. + +![Elasticsearch cluster and test environment](./images/resilience-testing1.png) + +## Setting up the JMeter tests +Before running the resilience tests you should compile and deploy the JUnit tests located in the resiliency/jmeter/tests folder. These tests are referenced by the JMeter test plan. 
For more information, see the procedure "Importing an existing JUnit test project into Eclipse" in [Deploying a JMeter JUnit sampler for testing Elasticsearch performance][Deploying a JMeter JUnit sampler for testing Elasticsearch performance]. + +There are two versions of the JUnit tests held in the following folders: + +* **Elasticsearch17**. The project in this folder generates the file Elasticsearch17.jar. Use this + JAR for testing Elasticsearch versions 1.7.x. +* **Elasticsearch20**. The project in this folder generates the file Elasticsearch20.jar. Use this + JAR for testing Elasticsearch version 2.0.0 and later. + +Copy the appropriate JAR file along with the rest of the dependencies to your JMeter machines. The +process is described by the procedure "Deploying a JUnit test to JMeter" in [Deploying a JMeter JUnit sampler for testing Elasticsearch performance]. + +## Configuring VM security for each node +The test scripts require an authentication certificate be installed on each Elasticsearch node +in the cluster. This enables the scripts to run automatically without prompting for a username or +password as they connect to the various VMs. + +Start by logging in to one of the nodes in the Elasticsearch cluster (or the Jumpbox VM) and then +run the following command to generate an authentication key: + +```Shell +ssh-keygen -t rsa +``` + +While connected to the Elasticsearch node (or Jumpbox), run the following commands for every node in +the Elasticsearch cluster. Replace `<username>` with the name of a valid user on each VM, and +replace `<nodename>` with the DNS name or IP address of the VM hosting the Elasticsearch node. +Note that you will be prompted for the password of the user when running these commands.
+For more information, see [SSH login without password](http://www.linuxproblem.org/art_9.html): + +```Shell +ssh <username>@<nodename> mkdir -p .ssh +cat .ssh/id_rsa.pub | ssh <username>@<nodename> 'cat >> .ssh/authorized_keys' +``` + +## Downloading and configuring the test scripts +The test scripts are provided in a Git repository. Use the following procedure to download and +configure the scripts. + +On the JMeter master machine where you will run the tests, open a Git desktop window (Git Bash) +and clone the repository that contains the scripts, as follows: + +```Shell +git clone https://github.com/mspnp/azure-guidance.git +``` + +Move to the resiliency-tests folder and run the following command to install the dependencies required +to run the tests: + +```Shell +npm install +``` + +If the JMeter master is running on Windows, download [Plink](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html), which is a command-line interface to the PuTTY Telnet client. Copy the Plink executable to the resiliency-tests/lib folder. + +If the JMeter master is running on Linux, you don’t need to download Plink but you will need to +configure password-less SSH between the JMeter master and the Elasticsearch node or Jumpbox +you used by following the steps outlined in the procedure "Configuring VM security for each node." + +Edit the following configuration parameters in the `config.js` file to match your test +environment and Elasticsearch cluster. These parameters are common to all of the tests: + +| Name | Description | Default Value | +| --- | --- | --- | +| `jmeterPath` |Local path where JMeter is located. |`C:/apache-jmeter-2.13` | +| `resultsPath` |Relative directory where the script dumps the result. |`results` | +| `verbose` |Indicates whether the script outputs in verbose mode or not. |`true` | +| `remote` |Indicates whether the JMeter tests run locally or on the remote servers. |`true` | +| `cluster.clusterName` |The name of the Elasticsearch cluster.
|`elasticsearch` | +| `cluster.jumpboxIp` |The IP address of the Jumpbox machine. |- | +| `cluster.username` |The admin user you created while deploying the cluster. |- | +| `cluster.password` |The password for the admin user. |- | +| `cluster.loadBalancer.ip` |The IP address of the Elasticsearch load balancer. |- | +| `cluster.loadBalancer.url` |Base URL of the load balancer. |- | + +## Running the tests +Move to the resiliency-tests folder and run the following command: + +```Shell +node app.js +``` + +The following menu should appear: + +![Test menu](./images/resilience-testing2.png) + +Enter the number of the scenario you want to run: `11`, `12`, `13` or `21`. + +Once you select a scenario, the test will run automatically. The results are stored as a set of comma-separated value (CSV) files in a folder created under the results directory. Each run has its own results folder. +You can use Excel to analyze and graph this data. + +[Running Elasticsearch on Azure]: index.md +[Tuning Data Ingestion Performance for Elasticsearch on Azure]: data-ingestion-performance.md +[performance testing guidance]: performance-testing-environment.md +[JMeter guidance]: implementing-jmeter.md +[Considerations for JMeter]: jmeter-junit-sampler.md +[Query aggregation and performance]: query-aggregation-performance.md +[elasticsearch-resilience-recovery]: resilience-and-recovery.md +[Resilience and Recovery Testing]: automated-resilience-tests.md +[Deploying a JMeter JUnit Sampler for Testing Elasticsearch Performance]: jmeter-junit-sampler.md diff --git a/docs/elasticsearch/data-aggregation-and-query-performance.md b/docs/elasticsearch/data-aggregation-and-query-performance.md new file mode 100644 index 00000000000..56e35b3f28f --- /dev/null +++ b/docs/elasticsearch/data-aggregation-and-query-performance.md @@ -0,0 +1,1273 @@ +--- +title: Tune data aggregation and query performance with Elasticsearch +description: >- + A summary of considerations when optimizing query and search 
performance for + Elasticsearch. +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: data-ingestion-performance +pnp.series.next: resilience-and-recovery +ms.assetid: da54e75e-0b08-45b5-a171-f67d88b77c5b +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Tune data aggregation and query performance +[!INCLUDE [header](../_includes/header.md)] + +A primary reason for using Elasticsearch is to support searches through data. Users should be able to quickly locate the information they are looking for. Additionally, the system must enable users to ask questions of the data, seek correlations, and come to conclusions that can drive business decisions. This processing is what differentiates data from information. + +This document summarizes options that you can consider when determining the best way to optimize your system for query and search performance. + +All performance recommendations depend largely on the scenarios that apply to your situation, the volume of data that you are indexing, and the rate at which applications and users query your data. You should carefully test the results of any change in configuration or indexing structure using your own data and workloads to assess the benefits to your specific scenarios. To this end, this document also describes a number of benchmarks that were performed for one specific scenario implemented using different configurations. You can adapt the approach taken to assess the performance of your own systems. The details of these tests are described in the [appendix](#appendix-the-query-and-aggregation-performance-test). + +## Index and query performance considerations +This section describes some common factors that you should think about when designing indexes that need to support fast querying and searching. 
+ +### Storing multiple types in an index +An Elasticsearch index can contain multiple types. It may be better to avoid this approach and create a separate index for each type. Consider the following points: + +* Different types might specify different analyzers, and it is not always clear which analyzer Elasticsearch should use if a query is performed at the index level rather than at the type level. See [Avoiding Type Gotchas](https://www.elastic.co/guide/en/elasticsearch/guide/current/mapping.html#_avoiding_type_gotchas) for details. +* Shards for indexes that hold multiple types will likely be bigger than those for indexes that contain a single type. The bigger a shard, the more effort is required by Elasticsearch to filter data when performing queries. +* If there is a significant mismatch between data volumes for the types, information for one type can become sparsely distributed across many shards reducing the efficiency of searches that retrieve this data. + + ![The effects of sharing an index between types](./images/query-performance1.png) + + ***The effects of sharing an index between types*** + + In the upper part of the diagram, the same index is shared by documents of type A and type B. There are many more documents of type A than type B. Searches for type A will involve querying all four shards. The lower part of the diagram shows the effect if separate indexes are created for each type. In this case, searches for type A will only require accessing two shards. +* Small shards can be more evenly distributed than large shards, making it easier for Elasticsearch to spread the load across nodes. +* Different types might have different retention periods. It can be difficult to archive old data that shares shards with active data. + +However, under some circumstances sharing an index across types can be efficient if: + +* Searches regularly span types held in the same index. +* The types only have a small number of documents each. 
Maintaining a separate set of shards for each type can become a significant overhead in this case. + +### Optimizing index types +An Elasticsearch index contains a copy of the original JSON documents that were used to populate it. This information is held in the [*\_source*](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#mapping-source-field) field of each indexed item. This data is not searchable, but by default is returned by *get* and *search* requests. However, this field incurs overhead and occupies storage, making shards larger and increasing the volume of I/O performed. You can disable the *\_source* field on a per type basis: + +```http +PUT my_index +{ + "mappings": { + "my_type": { + "_source": { + "enabled": false + } + } + } +} +``` +Disabling this field also removes the ability to perform the following operations: + +* Updating data in the index by using the *update* API. +* Performing searches that return highlighted data. +* Reindexing from one Elasticsearch index directly to another. +* Changing mappings or analysis settings. +* Debugging queries by viewing the original document. + +### Reindexing data +The number of shards available to an index ultimately determines the capacity of the index. You can take an initial (and informed) guess at how many shards will be required, but you should always consider your document re-indexing strategy up front. In many cases, reindexing may be an intended task as data grows. You may not want to allocate a large number of shards to an index initially, for the sake of search optimization, but allocate new shards as the volume of data expands. In other cases reindexing might need to be performed on a more ad-hoc basis if your estimates about data volume growth simply prove to be inaccurate. + +> [!NOTE] +> Reindexing might not be necessary for data that ages quickly. In this case, an application might create a new index for each period of time. 
Examples include performance logs or audit data which could be stored in a fresh index each day. +> +> + + + +Reindexing effectively involves creating a new index from the data in an old one, and then removing the old index. If an index is large, this process can take time, and you may need to ensure that the data remains searchable during this period. For this reason, you should create an [alias for each index](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html), and queries should retrieve data through these aliases. While reindexing, keep the alias pointing at the old index, and then switch it to reference the new index once reindexing is complete. This approach is also useful for accessing time-based data that creates a new index each day. To access the current data use an alias that rolls over to the new index as it is created. + +### Managing mappings +Elasticsearch uses mappings to determine how to interpret the data that occurs in each field in a document. Each type has its own mapping, which effectively defines a schema for that type. Elasticsearch uses this information to generate inverted indexes for each field in the documents in a type. In any document, each field has a datatype (such as *string*, *date*, or *long*) and a value. You can specify the mappings for an index when the index is first created, or they can be inferred by Elasticsearch when new documents are added to a type. However, consider the following points: + +* Mappings generated dynamically can cause errors depending on how fields are interpreted when documents are added to an index. For example, document 1 could contain a field A that holds a number and causes Elasticsearch to add a mapping that specifies that this field is a *long*. If a subsequent document is added in which field A contains nonnumeric data, then it will fail. In this case, field A should probably have been interpreted as a string when the first document was added. 
Specifying this mapping when the index is created can help to prevent such problems. +* Design your documents to avoid generating excessively large mappings as this can add significant overhead when performing searches, consume lots of memory, and also cause queries to fail to find data. Adopt a consistent naming convention for fields in documents that share the same type. For example, don't use field names such as "first_name", "FirstName", and "forename" in different documents. Use the same field name in each document. Additionally, do not attempt to use values as keys (this is a common approach in Column-Family databases, but can cause inefficiencies and failures with Elasticsearch.) For more information, see [Mapping Explosion](https://www.elastic.co/blog/found-crash-elasticsearch#mapping-explosion). +* Use *not_analyzed* to avoid tokenization where appropriate. For example, if a document contains a string field named *data* that holds the value "ABC-DEF" then you might attempt to perform a search for all documents that match this value as follows: + + ```http + GET /myindex/mydata/_search + { + "query" : { + "filtered" : { + "filter" : { + "term" : { + "data" : "ABC-DEF" + } + } + } + } + } + ``` + + However, this search will fail to return the expected results due to the way in which the string ABC-DEF is tokenized when it is indexed. It will be effectively split into two tokens, ABC and DEF, by the hyphen. This feature is designed to support full text searching, but if you want the string to be interpreted as a single atomic item you should disable tokenization when the document is added to the index. 
You can use a mapping such as this: + + ```http + PUT /myindex + { + "mappings" : { + "mydata" : { + "properties" : { + "data" : { + "type" : "string", + "index" : "not_analyzed" + } + } + } + } + } + ``` + + For more information, see [Finding Exact Values](https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html#_term_filter_with_text). + +### Using doc values +Many queries and aggregations require that data is sorted as part of the search operation. Sorting requires being able to map one or more terms to a list of documents. To assist in this process, Elasticsearch can load all of the values for a field used as a sort key into memory. This information is known as *fielddata*. The intent is that caching fielddata in memory incurs less I/O and might be faster than repeatedly reading the same data from disk. However, if a field has high cardinality then storing the fielddata in memory can consume a lot of heap space, possibly impacting the ability to perform other concurrent operations, or even exhausting storage causing Elasticsearch to fail. + +As an alternative approach, Elasticsearch also supports *doc values*. A doc value is similar to an item of in-memory fielddata except that it is stored on disk and created when data is stored in an index (fielddata is constructed dynamically when a query is performed.) Doc values do not consume heap space, and so are useful for queries that sort or aggregate data across fields that can contain a very large number of unique values. Additionally, the reduced pressure on the heap can help to offset the performance differences between retrieving data from disk and reading from memory. Garbage collection is likely to occur less often, and other concurrent operations that utilize memory are less likely to be affected. 
+ +You enable or disable doc values on a per-property basis in an index using the *doc_values* attribute, as shown by the following example: + +```http +PUT /myindex +{ + "mappings" : { + "mydata" : { + "properties" : { + "data" : { + ... + "doc_values": true + } + } + } + } +} +``` +> [!NOTE] +> Doc values are enabled by default with Elasticsearch version 2.0.0 onwards. +> +> + +The exact impact of using doc values is likely to be highly specific to your own data and query scenarios, so be prepared to conduct performance testing to establish their usefulness. You should also note that doc values do not work with analyzed string fields. + +### Using replicas to reduce query contention +A common strategy to boost the performance of queries is to create many replicas of each index. Data retrieval operations can be satisfied by fetching data from a replica. However, this strategy can severely impact the performance of data ingestion operations, so it needs to be used with care in scenarios that involve mixed workloads. Additionally, this strategy is only of benefit if replicas are distributed across nodes and do not compete for resources with primary shards that are part of the same index. Remember that it is possible to increase or decrease the number of replicas for an index dynamically. + +### Using the shard request cache +Elasticsearch can cache the local data requested by queries on each shard in memory. This enables searches that retrieve the same data to run more quickly, data can be read from memory rather than disk storage. Caching data in this way can therefore improve the performance of some search operations, at the cost of reducing the memory available to other tasks being performed concurrently. There is also the risk that data served from the cache is outdated. The data in the cache is only invalidated when the shard is refreshed and the data has changed. 
The frequency of refreshes is governed by the value of the *refresh_interval* setting of the index. + +The request caching for an index is disabled by default, but you can enable it as follows: + +```http +PUT /myindex/_settings +{ + "index.requests.cache.enable": true +} +``` + +The shard request cache is most suitable for information that remains relatively static, such as historical or logging data. + +### Using client nodes +All queries are processed by the node that first receives the request. This node sends further requests to all other nodes containing shards for the indices being queried, and then accumulates the results for returning the response. If a query involves aggregating data or performing complex computations, the initial node is responsible for performing the appropriate processing. If your system has to support a relatively small number of complex queries, consider creating a pool of client nodes to alleviate the load from the data nodes. Conversely, if your system has to handle a large number of simple queries, then submit these requests direct to the data nodes, and use a load balancer to distribute the requests evenly. + +### Tuning queries +The following points summarize tips for maximizing the performance of Elasticsearch queries: + +* Avoid queries that involve wild cards wherever possible. +* If the same field is subject to full-text searching and exact matching, then consider storing the data for the field in analyzed and nonanalyzed forms. Perform full-text searches against the analyzed field, and exact matches against the nonanalyzed field. +* Only return the data necessary. If you have large documents, but an application only requires information held in a subset of the fields, then return this subset from queries rather than entire documents. This strategy can reduce the network bandwidth requirements of the cluster. +* Wherever possible, use filters instead of queries when searching for data. 
A filter simply determines whether a document matches a given criterion whereas a query also calculates how close a match a document is (scoring). Internally, the values generated by a filter are stored as a bitmap indicating match/no match for each document, and they can be cached by Elasticsearch. If the same filter criterion occurs subsequently, the bitmap can be retrieved from cache and used to quickly fetch the matching documents. For more information, see [Internal Filter Operation](https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html#_internal_filter_operation). +* Use *bool* filters for performing static comparisons, and only use *and*, *or*, and *not* filters for dynamically calculated filters, such as those that involve scripting or the *geo-\** filters. +* If a query combines *bool* filters with *and*, *or*, or *not* with *geo-\** filters, place the *and*/*or*/*not* *geo-\** filters last so that they operate on the smallest data set possible. + + Similarly, use a *post_filter* to run expensive filter operations. These filters will be performed last. +* Use aggregations rather than facets. Avoid calculating aggregates over fields that are analyzed or that have many possible values. + + > **Note**: Facets have been removed in Elasticsearch version 2.0.0. + > + > +* Use the *cardinality* aggregation in preference to the *value_count* aggregation unless your application requires an exact count of matching items. An exact count can become quickly outdated, and many applications only require a reasonable approximation. +* Avoid scripting. Scripts in queries and filters can be expensive and the results are not cached. Long-running scripts can consume search threads indefinitely, causing subsequent requests to be queued. If the queue fills up, further requests will be rejected. 
+ +## Testing and analyzing aggregation and search performance +This section describes the results of a series of tests that were performed against varying cluster and index configurations. Two types of tests were performed, as follows: + +* **The *ingestion and query* test**. This test started with an empty index that was populated as the test proceeded by performing bulk insert operations (each operation added 1000 documents). At the same time, a number of queries designed to search for documents added during the preceding 15 minute period and generate aggregations were repeated at 5 second intervals. This test was typically allowed to run for 24 hours, to reproduce the effects of a challenging workload comprising large-scale data ingestion with near real-time queries. +* **The *query-only* test**. This test is similar to the *ingestion and query* test except that the ingestion part is omitted, and the index on each node is prepopulated with 100 million documents. An amended set of queries is performed; the time element limiting documents to those added in the last 15 minutes was removed as the data was now static. The tests ran for 90 minutes, there is less time needed to establish a performance pattern due to the fixed amount of data. + +- - - +Each document in the index had the same schema. This table summarizes the fields in the schema: + +| Name | Type | Notes | +| --- | --- | --- | +| Organization |String |The test generates 200 unique organizations. | +| CustomField1 - CustomField5 |String |These are five string fields which are set to the empty string. | +| DateTimeRecievedUtc |Timestamp |The date and time at which the document was added. | +| Host |String |This field is set to the empty string. | +| HttpMethod |String |This field is set to one of the following values: "POST","GET","PUT". | +| HttpReferrer |String |This field is set to the empty string. 
| +| HttpRequest |String |This field is populated with random text between 10 and 200 characters in length. | +| HttpUserAgent |String |This field is set to the empty string. | +| HttpVersion |String |This field is set to the empty string. | +| OrganizationName |String |This field is set to the same value as the Organization field. | +| SourceIp |IP |This field contains an IP address indicating the "origin" of the data. | +| SourceIpAreaCode |Long |This field is set to 0. | +| SourceIpAsnNr |String |This field is set to "AS\#\#\#\#\#". | +| SourceIpBase10 |Long |This field is set to 500. | +| SourceIpCountryCode |String |This field contains a 2-character country code. | +| SourceIpCity |String |This field contains a string identifying a city in a country. | +| SourceIpLatitude |Double |This field contains a random value. | +| SourceIpLongitude |Double |This field contains a random value. | +| SourceIpMetroCode |Long |This field is set to 0. | +| SourceIpPostalCode |String |This field is set to the empty string. | +| SourceLatLong |Geo point |This field is set to a random geo point. | +| SourcePort |String |This field is populated with the string representation of a random number. | +| TargetIp |IP |This is populated with a random IP address in the range 0.0.100.100 to 255.9.100.100. | +| SourcedFrom |String |This field is set to the string "MonitoringCollector". | +| TargetPort |String |This field is populated with the string representation of a random number. | +| Rating |String |This field is populated with one of 20 different string values selected at random. | +| UseHumanReadableDateTimes |Boolean |This field is set to false. | + +The following queries were performed as a batch by each iteration of the tests. The names in italics are used to refer to these queries in the remainder of this document. 
Note that the time criterion (documents added in the last 15 minutes) was omitted from the *query-only* tests: + +* How many documents with each *Rating* value have been entered in the last 15 minutes (*Count By rating*)? +* How many documents have been added in each 5 minute interval during the last 15 minutes (*Count over time*)? +* How many documents of each *Rating* value have been added for each country in the last 15 minutes (*Hits by country*)? +* Which 15 organizations occur most frequently in documents added in the last 15 minutes (*Top 15 organizations*)? +* How many different organizations occur in documents added in the last 15 minutes (*Unique count organizations*)? +* How many documents have been added in the last 15 minutes (*Total hits count*)? +* How many different *SourceIp* values occur in documents added in the last 15 minutes (*Unique IP count*)? + +The definition of the index and the details of the queries are outlined in the [appendix](#appendix-the-query-and-aggregation-performance-test). + +The tests were designed to understand the effects of the following variables: + +* **Disk type**. Tests were performed on a 6-node cluster of D4 VMs using standard storage (HDDs) and repeated on a 6-node cluster of DS4 VMs using premium storage (SSDs). +* **Machine size - scaling up**. Tests were performed on a 6-node cluster comprising DS3 VMs (designated as the *small* cluster), repeated on a cluster of DS4 VMs (the *medium* cluster), and repeated again on a cluster of DS14 machines (the *large* cluster). The following table summarizes the key characteristics of each VM SKU: + + | Cluster | VM SKU | Number of cores | Number of data disks | RAM (GB) | + | --- | --- | --- | --- | --- | + | Small |Standard DS3 |4 |8 |14 | + | Medium |Standard DS4 |8 |16 |28 | + | Large |Standard DS14 |16 |32 |112 | +* **Cluster size - scaling out**. Tests were performed on clusters of DS14 VMs containing 1, 3, and 6 nodes. +* **Number of index replicas**. 
Tests were performed using indexes configured with 1 and 2 replicas. +* **Doc values**. Initially the tests were performed with the index setting *doc_values* set to *true* (the default value). Selected tests were repeated with *doc_values* set to *false*. +* **Caching**. Tests were conducted with the shard request cache enabled on the index. +* **Number of shards**. Tests were repeated using varying numbers of shards to establish whether queries ran more efficiently across indexes containing fewer, larger shards or more, smaller shards. + +## Performance results - disk type +Disk performance was evaluated by running the *ingestion and query* test on the 6-node cluster of D4 VMs (using HDDs), and on the 6-node cluster of DS4 VMs (using SSDs). The configuration of Elasticsearch in both clusters was the same. The data was spread across 16 disks on each node, and each node had 14GB of RAM allocated to the Java virtual machine (JVM) running Elasticsearch, the remaining memory (also 14GB) was left for operating system use. Each test ran for 24 hours. This period was selected to enable the effects of the increasing volume of data to become apparent and to allow the system to stabilize. The table below summarizes the results, highlighting the response times of the various operations that comprised the test. 
+ +| Cluster | Operation/query | Average response time (ms) | +| --- | --- | --- | +| D4 |Ingestion |978 | +| Count By Rating |103 | | +| Count Over Time |134 | | +| Hits By Country |199 | | +| Top 15 Organizations |137 | | +| Unique Count Organizations |139 | | +| Unique IP Count |510 | | +| Total Hits Count |89 | | +| DS4 |Ingestion |511 | +| Count By Rating |187 | | +| Count Over Time |411 | | +| Hits By Country |402 | | +| Top 15 Organizations |307 | | +| Unique Count Organizations |320 | | +| Unique IP Count |841 | | +| Total Hits Count |236 | | + +At first glance, it would appear that the DS4 cluster performed queries less well than the D4 cluster, at times doubling (or worse) the response time. This does not tell the whole story though. The next table shows the number of ingestion operations performed by each cluster (remember that each operation loads 1000 documents): + +| Cluster | Ingestion operation count | +| --- | --- | +| D4 |264769 | +| DS4 |503157 | + +The DS4 cluster was able to load nearly twice as much data as the D4 cluster during the test. Therefore, when analyzing the response times for each operation, you also need to consider how many documents each query has to scan, and how many documents are returned. These are dynamic figures as the volume of documents in the index is continually growing. You cannot simply divide 503157 by 264769 (the number of ingestion operations performed by each cluster) and then multiply the result by the average response time for each query performed by the D4 cluster to give comparative information as this ignores the amount of I/O being performed concurrently by the ingestion operation. Instead, you should measure the physical amount of data being written to and read from disk as the test proceeds. The JMeter test plan captures this information for each node. 
The summarized results are: + +| Cluster | Average bytes written/read by each operation | +| --- | --- | +| D4 |13471557 | +| DS4 |24643470 | + +This data shows that the DS4 cluster was able to sustain an I/O rate approximately 1.8 times that of the D4 cluster. Given that, apart from the nature of the disks, all other resources are the same, the difference must be due to using SSDs rather than HDDs. + +To help justify this conclusion, the following graphs illustrate how the I/O was performed over time by each cluster: + +![Disk activity graph](./images/query-performance2.png) + + + +***Disk activity for the D4 and DS4 clusters*** + +The graph for the D4 cluster shows significant variation, especially during the first half of the test. This was likely due to throttling to reduce the I/O rate. In the initial stages of the test, the queries are able to run quickly as there is little data to analyze. The disks in the D4 cluster are therefore likely to be operating close to their input/output operations per second (IOPS) capacity, although each I/O operation might not be returning much data. The DS4 cluster is able to support a higher IOPS rate and does not suffer the same degree of throttling; the I/O rates are more regular. To support this theory, the next pair of graphs show how the CPU was blocked by disk I/O over time (the disk wait times shown in the graphs are the proportion of the time that the CPU spent waiting for I/O): + +![Disk wait times graph](./images/query-performance3.png) + +***CPU disk I/O wait times for the D4 and DS4 clusters*** + +It is important to understand that there are two predominant reasons for I/O operations to block the CPU: + +* The I/O subsystem could be reading or writing data to or from disk. +* The I/O subsystem could be throttled by the host environment. Azure disks implemented by using HDDs have a maximum throughput of 500 IOPS, and SSDs have a maximum throughput of 5000 IOPS. 
+ +For the D4 cluster, the amount of time spent waiting for I/O during the first half of the test correlates closely in an inverted manner with the graph showing the I/O rates. Periods of low I/O correspond to periods of significant time the CPU spends blocked; this indicates that I/O is being throttled. As more data is added to the cluster the situation changes, and in the second half of the test peaks in I/O wait times correspond with peaks in I/O throughput. At this point, the CPU is blocked while performing real I/O. Again, with the DS4 cluster, the time spent waiting for I/O is much more even. Each peak matches an equivalent peak in I/O performance rather than a trough; this implies that there is little or no throttling occurring. + +There is one other factor to consider. During the test, the D4 cluster generated 10584 ingestion errors, and 21 query errors. The test on the DS4 cluster produced no errors. + +## Performance results - scaling up +Scale-up testing was performed by running tests against 6-node clusters of DS3, DS4, and DS14 VMs. These SKUs were selected because a DS4 VM provides twice as many CPU cores and twice as much memory as a DS3, and a DS14 machine doubles the CPU resources again while providing four times the amount of memory. The table below compares the key aspects of each SKU: + +| SKU | \#CPU cores | Memory (GB) | Max disk IOPS | Max bandwidth (MB/s) | +| --- | --- | --- | --- | --- | +| DS3 |4 |14 |12,800 |128 | +| DS4 |8 |28 |25,600 |256 | +| DS14 |16 |112 |50,000 |512 | + +The following table summarizes the results of running the tests on the small (DS3), medium (DS4), and large (DS14) clusters. Each VM used SSDs to hold the data. Each test ran for 24 hours. + +The table reports the number of successful requests for each type of query (failures are not included). The number of requests attempted for each type of query is roughly the same during a run of the test. 
This is because the JMeter test plan executes a single occurrence of each query (Count By Rating, Count Over Time, Hits By Country, Top 15 Organizations, Unique Count Organizations, Unique IP Count, and Total Hits Count ) together in a single unit known as a *test transaction* (this transaction is independent of the task that performs the ingestion operation, which is run by a separate thread). Each iteration of the test plan performs a single test transaction. The number of test transactions completed is therefore a measure of the response time of the slowest query in each transaction. + +| Cluster | Operation/query | Number of requests | Average response time (ms) | +| --- | --- | --- | --- | +| Small (DS3) |Ingestion |207284 |3328 | +| Count By Rating |18444 |268 | | +| Count Over Time |18444 |340 | | +| Hits By Country |18445 |404 | | +| Top 15 Organizations |18439 |323 | | +| Unique Count Organizations |18437 |338 | | +| Unique IP Count |18442 |468 | | +| Total Hits Count |18428 |294 | | +| | | | | +| Medium (DS4) |Ingestion |503157 |511 | +| Count By Rating |6958 |187 | | +| Count Over Time |6958 |411 | | +| Hits By Country |6958 |402 | | +| Top 15 Organizations |6958 |307 | | +| Unique Count Organizations |6956 |320 | | +| Unique IP Count |6955 |841 | | +| Total Hits Count |6958 |236 | | +| | | | | +| Large (DS14) |Ingestion |502714 |511 | +| Count By Rating |7041 |201 | | +| Count Over Time |7040 |298 | | +| Hits By Country |7039 |363 | | +| Top 15 Organizations |7038 |244 | | +| Unique Count Organizations |7037 |283 | | +| Unique IP Count |7037 |681 | | +| Total Hits Count |7038 |200 | | + +These figures show that, for this test, the performance of the DS4 and DS14 cluster were reasonably similar. The response times for the query operations for the DS3 cluster also appear to compare favorably initially, and the number of query operations performed is far in excess of the values for the DS4 and DS14 cluster. 
However, one should also take strong notice of the ingestion rate and the consequent number of documents being searched. In the DS3 cluster ingestion is far more constrained, and by the end of the test the database only contained about 40% of the documents read in by each of the other two clusters. This could be due to the processing resources, network, and disk bandwidth available to a DS3 VM compared to a DS4 or DS14 VM. Given that a DS4 VM has twice as many resources available as a DS3 VM, and a DS14 has twice (four times for memory) the resources of a DS4 VM, one question remains: Why is the difference in ingestion rates between the DS4 and DS14 clusters significantly less than that which occurs between the DS3 and DS4 clusters? This could be due to the network utilization and bandwidth limits of Azure VMs. The graphs below show this data for all three clusters: + +![Network utilization graph](./images/query-performance4.png) + +***Network utilization for the DS3, DS4, and DS14 clusters performing the ingestion and query test*** + + + +The limits of available network bandwidth with Azure VMs are not published and can vary, but the fact that network activity appears to have levelled off at an average of around 2.75GBps for both the DS4 and DS14 tests suggests that such a limit has been reached and has become the primary factor in restricting throughput. In the case of the DS3 cluster, the network activity was considerably lower so the lower performance is more likely due to constraints in the availability of other resources. + +To isolate the effects of the ingestion operations and illustrate how query performance varies as nodes scale up, a set of query-only tests was performed using the same nodes. The following table summarizes the results obtained on each cluster: + +> [!NOTE] +> You should not compare the performance and number of requests executed by queries in the *query-only* test with those run by the *ingestion and query* test. 
This is because the queries have been modified and the volume of documents involved is different. +> +> + +| Cluster | Operation/query | Number of requests | Average response time (ms) | +| --- | --- | --- | --- | +| Small (DS3) |Count By Rating |464 |11758 | +| Count Over Time |464 |14699 | | +| Hits By Country |463 |14075 | | +| Top 15 Organizations |464 |11856 | | +| Unique Count Organizations |462 |12314 | | +| Unique IP Count |461 |19898 | | +| Total Hits Count |462 |8882 | | +| | | | | +| Medium (DS4) |Count By Rating |1045 |4489 | +| Count Over Time |1045 |7292 | | +| Hits By Country |1053 |7564 | | +| Top 15 Organizations |1055 |5066 | | +| Unique Count Organizations |1051 |5231 | | +| Unique IP Count |1051 |9228 | | +| Total Hits Count |1051 |2180 | | +| | | | | +| Large (DS14) |Count By Rating |1842 |1927 | +| Count Over Time |1839 |4483 | | +| Hits By Country |1838 |4761 | | +| Top 15 Organizations |1842 |2117 | | +| Unique Count Organizations |1837 |2393 | | +| Unique IP Count |1837 |7159 | | +| Total Hits Count |1837 |642 | | + +This time, the trends in the average response times across the different clusters are clearer. Network utilization is well below the 2.75GBps required earlier by the DS4 and DS14 clusters (which probably saturated the network in the ingestion and query tests), and the 1.5GBps for the DS3 cluster. In fact, it is closer to 200MBps in all cases as shown by the graphs below: + +![Network utilization graph](./images/query-performance5.png) + +***Network utilization for the DS3, DS4 and DS14 clusters performing the query-only test*** + +The limiting factor in the DS3 and DS4 clusters now appears to be CPU utilization, which is close to 100% for much of the time. In the DS14 cluster the CPU usage averages just over 80%. This is still high, but clearly highlights the advantages of having more CPU cores available. The following image depicts the CPU usage patterns for the DS3, DS4, and DS14 clusters. 
+ +![CPU utilization graph](./images/query-performance6.png) + +***CPU utilization for the DS3 and DS14 clusters performing the query-only test*** + +## Performance results - scaling out +To illustrate how the system scales out with the number of nodes, tests were run using DS14 clusters containing 1, 3, and 6 nodes. This time, only the *query-only* test was performed, using 100 million documents and running for 90 minutes: + +| Cluster | Operation/query | Number of requests | Average response time (ms) | +| --- | --- | --- | --- | +| 1 Node |Count By Rating |288 |6216 | +| Count Over Time |288 |28933 | | +| Hits By Country |288 |29455 | | +| Top 15 Organizations |288 |9058 | | +| Unique Count Organizations |287 |19916 | | +| Unique IP Count |284 |54203 | | +| Total Hits Count |287 |3333 | | +| | | | | +| 3 Nodes |Count By Rating |1194 |3427 | +| Count Over Time |1194 |5381 | | +| Hits By Country |1191 |6840 | | +| Top 15 Organizations |1196 |3819 | | +| Unique Count Organizations |1190 |2938 | | +| Unique IP Count |1189 |12516 | | +| Total Hits Count |1191 |1272 | | +| | | | | +| 6 Nodes |Count By Rating |1842 |1927 | +| Count Over Time |1839 |4483 | | +| Hits By Country |1838 |4761 | | +| Top 15 Organizations |1842 |2117 | | +| Unique Count Organizations |1837 |2393 | | +| Unique IP Count |1837 |7159 | | +| Total Hits Count |1837 |642 | | + +The number of nodes makes a significant difference in the query performance of the cluster, although in a nonlinear manner. The 3 node cluster completes approximately 4 times as many queries as the single node cluster, while the 6 node cluster handles 6 times as many. 
To help explain this nonlinearity, the following graphs show how the CPU was being consumed by the three clusters: + +![CPU utilization graph](./images/query-performance7.png) + +***CPU utilization for the 1, 3, and 6-node clusters performing the query-only test*** + +The single-node and 3-node clusters are CPU-bound, whereas in the 6-node cluster, although CPU utilization is high, there is spare processing capacity available. In this case, other factors are likely to be limiting the throughput. This could be confirmed by testing with 9 and 12 nodes, which would likely show further spare processing capacity. + +The data in the table above also shows how the average response times for the queries vary. This is the item that is most informative when testing how a system scales for specific types of query. Some searches are clearly far more efficient when spanning more nodes than others. This could be due to the ratio between the number of nodes and the number of documents in the cluster increasing (each cluster contained 100 million documents). When performing searches that involve aggregating data, Elasticsearch will process and buffer the data retrieved as part of the aggregation process in memory on each node. If there are more nodes, there is less data to retrieve, buffer, and process on each node. + +## Performance results - number of replicas +The *ingestion and query* tests were run against an index with a single replica. The tests were repeated on the 6-node DS4 and DS14 clusters using an index configured with two replicas. All tests ran for 24 hours. 
The table below shows the comparative results for one and two replicas: + +| Cluster | Operation/query | Average response time (ms) - 1 replica | Average response time (ms) - 2 replicas | % difference in response time | +| --- | --- | --- | --- | --- | +| DS4 |Ingestion |511 |655 |+28% | +| Count By Rating |187 |168 |-10% | | +| Count Over Time |411 |309 |-25% | | +| Hits By Country |402 |562 |+40% | | +| Top 15 Organizations |307 |366 |+19% | | +| Unique Count Organizations |320 |378 |+18% | | +| Unique IP Count |841 |987 |+17% | | +| Total Hits Count |236 |236 |+0% | | +| | | | | | +| DS14 |Ingestion |511 |618 |+21% | +| Count By Rating |201 |275 |+37% | | +| Count Over Time |298 |466 |+56% | | +| Hits By Country |363 |529 |+46% | | +| Top 15 Organizations |244 |407 |+67% | | +| Unique Count Organizations |283 |403 |+42% | | +| Unique IP Count |681 |823 |+21% | | +| Total Hits Count |200 |221 |+11% | | + +The ingestion rate decreased as the number of replicas increased. This should be expected as Elasticsearch is writing more copies of each document, generating additional disk I/O. This is reflected by the graphs for the DS14 cluster for indexes with 1 and 2 replicas shown in the image below. In the case of the index with 1 replica, the average I/O rate was 16896573 bytes/second. For the index with 2 replicas, the average I/O rate was 33986843 bytes/second, just over twice as many. 
+ +![Disk activity graph](./images/query-performance8.png) + +***Disk I/O rates for nodes with 1 and 2 replicas performing the ingestion and query test*** + +| Cluster | Query | Average response time (ms) - 1 replica | Average response time (ms) - 2 replicas | +| --- | --- | --- | --- | +| DS4 |Count By Rating |4489 |4079 | +| Count Over Time |7292 |6697 | | +| Hits By Country |7564 |7173 | | +| Top 15 Organizations |5066 |4650 | | +| Unique Count Organizations |5231 |4691 | | +| Unique IP Count |9228 |8752 | | +| Total Hits Count |2180 |1909 | | +| | | | | +| DS14 |Count By Rating |1927 |2330 | +| Count Over Time |4483 |4381 | | +| Hits By Country |4761 |5341 | | +| Top 15 Organizations |2117 |2560 | | +| Unique Count Organizations |2393 |2546 | | +| Unique IP Count |7159 |7048 | | +| Total Hits Count |642 |708 | | + +These results show an improvement in average response time for the DS4 cluster, but an increase for the DS14 cluster. To help interpret these results, you should also consider the number of queries performed by each test: + +| Cluster | Query | Number performed - 1 replica | Number performed - 2 replicas | +| --- | --- | --- | --- | +| DS4 |Count By Rating |1054 |1141 | +| Count Over Time |1054 |1139 | | +| Hits By Country |1053 |1138 | | +| Top 15 Organizations |1055 |1141 | | +| Unique Count Organizations |1051 |1136 | | +| Unique IP Count |1051 |1135 | | +| Total Hits Count |1051 |1136 | | +| | | | | +| DS14 |Count By Rating |1842 |1718 | +| Count Over Time |1839 |1716 | | +| Hits By Country |1838 |1714 | | +| Top 15 Organizations |1842 |1718 | | +| Unique Count Organizations |1837 |1712 | | +| Unique IP Count |1837 |1712 | | +| Total Hits Count |1837 |1712 | | + +This data shows that the number of queries performed by the DS4 cluster increased in line with the decrease in average response time, but again the converse is true of the DS14 cluster. 
One significant factor is that the CPU utilization of the DS4 cluster in the 1-replica and 2-replica tests was unevenly spread. Some nodes exhibited close to 100% utilization while others had spare processing capacity. The improvement in performance is most likely due to the increased ability to distribute processing across the nodes of the cluster. The following image shows the variation in CPU processing between the most lightly and heavily used VMs (nodes 4 and 3): + +![CPU utilization graph](./images/query-performance9.png) + +***CPU utilization for the least used and most used nodes in the DS4 cluster performing the query-only test*** + +For the DS14 cluster this was not the case. CPU utilization for both tests was lower across all nodes, and the availability of a second replica became less of an advantage and more of an overhead: + +![CPU utilization graph](./images/query-performance10.png) + +***CPU utilization for the least used and most used nodes in the DS14 cluster performing the query-only test*** + +These results show the need to benchmark your system carefully when deciding whether to use multiple replicas. You should always have at least one replica of each index (unless you are willing to risk losing data if a node fails), but additional replicas can impose a burden on the system for little benefit, depending on your workloads and the hardware resources available to the cluster. + +## Performance results - doc values +The *ingestion and query* tests were conducted with doc values enabled, causing Elasticsearch to store data used for sorting fields on disk. The tests were repeated with doc values disabled, so Elasticsearch constructed fielddata dynamically and cached it in memory. All tests ran for 24 hours. The table below compares the response times for tests run against clusters of 6 nodes built using D4, DS4, and DS14 VMs (the D4 cluster uses regular hard disks, while the DS4 and DS14 clusters use SSDs). 
+ +| Cluster | Operation/query | Average response time (ms) - doc values enabled | Average response time (ms) - doc values disabled | % difference in response time | +| --- | --- | --- | --- | --- | +| D4 |Ingestion |978 |835 |-15% | +| Count By Rating |103 |132 |+28% | | +| Count Over Time |134 |189 |+41% | | +| Hits By Country |199 |259 |+30% | | +| Top 15 Organizations |137 |184 |+34% | | +| Unique Count Organizations |139 |197 |+42% | | +| Unique IP Count |510 |604 |+18% | | +| Total Hits Count |89 |134 |+51% | | +| | | | | | +| DS4 |Ingestion |511 |581 |+14% | +| Count By Rating |187 |190 |+2% | | +| Count Over Time |411 |409 |-0.5% | | +| Hits By Country |402 |414 |+3% | | +| Top 15 Organizations |307 |284 |-7% | | +| Unique Count Organizations |320 |313 |-2% | | +| Unique IP Count |841 |955 |+14% | | +| Total Hits Count |236 |281 |+19% | | +| | | | | | +| DS14 |Ingestion |511 |571 |+12% | +| Count By Rating |201 |232 |+15% | | +| Count Over Time |298 |341 |+14% | | +| Hits By Country |363 |457 |+26% | | +| Top 15 Organizations |244 |338 |+39% | | +| Unique Count Organizations |283 |350 |+24% | | +| Unique IP Count |681 |909 |+33% | | +| Total Hits Count |200 |245 |+23% | | + +The next table compares the number of ingestion operations performed by the tests: + +| Cluster | Ingestion operation count - doc values enabled | Ingestion operation count - doc values disabled | % difference in number of ingestion operations | +| --- | --- | --- | --- | +| D4 |264769 |408690 |+54% | +| DS4 |503137 |578237 |+15% | +| DS14 |502714 |586472 |+17% | + +The improved ingestion rates occur with doc values disabled as less data is being written to disk as documents are inserted. The improved performance is especially noticeable with the D4 VM using HDDs to store data. In this case, the response time for ingestion operations also decreased by 15% (see the first table in this section). 
This could be due to the reduced pressure on the HDDs which were likely running close to their IOPS limits in the test with doc values enabled, see the Disk Type test for more information. The following graph compares the I/O performance of the D4 VMs with doc values enabled (values held on disk) and doc values disabled (values held in memory): + +![Disk activity graph](./images/query-performance11.png) + +***Disk activity for the D4 cluster with doc values enabled and disabled*** + +In contrast, the ingestion values for the VMs using SSDs show a small increase in the number of documents but also an increase in the response time of the ingestion operations. With one or two small exceptions, the query response times were also worse. The SSDs are less likely to be running close to their IOPS limits with doc values enabled, so changes in performance are more likely due to increased processing activity and the overhead of managing the JVM heap. This is evident by comparing the CPU utilization with doc values enabled and disabled. The next graph highlights this data for the DS4 cluster, where most of the CPU utilization moves from the 30%-40% band with doc values enabled, to the 40%-50% band with doc values disabled (the DS14 cluster showed a similar trend): + +![CPU utilization graph](./images/query-performance12.png) + +***CPU utilization for the DS4 cluster with doc values enabled and disabled*** + +To distinguish the effects of doc values on query performance from data ingestion, pairs of query-only tests were performed for the DS4 and DS14 clusters with doc values enabled and disabled. 
The table below summarizes the results of these tests: + +| Cluster | Operation/query | Average response time (ms) - doc values enabled | Average response time (ms) - doc values disabled | % difference in response time | +| --- | --- | --- | --- | --- | +| DS4 |Count By Rating |4489 |3736 |-16% | +| Count Over Time |7293 |5459 |-25% | | +| Hits By Country |7564 |5930 |-22% | | +| Top 15 Organizations |5066 |3874 |-14% | | +| Unique Count Organizations |5231 |4483 |-2% | | +| Unique IP Count |9228 |9474 |+3% | | +| Total Hits Count |2180 |1218 |-44% | | +| | | | | | +| DS14 |Count By Rating |1927 |2144 |+11% | +| Count Over Time |4483 |4337 |-3% | | +| Hits By Country |4761 |4840 |+2% | | +| Top 15 Organizations |2117 |2302 |+9% | | +| Unique Count Organizations |2393 |2497 |+4% | | +| Unique IP Count |7159 |7639 |+7% | | +| Total Hits Count |642 |633 |-1% | | + +Remember that, with Elasticsearch 2.0 onwards, doc values are enabled by default. In the tests covering the DS4 cluster, disabling doc values appears to have a positive effect overall, whereas the converse is generally true for the DS14 cluster (the two cases where performance is better with doc values disabled are very marginal). + +For the DS4 cluster, CPU utilization in both cases was close to 100% for the duration of both tests indicating that the cluster was CPU bound. However, the number of queries processed decreased from 7369 to 5894 (20%). Remember that if doc values are disabled Elasticsearch will dynamically generate fielddata in memory, and this consumes CPU power. This configuration has reduced the rate of disk I/O but increased stress on CPUs already running close to their maximum capabilities, so in this case queries are faster with doc values disabled but there are fewer of them. + +In the DS14 tests with and without doc values CPU activity was high, but not 100%. 
The number of queries performed was slightly higher (approximately 4%) in tests with doc values enabled: + +| Cluster | Query | Number performed - doc values enabled | Number performed - doc values disabled | +| --- | --- | --- | --- | +| DS4 |Count By Rating |1054 |845 | +| Count Over Time |1054 |844 | | +| Hits By Country |1053 |842 | | +| Top 15 Organizations |1055 |846 | | +| Unique Count Organizations |1051 |839 | | +| Unique IP Count |1051 |839 | | +| Total Hits Count |1051 |839 | | +| | | | | +| DS14 |Count By Rating |1772 |1842 | +| Count Over Time |1772 |1839 | | +| Hits By Country |1770 |1838 | | +| Top 15 Organizations |1773 |1842 | | +| Unique Count Organizations |1769 |1837 | | +| Unique IP Count |1768 |1837 | | +| Total Hits Count |1769 |1837 | | + +## Performance results - shard request cache +To demonstrate how caching index data in the memory of each node can impact performance, the *ingestion and query* test was conducted on a DS4 and a DS14 6-node cluster with index caching enabled - see the section [Using the shard request cache](#using-the-shard-request-cache) for more information. The results were compared to those generated by the earlier tests using the same index but with index caching disabled. The table below summarizes the results. 
Note that the data has been curtailed to cover only the first 90 minutes of the test; at this point the comparative trend was apparent and continuing the test would probably not have yielded any additional insights: + +| Cluster | Operation/query | Average response time (ms) - index cache disabled | Average response time (ms) - index cache enabled | % difference in response time | +| --- | --- | --- | --- | --- | +| DS4 |Ingestion |504 |3260 |+547% | +| Count By Rating |218 |273 |+25% | | +| Count Over Time |450 |314 |-30% | | +| Hits By Country |447 |397 |-11% | | +| Top 15 Organizations |342 |317 |-7% | | +| Unique Count Organizations |370 |324 |-12% | | +| Unique IP Count |760 |355 |-53% | | +| Total Hits Count |258 |291 |+12% | | +| | | | | | +| DS14 |Ingestion |503 |3365 |+569% | +| Count By Rating |234 |262 |+12% | | +| Count Over Time |357 |298 |-17% | | +| Hits By Country |416 |383 |-8% | | +| Top 15 Organizations |272 |324 |-7% | | +| Unique Count Organizations |330 |321 |-3% | | +| Unique IP Count |674 |352 |-48% | | +| Total Hits Count |227 |292 |+29% | | + +This data shows two points of interest: + +* Data ingestion rates appear to be greatly diminished by enabling index caching. +* Index caching does not necessarily improve the response time of all types of query, and can have an adverse effect on certain aggregate operations such as those performed by the Count By Rating and Total Hits Count queries. + +To understand why the system exhibits this behavior, you should consider the number of queries successfully performed in each case during the test runs. 
The following table summarizes this data: + +| Cluster | Operation/query | Operations/queries count - index cache disabled | Operations/queries count - index cache enabled | +| --- | --- | --- | --- | +| DS4 |Ingestion |38611 |13232 | +| Count By Rating |524 |18704 | | +| Count Over Time |523 |18703 | | +| Hits By Country |522 |18702 | | +| Top 15 Organizations |521 |18706 | | +| Unique Count Organizations |521 |18700 | | +| Unique IP Count |521 |18699 | | +| Total Hits Count |521 |18701 | | +| | | | | +| DS14 |Ingestion |38769 |12835 | +| Count By Rating |528 |19239 | | +| Count Over Time |528 |19239 | | +| Hits By Country |528 |19238 | | +| Top 15 Organizations |527 |19240 | | +| Unique Count Organizations |524 |19234 | | +| Unique IP Count |524 |19234 | | +| Total Hits Count |527 |19236 | | + +You can see that although the ingestion rate when caching was enabled was approximately 1/3 of that when caching was disabled, the number of queries performed increased by a factor of 34. Queries no longer incur as much disk I/O and did not have to compete for disk resources. This is reflected by the graphs in the figure below that compare the I/O activity for all four cases: + +![Disk activity graph](./images/query-performance13.png) + +***Disk I/O activity for the ingestion and query test with index caching disabled and enabled*** + +The decrease in disk I/O also meant that the CPU spent less time waiting for I/O to complete. This is highlighted by the next figure: + +![Disk wait times graph](./images/query-performance14.png) + +***CPU time spent waiting for disk I/O to complete for the ingestion and query test with index caching disabled and enabled*** + +The reduction in disk I/O meant that Elasticsearch could spend a much greater proportion of its time servicing queries from data held in memory. This increased CPU utilization, which becomes apparent if you look at the CPU utilization for all four cases. 
The graphs below show how CPU use was more sustained with caching enabled: + +![CPU utilization graph](./images/query-performance15.png) + +***CPU utilization for the ingestion and query test with index caching disabled and enabled*** + +The volume of network I/O in both scenarios for the duration of the tests was broadly similar. The tests without caching showed a gradual degradation during the test period, but the longer, 24 hour runs of these tests showed that this statistic levelled off at approximately 2.75GBps. The image below shows this data for the DS4 clusters (the data for the DS14 clusters was very similar): + +![Network traffic graph](./images/query-performance16.png) + +***Network traffic volumes for the ingestion and query test with index caching disabled and enabled*** + +As described in the [scaling up](#performance-results---scaling-up) test, the restrictions to network bandwidth with Azure VMs are not published and can vary, but the moderate levels of CPU and disk activity suggests that network utilization may be the limiting factor in this scenario. + +Caching is more naturally suited to scenarios where data changes infrequently. To highlight the impact of caching in this scenario, the *query-only* tests were performed with caching enabled. 
The results are shown below (these tests ran for 90 minutes, and the indexes under test contained 100 million documents): + +| Cluster | Query | Average response time (ms) | Number of Queries Performed | +| --- | --- | --- | --- | +| |**Cache disabled** |**Cache enabled** | | +| DS4 |Count By Rating |4489 |210 | +| Count Over Time |7292 |211 | | +| Hits By Country |7564 |231 | | +| Top 15 Organizations |5066 |211 | | +| Unique Count Organizations |5231 |211 | | +| Unique IP Count |9228 |218 | | +| Total Hits Count |2180 |210 | | +| | | | | +| DS14 |Count By Rating |1927 |211 | +| Count Over Time |4483 |219 | | +| Hits By Country |4761 |236 | | +| Top 15 Organizations |2117 |212 | | +| Unique Count Organizations |2393 |212 | | +| Unique IP Count |7159 |220 | | +| Total Hits Count |642 |211 | | + +The variance in the performance of the noncached tests is due to the difference in resources available between the DS4 and DS14 VMs. In both cases of the cached test the average response time dropped significantly as data was being retrieved directly from memory. It is also worth noting that the response times for the cached DS4 and DS14 cluster tests were very similar despite the disparity with the noncached results. There is also very little difference between the response times for each query within each test; they all take approximately 220ms. The disk I/O rates and CPU utilization for both clusters were very low as once all the data is in memory little I/O or processing is required. The network I/O rate was similar to that of the uncached tests, confirming that network bandwidth may be a limiting factor in this test. The following graphs present this information for the DS4 cluster. 
The profile of the DS14 cluster was very similar: + +![DS14 cluster performance graphs](./images/query-performance17.png) + +***Disk I/O, CPU utilization, and network utilization for the query-only test with index caching enabled*** + +The figures in the table above suggest that using the DS14 architecture shows little benefit over using the DS4. In fact, the number of samples generated by the DS14 cluster was about 5% below that of the DS4 cluster, but this could also be due to network restrictions which can vary slightly over time. + +## Performance results - number of shards +The purpose of this test was to determine whether the number of shards created for an index has any bearing on the query performance of that index. + +Separate tests conducted previously showed that the shard configuration of an index can impact the rate of data ingestion. The tests conducted to determine query performance followed a similar methodology, but were restricted to a 6-node cluster running on DS14 hardware. This approach helps to minimize the number of variables, so any differences in performance should be due to the volume of shards. + +The *query-only* test was conducted on copies of the same index configured with 7, 13, 23, 37, and 61 primary shards. The index contained 100 million documents and had a single replica, doubling the number of shards across the cluster. Each test ran for 90 minutes. The following table summarizes the results. The average response time shown is the response time for the JMeter test transaction that encompasses the complete set of queries performed by each iteration of the test. 
See the note in the section [Performance results - scaling up](#performance-results---scaling-up) for more information: + +| Number of shards | Shard layout (shards per node, including replicas) | Number of queries performed | Avg response time (ms) | +| --- | --- | --- | --- | +| 7 (14 including replicas) |3-2-2-2-2-3 |7461 |40524 | +| 13 (26) |5-4-5-4-4-4 |7369 |41055 | +| 23 (46) |7-8-8-7-8-8 |14193 |21283 | +| 37 (74) |13-12-12-13-12-12 |13399 |22506 | +| 61 (122) |20-21-20-20-21-20 |14743 |20445 | + +These results indicate that there is a significant difference in performance between the 13(26) shard cluster and the 23(46) shard cluster; throughput nearly doubles and response times halve. This is most likely due to the configuration of the VMs and the structures that Elasticsearch uses to process search requests. Search requests are queued, and each search request is handled by a single search thread. The number of search threads created by an Elasticsearch node is a function of the number of processors available on the machine hosting the node. The results suggest that with only 4 or 5 shards on a node, processing resources are not being fully utilized. This is supported by looking at the CPU utilization while running this test. The following image is a snapshot taken from Marvel while performing the 13(26) shard test: + +![CPU utilization](./images/query-performance18.png) + +***CPU utilization for the query-only test on the 13(26) shard cluster*** + +Compare these figures with those of the 23(46) shard test: + +![CPU utilization](./images/query-performance19.png) + +***CPU utilization for the query-only test on the 23(46) shard cluster*** + +In the 23(46) shard test, CPU utilization was far higher. Each node contains 7 or 8 shards. The DS14 architecture provides 16 processors, and Elasticsearch is better able to exploit this number of cores with the additional shards. 
The figures in the table above suggest that increasing the number of shards beyond this point may improve performance slightly, but you should offset these figures against the additional overhead of maintaining a high volume of shards. These tests imply that the optimal number of shards per node is half the number of processor cores available on each node. However, remember that these results were achieved when only running queries. If your system imports data, you should also consider how sharding can impact the performance of data ingestion operations. + +## Summary +Elasticsearch provides many options that you can use to structure indexes and tune them to support large-scale query operations. This document has summarized some common configurations and techniques that you can use to tune your database for query purposes. However, you should recognize that there is a tradeoff between optimizing a database to support fast retrieval as opposed to supporting high-volume data ingestion. Sometimes what is good for querying can have a detrimental impact on insert operations and vice versa. In a system that is exposed to mixed workloads, you need to assess where the balance lies and adjust the system parameters accordingly. + +Additionally, the applicability of different configurations and techniques can vary depending on the structure of the data and the limitations (or otherwise) of the hardware the system is constructed on. Many of the tests shown in this document illustrate how the selection of the hardware platform can impact throughput, and also how some strategies can be beneficial in certain cases but detrimental in others. The important point is to understand the options available and then perform rigorous benchmarking using your own data to determine the most optimal combination. + +Finally, remember that an Elasticsearch database is not necessarily a static item. 
It will likely grow over time, and the strategies used to structure the data may need to be revised regularly. For example, it may be necessary to scale up, scale out, or reindex data with additional shards. As the system increases in size and complexity, be prepared to continually test performance to ensure that you are still meeting any SLAs guaranteed to your customers. + +## Appendix: the query and aggregation performance test +This appendix describes the performance test performed against the Elasticsearch cluster. The tests were run by using JMeter running on a separate set of VMs. Details of the configuration of the test environment are described in [Creating a performance testing environment for Elasticsearch on Azure](./performance-testing-environment.md). To perform your own testing, you can create your own JMeter test plan manually following the guidance in this appendix, or you can use the automated test scripts available separately. For more information, see [Running the automated Elasticsearch performance tests](./automated-performance-tests.md). + +The data query workload performed the set of queries described below while performing a large-scale upload of documents at the same time. The purpose of this workload was to simulate a production environment where new data is constantly being added while searches are performed. The queries were structured to retrieve only the most recent data from documents added in the last 15 minutes. + +Each document was stored in a single index named *idx*, and had the type *doc*. You can use the following HTTP request to create the index. The *number_of_replicas* and *number_of_shards* settings varied from the values shown below in many of the tests. Additionally, for the tests that used fielddata rather than doc values, each property was annotated with the attribute *"doc_values" : false*. + +**Important**: The index was dropped and recreated prior to each test run. 
+ +``` http +PUT /idx +{ + "settings" : { + "number_of_replicas": 1, + "refresh_interval": "30s", + "number_of_shards": "5", + "index.translog.durability": "async" + }, + "doc": { + "mappings": { + "event": { + "_all": { + "enabled": false + }, + "_timestamp": { + "enabled": true, + "store": true, + "format": "date_time" + }, + "properties": { + "Organization": { + "type": "string", + "index": "not_analyzed" + }, + "CustomField1": { + "type": "string", + "index": "not_analyzed" + }, + "CustomField2": { + "type": "string", + "index": "not_analyzed" + }, + "CustomField3": { + "type": "string", + "index": "not_analyzed" + }, + "CustomField4": { + "type": "string", + "index": "not_analyzed" + }, + "CustomField5": { + "type": "string", + "index": "not_analyzed" + }, + "DateTimeReceivedUtc": { + "type": "date", + "format": "dateOptionalTime" + }, + "Host": { + "type": "string", + "index": "not_analyzed" + }, + "HttpMethod": { + "type": "string", + "index": "not_analyzed" + }, + "HttpReferrer": { + "type": "string", + "index": "not_analyzed" + }, + "HttpRequest": { + "type": "string", + "index": "not_analyzed" + }, + "HttpUserAgent": { + "type": "string", + "index": "not_analyzed" + }, + "HttpVersion": { + "type": "string", + "index": "not_analyzed" + }, + "OrganizationName": { + "type": "string", + "index": "not_analyzed" + }, + "SourceIp": { + "type": "ip" + }, + "SourceIpAreaCode": { + "type": "long" + }, + "SourceIpAsnNr": { + "type": "string", + "index": "not_analyzed" + }, + "SourceIpBase10": { + "type": "long" + }, + "SourceIpCity": { + "type": "string", + "index": "not_analyzed" + }, + "SourceIpCountryCode": { + "type": "string", + "index": "not_analyzed" + }, + "SourceIpLatitude": { + "type": "double" + }, + "SourceIpLongitude": { + "type": "double" + }, + "SourceIpMetroCode": { + "type": "long" + }, + "SourceIpPostalCode": { + "type": "string", + "index": "not_analyzed" + }, + "SourceIpRegion": { + "type": "string", + "index": "not_analyzed" + }, + 
"SourceLatLong": { + "type": "geo_point", + "doc_values": true, + "lat_lon": true, + "geohash": true + }, + "SourcePort": { + "type": "string", + "index": "not_analyzed" + }, + "SourcedFrom": { + "type": "string", + "index": "not_analyzed" + }, + "TargetIp": { + "type": "ip" + }, + "TargetPort": { + "type": "string", + "index": "not_analyzed" + }, + "Rating": { + "type": "string", + "index": "not_analyzed" + }, + "UseHumanReadableDateTimes": { + "type": "boolean" + } + } + } + } + } +} +``` + +The following queries were performed by the test: + +* How many documents with each Rating value have been entered in the last 15 minutes? + + ```http + GET /idx/doc/_search + { + "query": { + "bool": { + "must": [ + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [], + "should": [] + } + }, + "from": 0, + "size": 0, + "aggs": { + "2": { + "terms": { + "field": "Rating", + "size": 5, + "order": { + "_count": "desc" + } + } + } + } + } + ``` +* How many documents have been added in each 5 minute interval during the last 15 minutes? + + ```http + GET /idx/doc/_search + { + "query": { + "bool": { + "must": [ + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [], + "should": [] + } + }, + "from": 0, + "size": 0, + "sort": [], + "aggs": { + "2": { + "date_histogram": { + "field": "DateTimeReceivedUtc", + "interval": "5m", + "time_zone": "America/Los_Angeles", + "min_doc_count": 1, + "extended_bounds": { + "min": "now-15m", + "max": "now" + } + } + } + } + } + ``` +* How many documents of each Rating value have been added for each country in the last 15 minutes? 
+ + ```HTTP + GET /idx/doc/_search + { + "query": { + "filtered": { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + }, + "filter": { + "bool": { + "must": [ + { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + } + }, + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [] + } + } + } + }, + "size": 0, + "aggs": { + "2": { + "terms": { + "field": "Rating", + "size": 5, + "order": { + "_count": "desc" + } + }, + "aggs": { + "3": { + "terms": { + "field": "SourceIpCountryCode", + "size": 15, + "order": { + "_count": "desc" + } + } + } + } + } + } + } + ``` +* Which 15 organizations occur most frequently in documents added in the last 15 minutes? + + ```http + GET /idx/doc/_search + { + "query": { + "filtered": { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + }, + "filter": { + "bool": { + "must": [ + { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + } + }, + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [] + } + } + } + }, + "size": 0, + "aggs": { + "2": { + "terms": { + "field": "Organization", + "size": 15, + "order": { + "_count": "desc" + } + } + } + } + } + ``` +* How many different organizations occur in documents added in the last 15 minutes? + + ```http + GET /idx/doc/_search + { + "query": { + "filtered": { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + }, + "filter": { + "bool": { + "must": [ + { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + } + }, + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [] + } + } + } + }, + "size": 0, + "aggs": { + "2": { + "cardinality": { + "field": "Organization" + } + } + } + } + ``` +* How many documents have been added in the last 15 minutes? 
+ + ```http + GET /idx/doc/_search + { + "query": { + "filtered": { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + }, + "filter": { + "bool": { + "must": [ + { + "query": { + "query_string": { + "analyze_wildcard": true, + "query": "*" + } + } + }, + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [] + } + } + } + }, + "size": 0, + "aggs": {} + } + ``` +* How many different SourceIp values occur in documents added in the last 15 minutes? + + ```http + GET /idx/doc/_search + { + "query": { + "filtered": { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + }, + "filter": { + "bool": { + "must": [ + { + "query": { + "query_string": { + "query": "*", + "analyze_wildcard": true + } + } + }, + { + "range": { + "DateTimeReceivedUtc": { + "gte": "now-15m", + "lte": "now" + } + } + } + ], + "must_not": [] + } + } + } + }, + "size": 0, + "aggs": { + "2": { + "cardinality": { + "field": "SourceIp" + } + } + } + } + ``` + diff --git a/docs/elasticsearch/data-ingestion-performance.md b/docs/elasticsearch/data-ingestion-performance.md new file mode 100644 index 00000000000..1a991526d11 --- /dev/null +++ b/docs/elasticsearch/data-ingestion-performance.md @@ -0,0 +1,1197 @@ +--- +title: Tune data ingestion performance for Elasticsearch on Azure +description: How to maximize data ingestion performance with Elasticsearch on Azure. 
+services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: index +pnp.series.next: data-aggregation-and-query-performance +ms.assetid: 1fe28243-bb73-4d76-a534-0e312f866d77 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Tune data ingestion performance +[!INCLUDE [header](../_includes/header.md)] + +## Overview +An important aspect when creating any search database is to determine the best way to structure the system to ingest searchable data quickly and efficiently. The considerations surrounding this requirement +concern not only the choice of infrastructure on which you implement the system, but also the various +optimizations that you can use to help ensure that the system can keep up with the expected levels of +data influx. + +This document describes the deployment and configuration options that you should consider +for implementing an Elasticsearch cluster that expects a high rate of data ingestion. To provide solid +data for illustrative purposes, this document also shows the results of benchmarking various +configurations using a simple high-volume data ingestion workload. The details of the workload are +described in the [Appendix](#appendix-the-bulk-load-data-ingestion-performance-test) at the end of this +document. + +The purpose of the benchmarks was not to generate absolute performance figures for running Elasticsearch +or even to recommend a particular topology, but rather to illustrate methods that you can use for +assessing performance, sizing data nodes, and implementing clusters that can meet your own performance +requirements. + +When sizing your own systems, it is important to test performance thoroughly based on your +own workloads. 
Gather telemetry that enables you to obtain information about the optimal hardware +configuration to use, and the horizontal scaling factors that you should consider. In particular, you +should: + +* Consider the overall size of the payload sent and not just the number of items in each bulk insert + request. A smaller number of large bulk items in each request could be more optimal than a larger number, + depending on the resources available to process each request. + +You can monitor the effects of varying the bulk insert request by using [Marvel](https://www.elastic.co/products/marvel), using the +*readbytes*/*writebytes* I/O counters with [JMeter](https://jmeter.apache.org/), and operating system tools such as *iostat* and +*vmstat* on Ubuntu. + +* Conduct performance testing and gather telemetry to measure CPU processing and I/O wait times, disk + latency, throughput, and response times. This information can help to identify potential bottlenecks and + assess the costs and benefits of using premium storage. Bear in mind that CPU and disk utilization might + not be even across all nodes depending on the way in which shards and replicas are distributed across the + cluster (some nodes can contain more shards than others). +* Consider how the number of concurrent requests for your workload will be distributed across the cluster + and assess the impact of using different numbers of nodes to handle this workload. +* Consider how workloads might grow as the business expands. Assess the impact of this growth on the + costs of the VMs and storage used by the nodes. +* Recognize that using a cluster with a larger number of nodes with regular disks might be more + economical if your scenario requires a high number of requests and the disk infrastructure maintains + throughput that satisfies your service level agreements (SLAs). However, increasing the number of nodes can introduce overhead in + the form of additional inter-node communications and synchronization.
+* Understand that a higher number of cores per node may generate more disk traffic as more documents can + be processed. In this case, measure disk utilization to assess whether the I/O subsystem might become a + bottleneck and determine the benefits of using premium storage. +* Test and analyze the trade-offs with a higher number of nodes with fewer cores versus fewer nodes with + more cores. Keep in mind that increasing the number of replicas escalates the demands on the cluster + and may require you to add nodes. +* Consider that using ephemeral disks might require that indexes have to be recovered more frequently. +* Measure storage volume usage to assess capacity and underutilization of storage. For example, in our + scenario we stored 1.5 billion documents using 350GB of storage. +* Measure the transfer rates for your workloads and consider how close you are likely to get to the total + I/O rate transfer limit for any given storage account in which you have created virtual disks. + +## Node and index design +In a system that must support large-scale data ingestion, you should ask the following questions: + +* **Is the data fast-moving or relatively static?** The more dynamic the data, the greater the + maintenance overhead for Elasticsearch. If the data is replicated, each replica is maintained + synchronously. Fast-moving data that has only a limited lifespan or that can easily be reconstructed + might benefit from disabling replication altogether. This option is discussed in the section + [Tuning large-scale data ingestion.](#tuning-large-scale-data-ingestion) +* **How up-to-date do you require the data discovered by searching to be?** To maintain performance, + Elasticsearch buffers as much data in memory as it can. This means that not all changes are immediately + available for search requests.
The process Elasticsearch uses to persist changes and make them visible + is described in [Making Changes Persistent](https://www.elastic.co/guide/en/elasticsearch/guide/current/translog.html#translog). + + The rate at which data becomes visible is governed by the *refresh\_interval* setting of the relevant + index. By default, this interval is set at 1 second. However, not every situation requires refreshes to + occur this quickly. For example, indexes recording log data might need to cope with a rapid and continual + influx of information that needs to be ingested quickly, but does not require the information to be + immediately available for querying. In this case, consider reducing the frequency of refreshes. This + feature is also described in the section [Tuning large-scale data ingestion.](#tuning-large-scale-data-ingestion) +* **How quickly is the data likely to grow?** Index capacity is determined by the number of shards + specified when the index is created. To allow for growth, specify an adequate number of shards (the + default is five). If the index is initially created on a single node, all five shards will be located on + that node, but as the volume of data grows additional nodes can be added and Elasticsearch will + dynamically distribute shards across nodes. However, each shard has an overhead. All searches in an index + will query all shards, so creating a large number of shards for a small amount of data can slow data + retrievals (avoid the [Kagillion shards](https://www.elastic.co/guide/en/elasticsearch/guide/current/kagillion-shards.html) scenario). + + Some workloads (such as logging) might create a new index each day, and if you observe that the number of + shards is insufficient for the volume of data, you should change it prior to creating the next index + (existing indexes will be unaffected). If you must distribute existing data across more shards, then one + option is to reindex the information.
Create a new index with the appropriate configuration and copy the + data into it. This process can be made transparent to applications by using [index aliases](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html). +* **Does data need to be partitioned between users in a multitenancy scenario?** You can create separate + indexes for each user, but this can be expensive if each user only has a moderate amount of data. Instead, + consider creating [shared indexes](https://www.elastic.co/guide/en/elasticsearch/guide/current/shared-index.html) and use [aliases based on filters](https://www.elastic.co/guide/en/elasticsearch/guide/current/faking-it.html) to direct requests + to the per-user data. To keep the data for a user together in the same shard, override the default + routing configuration for the index and route data based on some identifying attribute of the user. +* **Is data long or short-lived?** If you are using a set of Azure VMs to implement an Elasticsearch + cluster, you can store ephemeral data on a local resource system disk rather than an attached drive. + Using a VM SKU that utilizes an SSD for the resource disk can improve I/O performance. However, any + information held on the resource disk is temporary and may be lost if the VM restarts (see the section + When Will the Data on a Temporary Drive Be Lost in [Understanding the temporary drive on Microsoft Azure Virtual Machines](http://blogs.msdn.com/b/mast/archive/2013/12/07/understanding-the-temporary-drive-on-windows-azure-virtual-machines.aspx) for more details). If you need to retain data between restarts, create data disks to hold this information and attach them to the VM. +* **How active is the data?** Azure VHDs are subject to throttling if the amount of read/write activity + exceeds specified parameters (currently 500 input/output operations per second (IOPS) for a disk attached to a Standard Tier VM, and 5000 IOPS + for a Premium Storage disk). 
+ + To reduce the chances of throttling and increase I/O performance, consider creating multiple data disks + for each VM and configuring Elasticsearch to stripe data across these disks as described in the [Disk and file system requirements](index.md#disk-and-file-system-requirements). + + You should select a hardware configuration that helps to minimize the number of disk I/O + read operations by ensuring that sufficient memory is available to cache frequently accessed data. This + is described in the [Memory requirements](index.md#memory-requirements) section of Running Elasticsearch on Azure. +* **What type of workload will each node need to support?** Elasticsearch benefits from having memory + available to cache data in (in the form of the file system cache) and for the JVM heap as described + in the [Memory requirements](index.md#memory-requirements) section of Running Elasticsearch on Azure. + + The amount of memory, number of CPU cores, and quantity of available disks are set by the + SKU of the virtual machine. For more information, see [Virtual Machines Pricing](http://azure.microsoft.com/pricing/details/virtual-machines/) on the Azure website. + +### Virtual machine options +You can provision VMs in Azure using a number of different SKUs. The resources available to an Azure VM +depend on the SKU selected. Each SKU offers a different mix of cores, memory, and storage. You need to select +an appropriate size of VM that will handle the expected workload but that will also prove cost-effective. +Start with a configuration that will meet your current requirements (perform benchmarking to test, as +described later in this document). You can scale a cluster later by adding more VMs running Elasticsearch +nodes. + +[Sizes for Virtual Machines](/azure/virtual-machines/virtual-machines-linux-sizes/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json) on the Azure website documents the +various options and SKUs available for VMs.
+ +You should match the size and resources of a VM to the role that nodes running on the VM will perform. + +For a data node: + +* Allocate up to 30 GB or 50% of the available RAM memory to the Java heap, whichever is the lower. Leave + the remainder to the operating system to use for caching files. If you are using Linux, you can specify + the amount of memory to allocate to the Java heap by setting the ES\_HEAP\_SIZE environment variable + before running Elasticsearch. Alternatively, if you are using Windows or Linux, you can stipulate memory + size with the *Xmx* and *Xms* parameters when you start Elasticsearch. + + Depending on the workload, fewer large VMs may not be as effective for performance as using a larger + number of moderately sized VMs. You should conduct tests that can measure the tradeoffs between the + additional network traffic and maintenance involved versus the costs of increasing the number of cores + available and the reduced disk contention on each node. +* Use premium storage for storing Elasticsearch data. This is discussed in more detail in the [Storage options](#storage-options) section. +* Use multiple disks of the same size and stripe data across these disks. The SKU of your VMs will dictate the maximum number of data disks that you can attach. For more information, see [Disk and file system requirements](index.md#disk-and-file-system-requirements). +* Use a multi-core CPU SKU with at least 2 cores, preferably 4 or more. + +For a client node: + +* Do not allocate disk storage for Elasticsearch data, dedicated clients do not store data on disk. +* Ensure that adequate memory is available to handle workloads. Bulk insert requests are read into memory + prior to the data being sent to the various data nodes, and the results of aggregations and queries are + accumulated in memory before being returned to the client application. 
Benchmark your own workloads and + monitor memory use by using a tool such as Marvel or the [JVM information](https://www.elastic.co/guide/en/elasticsearch/guide/current/_monitoring_individual_nodes.html#_jvm_section) returned by using the *node/stats* API (`GET _nodes/stats`) to assess the optimal requirements. In particular, monitor the *heap\_used\_percent* metric for each node and aim to keep the + heap size below 75% of the space available. +* Ensure that sufficient CPU cores are available to receive and process the expected volume of requests. + Requests are queued as they are received prior to processing, and the volume of items that can be queued + is a function of the number of CPU cores on each node. You can monitor the queue lengths by using the + data in the [Threadpool information](https://www.elastic.co/guide/en/elasticsearch/guide/current/_monitoring_individual_nodes.html#_threadpool_section) returned by using the node/stats API. + + If the *rejected* count for a queue indicates that requests are being refused, this indicates + that the cluster is starting to bottleneck. This may be due to CPU bandwidth, but may also be due to + other factors such as lack of memory or slow I/O performance, so use this information in conjunction + with other statistics to help determine the root cause. + + Client nodes may or may not be necessary, depending on your workloads. Data ingestion workloads tend + not to benefit from using dedicated clients, whereas some searches and aggregations can run more + quickly. Be prepared to benchmark your own scenarios. + + Client nodes are primarily useful for applications that use the Transport Client API to connect to + the cluster. You can also use the Node Client API, which dynamically creates a dedicated client for + the application, using the resources of the application host environment. 
If your applications use + the Node Client API, then it may not be necessary for your cluster to contain preconfigured dedicated + client nodes. + + However, be aware that a node created using the Node Client API is a first-class member + of the cluster and as such participates in the network chatter with other nodes. Frequently starting + and stopping client nodes can create unnecessary noise across the entire cluster. + +For a master node: + +* Do not allocate disk storage for Elasticsearch data; dedicated master nodes do not store data on disk. +* CPU requirements should be minimal. +* Memory requirements depend on the size of the cluster. Information about the state of the cluster is + retained in memory. For small clusters the amount of memory required is minimal, but for a large, highly + active cluster where indexes are being created frequently and shards are moving around, the amount of state + information can grow significantly. Monitor the JVM heap size to determine whether you need to add more + memory. + +> [!NOTE] +> For cluster reliability, always create multiple master nodes and configure the +> remaining nodes to avoid the possibility of a split brain occurring. Ideally, there should be an +> odd number of master nodes. This topic is described in more detail in [Configuring resilience and recovery on Elasticsearch on Azure][Configuring resilience and recovery on Elasticsearch on Azure]. +> +> + +### Storage options +There are a number of storage options available on Azure VMs with different trade-offs affecting cost, performance, availability, and recovery that you need to consider carefully. + +Note that you should store Elasticsearch data on dedicated data disks. This will help to reduce +contention with the operating system and ensure that large volumes of Elasticsearch I/O do not compete +with operating system functions for I/O resources. + +Azure disks are subject to performance constraints.
If you find that a cluster undergoes periodic bursts +of activity then I/O requests may be throttled. To help prevent this, tune your design to balance the +document size in Elasticsearch against the volume of requests likely to be received by each disk. + +Disks based on standard storage support a maximum request rate of 500 IOPS whereas disks based on premium +storage can operate at up to 5,000 IOPS, depending on the size of the data disks. Premium storage disks are only available for the DS and GS +series of VMs. To find the maximum disk IOPS by VM size, see [Sizes for virtual machines in Azure](/azure/virtual-machines/virtual-machines-linux-sizes/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). Performance on premium storage is determined by the VM size together with disk size allocation. For more information, see [Premium Storage: High-Performance Storage for Azure Virtual Machine Workloads](/azure/storage/storage-premium-storage/). + +**Persistent data disks** + +Persistent data disks are VHDs that are backed by Azure Storage. If the VM needs to be recreated after a +major failure, existing VHDs can be easily attached to the new VM. VHDs can be created based on standard +storage (spinning media) or premium storage (SSDs). If you wish to use SSDs you must create VMs using the +DS series or better. DS machines cost the same as the equivalent D-series VMs, but you are charged extra +for using premium storage. + +In cases where the maximum transfer rate per disk is insufficient to support the expected workload, +consider either creating multiple data disks and allow Elasticsearch to [stripe data across these disks](index.md#disk-and-file-system-requirements), or implement system level [RAID 0 striping using virtual disks](/azure/virtual-machines/virtual-machines-linux-configure-raid/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). 
+ +> [!NOTE] +> Experience within Microsoft has shown that using RAID 0 is particularly beneficial for +> smoothing out the I/O effects of *spiky* workloads that generate frequent bursts of activity. +> +> + +Use premium locally redundant (or locally redundant for low-end or QA workloads) storage for the +storage account holding the disks; replicating across geographies and zones is not required for +Elasticsearch high availability. + +**Ephemeral disks** + +Using persistent disks based on SSDs requires creating VMs that support premium storage. This has a price +implication. Using the local ephemeral disk to hold Elasticsearch data can be a cost effective solution +for moderately sized nodes requiring up to approximately 800GB of storage. On the Standard-D series of +VMs, ephemeral disks are implemented using SSDs, which provide far greater performance and much lower +latency than ordinary disks. + +When using Elasticsearch, the performance can be equivalent to using premium storage without incurring +the cost – see the section [Addressing disk latency issues](#addressing-disk-latency-issues) for more +information. + +The size of the VM limits the amount of space available in ephemeral storage as described in the blog post +[D-Series Performance Expectations](https://azure.microsoft.com/blog/d-series-performance-expectations/). + +For example, a Standard\_D1 VM provides 50GB of ephemeral storage, a Standard\_D2 VM has 100GB of ephemeral storage, and a Standard\_D14 VM provides 800GB of ephemeral space. For clusters where nodes only require this amount of space, using a D-series VM with ephemeral storage can be cost effective. + +You must balance the increased throughput available with ephemeral storage against the time and costs +involved in recovering this data after a machine restart. The contents of the ephemeral disk are lost if +the VM is moved to a different host server, if the host is updated, or if the host experiences a hardware +failure.
If the data itself has a limited lifespan then this data loss might be tolerable. For +longer-lived data, it may be possible to rebuild an index or recover the missing information from a +backup. It is possible to minimize the potential for loss by using replicas held on other VMs. + +> [!NOTE] +> Do not use a **single** VM to hold critical production data. If the node fails, +> all of the data is unavailable. For critical information, ensure that data is replicated on at least +> one other node. +> +> + +**Azure files** + +The [Azure File Service](http://blogs.msdn.com/b/windowsazurestorage/archive/2014/05/12/introducing-microsoft-azure-file-service.aspx) provides shared file access using Azure Storage. You can create file shares that you can then mount on Azure VMs. Multiple VMs can mount the same file share, enabling them to access the same data. + +For performance reasons, it is not recommended that you use file shares for holding Elasticsearch data that does not need to be shared across nodes, regular data disks are more suited to this purpose. File shares can be used for creating Elasticsearch [shadow replica indexes](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shadow-replicas.html). However, this feature is currently experimental and should not be implemented in a production environment at this time. For this reason, shadow indexes are not considered further in this guidance. + +**Network options** + +Azure implements a shared networking scheme. VMs utilizing the same hardware racks compete for network resources. Therefore available network bandwidth can vary according to the time of day and the daily cycle of work running on VMs sharing the same physical network infrastructure. You have little control over these factors. It is important to understand that network performance is likely to fluctuate over time, so set user expectations accordingly. 
+ +## Scaling up nodes to support large-scale data ingestion +You can build Elasticsearch clusters using reasonably moderate hardware, and then scale up or scale out +as the volume of data grows and the number of requests increases. With Azure, you scale up by running on +bigger and more expensive VMs, or you can scale out by using additional smaller and cheaper VMs. + +You might also perform a combination of both strategies. There is no one-size-fits-all solution for all +scenarios, so to assess the best approach for any given situation you need to be prepared to undertake a +series of performance tests. + +This section is concerned with the scale-up approach; scaling out is discussed in the section +[Scaling out: conclusions](#scaling-out-conclusions). +This section describes the results of a series of benchmarks that were performed against a set of +Elasticsearch clusters containing VMs of varying sizes. The clusters were designated as small, medium, +and large. The following table summarizes the resources allocated to the VMs in each cluster. + +| Cluster | VM SKU | Number of cores | Number of data disks | RAM | +| --- | --- | --- | --- | --- | +| Small |Standard D2 |2 |4 |7GB | +| Medium |Standard D3 |4 |8 |14GB | +| Large |Standard D4 |8 |16 |28GB | + +Each Elasticsearch cluster contained 3 data nodes. These data nodes handled client requests as well as +handling data processing. Separate client nodes were not used because they offered little benefit to the +data ingestion scenario used by the tests. The cluster also contained three master nodes, one of which +was elected by Elasticsearch to coordinate the cluster. + +The tests were performed using Elasticsearch 1.7.3. The tests were initially performed on clusters +running Ubuntu Linux 14.0.4, and then repeated using Windows Server 2012. The details of the workload +performed by the tests are described in the [Appendix](#appendix-the-bulk-load-data-ingestion-performance-test).
+ +### Data ingestion performance – Ubuntu Linux 14.0.4 +The following table summarizes the overall results of running the tests for two hours for each configuration: + +| Configuration | Sample count | Average response time (ms) | Throughput (operations/s) | +| --- | --- | --- | --- | +| Small |67057 |636 |9.3 | +| Medium |123482 |692 |17.2 | +| Large |197085 |839 |27.4 | + +The throughput and number of samples processed for the three configurations are in the approximate ratio +1:2:3. However, the resources available in terms of memory, CPU cores, and disks have the ratio 1:2:4. It +was felt to be worth investigating the low-level performance details of the nodes in the cluster to +assess why this might be the case. This information can help to determine whether there are limits to +scaling up and when it may be better to consider scaling out. + +### Determining limiting factors: network utilization +Elasticsearch is dependent on having sufficient network bandwidth to support the influx of client +requests as well as the synchronization information that flows between nodes in the cluster. As +highlighted earlier, you have limited control over the bandwidth availability, which depends on many +variables such as the datacenter in use, and the current network load of other VMs sharing the same +network infrastructure. However, it is still worth examining the network activity for each cluster to verify that the volume of traffic is not excessive. The graph below shows a comparison of the +network traffic received by node 2 in each of the clusters (the volumes for the other nodes in each +cluster were very similar).
+ +![Network activity graph](./images/data-ingestion-image1.png) + +The average bytes received per second for node 2 in each cluster configuration over the two hour period +were as follows: + +| Configuration | Average number of bytes received/sec | +| --- | --- | +| Small |3993640.3 | +| Medium |7311689.9 | +| Large |11893874.2 | + +The tests were conducted while the system was running in **steady state**. In situations where index +rebalancing or node recovering is occurring, data transmissions between nodes holding primary and replica +shards can generate significant network traffic. The effects of this process are described more in the +document [Configuring resilience and recovery on Elasticsearch on Azure][Configuring resilience and recovery on Elasticsearch on Azure]. + +### Determining limiting factors: CPU utilization +The rate at which requests are handled is at least partially governed by the available processing +capacity. Elasticsearch accepts bulk insert requests on the bulk insert queue. Each node has a set of +bulk insert queues determined by the number of available processors. By default, there is one queue for +each processor and each queue can hold up to 50 outstanding requests before they will start to be +rejected. + +Applications should send requests at a rate that does not cause the queues to overspill. The +number of items in each queue at any one time is going to be a function of the rate at which requests are +sent by client applications and the rate at which these same requests are retrieved and processed by +Elasticsearch. For this reason, one important statistic captured concerns the error rate summarized in +the following table. 
+ +| Configuration | Total samples | Error count | Error rate | +| --- | --- | --- | --- | +| Small |67057 |0 |0.00% | +| Medium |123483 |1 |0.0008% | +| Large |200702 |3617 |1.8% | + +Each of these errors was caused by the following Java exception: + +``` +org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$PrimaryPhase$1@75a30c1b]; ] +[219]: index [systembase], type [logs], id [AVEAioKb2TRSNcPa_8YG], message [RemoteTransportException[[esdatavm2][inet[/10.0.1.5:9300]][indices:data/write/bulk[s]]]; nested: EsRejectedExecutionException[rejected execution (queue capacity 50) +``` + +Increasing the number of queues and/or the length of each queue might reduce the number of errors, but +this approach can only cope with bursts of short duration. Doing this while running a sustained series of +data ingestion tasks will simply delay the point at which errors start occurring. Furthermore, this +change will not improve the throughput and will likely harm the response time of client applications as +requests will be queued for longer before being processed. + +The default index structure of 5 shards with 1 replica (10 shards in all) results in a modest imbalance +in load between the nodes in a cluster: two nodes will contain three shards while the other node will contain four. The busiest node is most likely to be the item that restricts throughput the most, which is why +this node has been selected in each case. + +The following set of graphs illustrates the CPU utilization for the busiest node in each cluster. + +![CPU utilization graph](./images/data-ingestion-image2.png) + +![CPU utilization graph](./images/data-ingestion-image3.png) + +![CPU utilization graph](./images/data-ingestion-image4.png) + +For the small, medium, and large clusters, the average CPU utilization for these nodes was 75.01%, +64.93%, and 64.64%, respectively.
Rarely does utilization actually hit 100%, and utilization drops as the size of the +nodes and the available CPU power increases. CPU power is therefore unlikely to be a factor +limiting the performance of the large cluster. + +### Determining limiting factors: memory +Memory use is another important aspect that can influence performance. For the tests, Elasticsearch was allocated 50% of the available memory. This is in line with [documented recommendations](https://www.elastic.co/guide/en/elasticsearch/guide/current/heap-sizing.html#_give_half_your_memory_to_lucene). While the tests were running, the JVM was monitored for excess garbage collection activity (an indication of lack of heap memory). In all cases, the heap size was stable and the JVM exhibited low garbage collection activity. The screenshot below shows a snapshot of Marvel, highlighting the key JVM statistics for a short period while the test was running on the large cluster. + +![Marvel screenshot](./images/data-ingestion-image5.png) + +***JVM memory and garbage collection activity on the large cluster.*** + +### Determining limiting factors: disk I/O rates +The remaining physical feature on the server side that might constrain performance is the performance of the disk I/O subsystem. The graph below compares the disk activity in terms of bytes written for the busiest nodes in each cluster.
+ +![Disk activity graph](./images/data-ingestion-image6.png) + +The following table shows the average bytes written per second for node 2 in each cluster configuration over the two hour period: + +| Configuration | Average number of bytes written/sec | +| --- | --- | +| Small |25502361.94 | +| Medium |48856124.5 | +| Large |88137675.46 | + +The volume of data written increases with the number of requests being processed by a cluster, but the I/O rates are within the limits of Azure storage (disks created by using Azure storage can support sustained rates of 10s to 100s of MB/s, depending on whether Standard or Premium storage is used). Examining the amount of time spent waiting for disk I/O helps to explain why the disk throughput is well below the theoretical maximum. The graphs and table below show these statistics for the same three nodes: + +> [!NOTE] +> The disk wait time is measured by monitoring the percentage of CPU time during which +> processors are blocked waiting for I/O operations to complete. +> +> + +![Disk wait times graph](./images/data-ingestion-image7.png) + +![Disk wait times graph](./images/data-ingestion-image8.png) + +![Disk wait times graph](./images/data-ingestion-image9.png) + +| Configuration | Average disk wait CPU time (%) | +| --- | --- | +| Small |21.04 | +| Medium |14.48 | +| Large |15.84 | + +This data indicates that a significant proportion of CPU time (between roughly 14% and 21%) is spent waiting for disk I/O to complete. This is restricting the ability of Elasticsearch to process requests and store data. + +During the test run, the large cluster inserted in excess of **five hundred million documents**. Allowing the test to continue showed that wait times increased significantly when the database contained over six hundred million documents. The reasons for this behavior were not fully investigated, but may be due to disk fragmentation causing increased disk latency.
+ +Increasing the size of the cluster over more nodes might help to alleviate the effects of this behavior. In extreme cases it may be necessary to defragment a disk that is showing excessive I/O times. However, defragmenting a large disk might take a considerable time (possibly more than 48 hours for a 2TB VHD drive), and simply reformatting the drive and allowing Elasticsearch to recover the missing data from replica shards could be a more cost-effective approach. + +### Addressing disk latency issues +The tests were initially performed using VMs configured with standard disks. A standard disk is based on spinning media and as a result is subject to rotational latency and other bottlenecks that can constrain I/O rates. Azure also provides premium storage in which disks are created using SSD devices. These devices have no rotational latency and as a result should provide improved I/O speeds. + +The table below compares the results of replacing standard disks with premium disks in the large cluster (the Standard D4 VMs in the large cluster were replaced with Standard DS4 VMs; the number of cores, memory and disks was the same in both cases, the only difference being that the DS4 VMs used SSDs). + +| Configuration | Sample count | Average response time (ms) | Throughput (operations/s) | +| --- | --- | --- | --- | +| Large - Standard |197085 |839 |27.4 | +| Large - Premium |255985 |581 |35.6 | + +Response times were noticeably better, resulting in an average throughput much closer to 4x that of the small cluster. This is more in line with the resources available on a Standard DS4 VM.
Average CPU utilization on the busiest node in the cluster (node 1 in this case) increased as it spent less time waiting for I/O to complete: + +![CPU utilization graph](./images/data-ingestion-image10.png) + +The reduction in disk wait time becomes apparent when you consider the following graph, which shows that for the busiest node this statistic dropped to around 1% on average: + +![Disk wait times graph](./images/data-ingestion-image11.png) + +There is a price to pay for this improvement, however. The number of ingestion errors increased by a factor of 10 to 35797 (12.3%). Again, most of these errors were the result of the bulk insert queue overflowing. Given that the hardware now appears to be running close to capacity, it may be necessary to either add more nodes or throttle back the rate of bulk inserts to reduce the volume of errors. These issues are discussed later in this document. + +### Testing with ephemeral storage +The same tests were repeated on a cluster of D4 VMs using ephemeral storage. On D4 VMs, ephemeral storage is implemented as a single 400GB SSD. The number of samples processed, response time, and throughput were all very similar to the figures reported for the cluster based on DS4 VMs with premium storage. + +| Configuration | Sample count | Average response time (ms) | Throughput (operations/s) | +| --- | --- | --- | --- | +| Large - Premium |255985 |581 |35.6 | +| Large – Standard (Ephemeral disk) |255626 |585 |35.5 | + +The error rate was also similar (33862 failures out of 289488 requests in total – 11.7%). + +The following graphs show the CPU utilization and disk wait statistics for the busiest node in the cluster (node 2 this time): + +![CPU utilization graph](./images/data-ingestion-image12.png) + +![Disk wait times graph](./images/data-ingestion-image13.png) + +In this case, in performance terms alone, using ephemeral storage could be considered a more cost-effective solution than using premium storage.
+ +### Data ingestion performance – Windows Server 2012 +The same tests were repeated using a set of Elasticsearch clusters with nodes running Windows Server 2012. The purpose of these tests was to establish what effects, if any, the choice of operating system might have on cluster performance. + +To illustrate the scalability of Elasticsearch on Windows, the following table shows the throughput and response times achieved for the small, medium, and large cluster configurations. Note that these tests were all performed with Elasticsearch configured to use SSD ephemeral storage, as the tests with Ubuntu had shown that disk latency was likely to be a critical factor in achieving maximum performance: + +| Configuration | Sample count | Average response time (ms) | Throughput (operations/s) | +| --- | --- | --- | --- | +| Small |90295 |476 |12.5 | +| Medium |169243 |508 |23.5 | +| Large |257115 |613 |35.6 | + +These results indicate how Elasticsearch scales with VM size and available resources on Windows. + +The following table compares the results for the large cluster on Ubuntu and Windows: + +| Operating system | Sample count | Average response time (ms) | Throughput (operations/s) | Error rate (%) | +| --- | --- | --- | --- | --- | +| Ubuntu |255626 |585 |35.5 |11.7 | +| Windows |257115 |613 |35.6 |7.2 | + +The throughput was consistent with that for the large Ubuntu clusters, although the response time was slightly higher. This may be accounted for by the lower error rate (errors are reported more quickly than successful operations, so have a lower response time). + +The CPU utilization reported by the Windows monitoring tools was marginally higher than that of Ubuntu. However, you should treat direct comparisons of measurements such as these across operating systems with extreme caution due to the way different operating systems report these statistics.
Additionally, information on disk latency in terms of CPU time spent waiting for I/O is not available in the same way as it is for Ubuntu. The important point is that CPU utilization was high, indicating that time spent waiting for I/O was low: + +![CPU utilization graph](./images/data-ingestion-image14.png) + +### Scaling up: conclusions +The tests indicate that Elasticsearch performance for a well-tuned cluster is likely to be equivalent on Windows and Ubuntu, and that it scales up in a similar pattern on both operating systems. For best performance, **use premium storage for holding Elasticsearch data**. + +## Scaling out clusters to support large-scale data ingestion +Scaling out is the complementary approach to scaling up investigated in the previous section. An important feature of Elasticsearch is the inherent horizontal scalability built into the software. Increasing the size of a cluster is simply a matter of adding more nodes. You do not need to perform any manual operations to redistribute indexes or shards as these tasks are handled automatically, although there are a number of configuration options available that you can use to influence this process. + +Adding more nodes helps to improve performance by spreading the load across more machinery. As you add more nodes, you may also need to consider reindexing data to increase the number of shards available. You can preempt this process to some extent by creating indexes that have more shards than there are available nodes initially. When further nodes are added, the shards can be distributed. + +Besides taking advantage of the horizontal scalability of Elasticsearch, there are other reasons for implementing indexes that have more shards than nodes. Each shard is implemented as a separate data structure (a [Lucene](https://lucene.apache.org/) index), and has its own internal mechanisms for maintaining consistency and handling concurrency.
Creating multiple shards helps to increase parallelism within a node and can improve performance. + +However, maintaining performance while scaling is a balancing act. The more nodes and shards a cluster contains, the more effort is required to synchronize the work performed by the cluster, which can decrease throughput. For any given workload, there is an optimal configuration that maximizes ingestion performance while minimizing the maintenance overhead. This configuration is heavily dependent on the nature of the workload and the cluster; specifically, the volume, size, and content of the documents, the rate at which ingestion occurs, and the hardware on which the system runs. + +This section summarizes the results of investigations into sizing clusters intended to support the workload used by the performance tests described previously. The same test was performed on clusters with VMs based on the large VM size (Standard D4 with 8 CPU cores, 16 data disks, and 28GB of RAM) running Ubuntu Linux 14.0.4, but configured with different numbers of nodes and shards. The results are not intended to be definitive as they apply only to one specific scenario, but they can act as a good starting point to help you to analyze the horizontal scalability of your clusters, and generate numbers for the optimal ratio of shards to nodes that best meet your own requirements. + +### Baseline results – 3 nodes +To obtain a baseline figure, the data ingestion performance test was run against a 3 node cluster with 5 shards and 1 replica. This is the default configuration for an Elasticsearch index. In this configuration, Elasticsearch distributes 2 primary shards to 2 of the nodes, and the remaining primary shard is stored on the third node. The table below summarizes the throughput in terms of bulk ingestion operations per second, and the number of documents that were successfully stored by the test. 
+ +> [!NOTE] +> In the tables that follow in this section, the distribution of the primary shards is presented as a number for each node separated by dashes. For example, the 5-shard 3-node layout is described as 2-2-1. The layout of replica shards is not included. They will follow a similar scheme to the primary shards. +> +> + +| Configuration | Document count | Throughput (operations/s) | Shard layout | +| --- | --- | --- | --- | +| 5 shards |200560412 |27.86 |2-2-1 | + +### 6-node results +The test was repeated on a 6 node cluster. The purpose of these tests was to try and ascertain more precisely the effects of storing more than one shard on a node. + +| Configuration | Document count | Throughput (operations/s) | Shard layout | +| --- | --- | --- | --- | +| 4 shards |227360412 |31.58 |1-1-0-1-1-0 | +| 7 shards |268013252 |37.22 |2-1-1-1-1-1 | +| 10 shards |258065854 |35.84 |1-2-2-2-1-2 | +| 11 shards |279788157 |38.86 |2-2-2-1-2-2 | +| 12 shards |257628504 |35.78 |2-2-2-2-2-2 | +| 13 shards |300126822 |41.68 |2-2-2-2-2-3 | + +These results appear to indicate the following trends: + +* More shards per node improves throughput. With the small number of shards per node created for these tests, this phenomenon was expected, for reasons described earlier. +* An odd number of shards gives better performance than an even number. The reasons for this are less clear, but it *may* be that the routing algorithm that Elasticsearch uses is better able to distribute the data across shards in this case, leading to a more even load per node. + +To test these hypotheses, several further tests were performed with larger numbers of shards. On advice from Elasticsearch, it was decided to use a prime number of shards for each test as these give a reasonable distribution of odd numbers for the range in question. 
+ +| Configuration | Document count | Throughput (operations/s) | Shard layout | +| --- | --- | --- | --- | +| 23 shards |312844185 |43.45 |4-4-4-3-4-4 | +| 31 shards |309930777 |43.05 |5-5-5-5-6-5 | +| 43 shards |316357076 |43.94 |8-7-7-7-7-7 | +| 61 shards |305072556 |42.37 |10-11-10-10-10-10 | +| 91 shards |291073519 |40.43 |15-15-16-15-15-15 | +| 119 shards |273596325 |38.00 |20-20-20-20-20-19 | + +These results suggested that a tipping point was reached at around 23 shards. After this point, increasing the number of shards caused a small degradation in performance (the throughput for 43 shards is possibly an anomaly). + +### 9-node results +The tests were repeated using a cluster of 9 nodes, again using a prime number of shards. + +| Configuration | Document count | Throughput (operations/s) | Shard layout | +| --- | --- | --- | --- | +| 17 shards |325165364 |45.16 |2-2-2-2-2-2-2-2-1 | +| 19 shards |331272619 |46.01 |2-2-2-2-2-2-2-2-3 | +| 29 shards |349682551 |48.57 |3-3-3-4-3-3-3-4-3 | +| 37 shards |352764546 |49.00 |4-4-4-4-4-4-4-4-5 | +| 47 shards |343684074 |47.73 |5-5-5-6-5-5-5-6-5 | +| 89 shards |336248667 |46.70 |10-10-10-10-10-10-10-10-9 | +| 181 shards |297919131 |41.38 |20-20-20-20-20-20-20-20-21 | + +These results showed a similar pattern, with a tipping point around 37 shards. + +### Scaling out: conclusions +Using a crude extrapolation, the results of the 6-node and 9-node tests indicated that, for this specific scenario, the ideal number of shards to maximize performance was 4n+/-1, where n is the number of nodes. This *may* be a function of the number of bulk insert threads available, which in turn is dependent on the number of CPU cores, the rationale being as follows (see [Multidocument Patterns](https://www.elastic.co/guide/en/elasticsearch/guide/current/distrib-multi-doc.html#distrib-multi-doc) for details): + +* Each bulk insert request sent by the client application is received by a single data node. 
+* The data node builds a new bulk insert request for each primary shard affected by the original request and forwards them to the other nodes, in parallel. +* As each primary shard is written, another request is sent to each replica for that shard. The primary shard waits for the request sent to the replica to complete before finishing. + +By default, Elasticsearch creates one bulk insert thread for each available CPU core in a VM. In the case of the D4 VMs used by this test, each CPU contained 8 cores, so 8 bulk insert threads were created. The index used spanned 4 (in one case 5) primary shards on each node, but there were also 4 (5) replicas on each node. Inserting data into these shards and replicas could consume up to 8 threads on each node per request, matching the number available. Increasing or reducing the number of shards might cause threading inefficiencies as threads are possibly left unoccupied or requests are queued. However, without further experimentation this is just a theory and it is not possible to be definitive. + +The tests also illustrated one other important point. In this scenario, increasing the number of nodes can improve data ingestion throughput, but the results do not necessarily scale linearly. Conducting further tests with 12 and 15 node clusters could show the point at which scale out brings little additional benefit. If this number of nodes provides insufficient storage space, it may be necessary to return to the scale up strategy and start using more or bigger disks based on premium storage. + +> [!IMPORTANT] +> Do not take the ratio 4n+/-1 as a magic formula that will always work for every cluster. If you have fewer or more CPU cores available, the optimal shard configuration could be different. The findings were based on a specific workload that only did data ingestion. For workloads that also include a mix of queries and aggregations the results could be very diverse. 
+> +> Furthermore, the data ingestion workload utilized a single index. In many situations, the data is likely to be spread across multiple indexes leading to different patterns of resource use. +> +> The important point of this exercise is to understand the method used rather than the results obtained. You should be prepared to perform your own scalability assessment based on your own workloads to obtain information that is most applicable to your own scenario. +> +> + +## Tuning large-scale data ingestion +Elasticsearch is highly configurable, with many switches and settings that you can use to optimize the performance for specific use cases and scenarios. This section describes some common examples. Be aware that the flexibility that Elasticsearch provides in this respect comes with a warning: it is very easy to detune Elasticsearch and make performance worse. When tuning, only make one change at a time, and always measure the effects of any changes to ensure that they are not detrimental to your system. + +### Optimizing resources for indexing operations +The following list describes some points you should consider when tuning an Elasticsearch cluster to support large-scale data ingestion. The first two items are most likely to have an immediately obvious effect on performance while the remainder are more marginal, depending on the workload: + +* New documents added to an index only become visible to searches when the index is refreshed. Refreshing an index is an expensive operation, so it is only performed periodically rather than as each document is created. The default refresh interval is 1 second. If you are performing bulk operations, you should consider temporarily disabling index refreshes. Set the index *refresh\_interval* to -1.
+ + ```http + PUT /my_busy_index + { + "settings" : { + "refresh_interval": -1 + } + } + ``` + + Trigger a refresh manually by using the [*\_refresh*](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html) API at the end of the operation to make the data visible. See [Bulk Indexing Usage](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-update-settings.html#bulk) for more information. Further details on the [impact of changing the refresh interval on data ingestion](#the-impact-of-changing-the-index-refresh-interval-on-data-ingestion-performance) are described later. +* If an index is replicated, each indexing operation (document create, update, or delete) is repeated on the replica shards as they occur in the primary shard. Consider disabling replication during bulk import operations and then reenable it when the import is complete: + + ```http + PUT /my_busy_index + { + "settings" : { + "number_of_replicas": 0 + } + } + ``` + + When you reenable replication, Elasticsearch performs a byte-for-byte network transfer of data from the index to each replica. This is more efficient than repeating the indexing process document by document on each node. The risk is that data can be lost if the primary node fails while performing the bulk import, but recovery may simply be a matter of starting the import again. The [impact of replication on data ingestion performance](#the-impact-of-replicas-on-data-ingestion-performance) is described in more detail later. +* Elasticsearch attempts to balance the resources available between those required for querying and those required for ingesting data. As a result, it may throttle data ingestion performance (throttling events are recorded in the Elasticsearch log). This restriction is intended to prevent a large number of index segments from being created concurrently that require merging and saving to disk, a process that can monopolize resources.
If your system is not currently performing queries, you can disable data ingestion throttling. This should allow indexing to maximize performance. You can disable throttling for an entire cluster as follows: + + ```http + PUT /_cluster/settings + { + "transient" : { + "indices.store.throttle.type": "none" + } + } + ``` + + Set the throttle type of the cluster back to *"merge"* when ingestion has completed. Also note that disabling throttling may lead to instability in the cluster, so ensure that you have procedures in place that can recover the cluster if necessary. +* Elasticsearch reserves a proportion of the heap memory for indexing operations, the remainder is mostly used by queries and searches. The purpose of these buffers is to reduce the number of disk I/O operations, with the aim of performing fewer, larger writes than more, smaller writes. The default proportion of heap memory allocated is 10%. If you are indexing a large volume of data then this value might be insufficient. For systems that support high-volume data ingestion, you should allow up to 512MB of memory for each active shard in the node. For example, if you are running Elasticsearch on D4 VMs (28GB RAM) and have allocated 50% of the available memory to the JVM (14GB), then 1.4GB will be available for use by indexing operations. If a node contains 3 active shards, then this configuration is probably sufficient. However, if a node contains more shards than this, consider increasing the value of the *indices.memory.index\_buffer\_size* parameter in the elasticsearch.yml configuration file. For more information, see [Performance Considerations for Elasticsearch Indexing](https://www.elastic.co/blog/performance-considerations-elasticsearch-indexing). + + Allocating more than 512MB per active shard will most likely not improve indexing performance and may actually be detrimental as less memory is available for performing other tasks. 
Also be aware that allocating more heap space for index buffers removes memory for other operations such as searching and aggregating data, and can slow the performance of query operations. +* Elasticsearch restricts the number of threads (the default value is 8) that can concurrently perform indexing operations in a shard. If a node only contains a small number of shards, then consider increasing the *index\_concurrency* setting for an index that is subject to a large volume of indexing operations, or is the target of a bulk insert, as follows: + + ```http + PUT /my_busy_index + { + "settings" : { + "index_concurrency": 20 + } + } + ``` +* If you are performing a large number of indexing and bulk operations for a short period of time, you can increase the number of *index* and *bulk* threads available in the thread pool and extend the size of the *bulk insert* queue for each data node. This will allow more requests to be queued rather than being discarded. For more information, see [Thread Pool](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-threadpool.html). If you are performing sustained high levels of data ingestion, then increasing the number of bulk threads is not recommended. Instead create additional nodes and use sharding to distribute the indexing load across these nodes. Alternatively, consider sending bulk insert batches serially rather than in parallel as this will act as a natural throttling mechanism that can reduce the chances of the errors due to a bulk insert queue overflowing. + +### The impact of changing the index refresh interval on data ingestion performance +The refresh interval governs the rate at which ingested data becomes visible to queries and aggregations, but frequent refreshes can impact the performance of data ingestion operations. The default refresh interval is 1 second. You can disable refreshing completely, but this might not be appropriate to your workload. 
You can experiment by trying different intervals and establishing the sweet spot that balances ingestion performance against the need to present up to date information. + +As an example of the impact, the data ingestion performance test was repeated on an Elasticsearch cluster consisting of 7 shards spread across 3 data nodes. The index had a single replica. Each data node was based on a D4 VM (28GB RAM, 8 processor cores) using SSD-backed ephemeral storage to hold the data. Each test ran for 1 hour. + +In this test, the refresh rate was set to the default value of 1 second. The following table shows the throughput and response times for this test compared to a separate run where the refresh rate was reduced to once every 30 seconds. + +| Refresh rate | Sample count | Average response time – successful operations (ms) | Throughput – successful operations (operations/s) | +| --- | --- | --- | --- | +| 1 second |93755 |460 |26.0 | +| 30 seconds |117758 |365 |32.7 | + +In this test, dropping the refresh rate resulted in an 18% improvement in throughput, and a 21% reduction in average response time. The following graphs generated using Marvel illustrate the primary reason for this difference. The figures below show the index merge activity that occurred with the refresh interval set to 1 second and 30 seconds. + +Index merges are performed to prevent the number of in-memory index segments from becoming too numerous. A 1 second refresh interval generates a large number of small segments which have to be merged frequently, whereas a 30 second refresh interval generates fewer large segments which can be merged more optimally. 
+ +![Index merge activity](./images/data-ingestion-image15.png) + +***Index merge activity for an index refresh rate of 1 second*** + +![Index merge activity](./images/data-ingestion-image16.png) + +***Index merge activity for an index refresh rate of 30 seconds*** + +### The impact of replicas on data ingestion performance +Replicas are an essential feature of any resilient cluster, and without using them you risk losing information if a node fails. However, replicas increase the amount of disk and network I/O being performed and can be detrimental to the rate at which data is ingested. For reasons described earlier, it may be beneficial to temporarily disable replicas for the duration of large-scale data upload operations. + +Data ingestion performance tests were repeated using three configurations: + +* Using a cluster with no replicas. +* Using a cluster with 1 replica. +* Using a cluster with 2 replicas. + +In all cases, the cluster contained 7 shards spread across 3 nodes and ran on VMs configured as described in the previous set of tests. The test index used a refresh interval of 30 seconds. + +The following table summarizes the response times and throughput of each test for comparison purposes: + +| Configuration | Sample count | Average response time – successful operations (ms) | Throughput – successful operations (operations/s) | Data Ingestion errors | +| --- | --- | --- | --- | --- | +| 0 replicas |215451 |200 |59.8 |0 | +| 1 replica |117758 |365 |32.7 |0 | +| 2 replicas |94218 |453 |26.1 |194262 | + +The decline in performance as the number of replicas increases is clear, but you should also notice the large volume of data ingestion errors in the third test. The messages generated by these errors indicated that they were due to the bulk insert queue overflowing causing requests to be rejected. These rejections occurred very quickly, which is why the number is large.
+ +> [!NOTE] +> The results of the third test highlight the importance of using an intelligent retry strategy when transient errors such as this occur—back off for a short period to allow the bulk insert queue to drain before reattempting to repeat the bulk insert operation. +> +> + +The next sets of graphs compare the response times during the tests. In each case the first graph shows the overall response times, while the second graph zooms in on the response times for the fastest operations (note that the scale of the first graph is ten times that of the second). You can see how the profile of the response times varies across the three tests. + +With no replicas, most operations took between 75ms and 750ms, with the quickest response times around 25ms: + +![Response times graph](./images/data-ingestion-image17.png) + +With 1 replica the most populated operational response time was in the range 125ms to 1250ms. The quickest responses took approximately 75ms, although there were fewer of these quick responses than in the 0 replicas case. There were also far more responses that took significantly longer than the most common cases, in excess of 1250ms: + +![Response times graph](./images/data-ingestion-image18.png) + +With 2 replicas, the most populated response time range was 200ms to 1500ms, but there were far fewer results below the minimum range than in the 1 replica test. However, the pattern of results above the upper limit was very similar to that of the 1 replica test. This is most likely due to the effects of the bulk insert queue overflowing (exceeding a queue length of 50 requests). The additional work required to maintain 2 replicas causes the queue to overflow more frequently, preventing ingestion operations from having excessive response times.
Operations are rejected quickly rather than taking a lengthy period of time, possibly causing timeout exceptions or impacting the responsiveness of client applications (this is the purpose of the bulk insert queue mechanism): + +![Response times graph](./images/data-ingestion-image19.png) + +Using Marvel, you can see the effect of the number of replicas on the bulk index queue. The figure below shows the data from Marvel that depicts how the bulk insert queue filled during the test. The average queue length was around 40 requests, but periodic bursts caused it to overflow and requests were rejected as a result: + +![Marvel screenshot](./images/data-ingestion-image20.png) + +***Bulk index queue size and number of requests rejected with 2 replicas.*** + +You should compare this with the figure below which shows the results for a single replica. The Elasticsearch engine was able to process requests quickly enough to keep the average queue length at around 25, and at no point did the queue length exceed 50 requests so no work was rejected. + +![Marvel screenshot](./images/data-ingestion-image21.png) + +***Bulk index queue size and number of requests rejected with 1 replica.*** + +## Best practices for clients sending data to Elasticsearch +Many aspects of performance are concerned not only with how the system operates internally but also with how the system is used by client applications. Elasticsearch provides many features that can be utilized by the data ingestion process; generating unique identifiers for documents, performing document analysis, and even using scripting to transform the data as it is stored are some examples. However, these functions all add to the load on the Elasticsearch engine, and in many cases can be performed more efficiently by client applications prior to transmission. + +> [!NOTE] +> This list of best practices is primarily concerned with ingesting new data rather than modifying existing data already stored in an index.
Ingestion workloads are performed as append operations by Elasticsearch, whereas data modifications are performed as delete/append operations. This is because documents in an index are immutable, so modifying a document involves replacing the entire document with a new version. You can either perform an HTTP PUT request to overwrite an existing document, or you can use the Elasticsearch *update* API that abstracts a query to fetch an existing document, merges the changes, and then performs a PUT to store the new document. +> +> + +Additionally, consider implementing the following practices where appropriate: + +* Disable text analysis for index fields that do not need to be analyzed. Analysis involves tokenizing text to enable queries that can search for specific terms. However, it can be a CPU-intensive task, so be selective. If you are using Elasticsearch to store log data, it might be useful to tokenize the detailed log messages to allow complex searches. Other fields, such as those containing error codes or identifiers should probably not be tokenized (how frequently are you likely to request the details of all messages whose error code contains a "3", for example?) The following code disables analysis for the *name* and *hostip* fields in the *logs* type of the *systembase* index. + + ```http + PUT /systembase + { + "settings" : { + ... + }, + "logs" : { + ... + "name": { + "type": "string", + "index" : "not_analyzed" + }, + "hostip": { + "type": "string", + "index" : "not_analyzed" + }, + ... + } + } + ``` +* Disable the *_all* field of an index if it is not required. The *\_all* field concatenates the values of the other fields in the document for analysis and indexing. It is useful for performing queries that can match against any field in a document. If clients are expected to match against named fields, then enabling *\_all* simply incurs CPU and storage overhead. 
The following example shows how to disable the *\_all* field for the *logs* type in the *systembase* index. + + ```http + PUT /systembase + { + "settings" : { + ... + }, + "logs" : { + "_all": { + "enabled" : false + }, + ..., + ... + } + } + ``` + + Note that you can create a selective version of *\_all* that only contains information from specific fields. For more information, see [Disabling the \_all Field](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-all-field.html#disabling-all-field). +* Avoid dynamic mappings in indexes. Dynamic mapping is a powerful feature, but adding new fields to an existing index requires coordinating changes to the index structure across nodes and can temporarily cause the index to be locked. Dynamic mapping can also lead to an explosion in the number of fields and the consequent volume of index metadata if not used carefully. In turn, this results in increased storage requirements and I/O, both for ingesting data and when performing queries. Both of these issues will impact performance. Consider disabling dynamic mapping and define your index structures explicitly. For more information, see [Dynamic Field Mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html#dynamic-field-mapping). +* Understand how to balance the workload to meet conflicting requirements. You should always consider that data ingestion can have a significant impact on the performance of other concurrent operations, such as users performing queries. Data ingestion might be subject to sudden bursts, and if the system attempts to consume all the data arriving immediately the influx could cause query rates to slow down to a trickle. 
Elasticsearch tries to prevent this situation from occurring by regulating the rate at which it will process ingestion requests through the bulk insert queue (refer to the section [Determining limiting factors – CPU utilization](#determining-limiting-factors-cpu-utilization) for further information), but this mechanism should really be treated as a last resort. If your application code is not prepared to handle rejected requests you risk losing data. Instead, consider using a pattern such as [Queue-Based Load Levelling](https://msdn.microsoft.com/library/dn589783.aspx) to control the rate at which data is passed to Elasticsearch. +* Ensure that your cluster has sufficient resources to handle the workload, especially if indexes are configured with multiple replicas. +* Use the bulk insert API to upload large batches of documents. Size bulk requests appropriately. Sometimes bigger batches are not better for performance, and can cause Elasticsearch threads and other resources to become overloaded, delaying other concurrent operations. The documents in a bulk insert batch are held in memory at the coordinating node while the operation is performed. The physical size of each batch is more important than the document count. There is no hard and fast rule as to what constitutes the ideal batch size, although Elasticsearch documentation recommends using between 5MB and 15MB as a starting point for your own investigations. Conduct performance testing to establish the optimal batch size for your own scenarios and workload mix. +* Make sure that bulk insert requests are distributed across nodes rather than directed to a single node. Directing all requests to a single node can cause memory exhaustion as each bulk insert request being processed is stored in memory in the node. It can also increase network latency as requests are redirected to other nodes. +* Elasticsearch uses a quorum consisting of a majority of the primary and replica nodes when writing data. 
A write operation is not completed until the quorum reports success. This approach helps to ensure that data is not written if a majority of the nodes are unavailable due to a network partition (failure) event. Using a quorum can slow the performance of write operations. You can disable quorum-based writing by setting the *consistency* parameter to *one* when writing data. The following example adds a new document but completes as soon as the write to the primary shard has completed. + + ```http + PUT /my_index/my_data/104?consistency=one + { + "name": "Bert", + "age": 23 + } + ``` + + Note that as with asynchronous replication, disabling quorum-based writing can lead to inconsistencies between the primary shard and each of the replicas. +* When using quorums, Elasticsearch will wait if insufficient nodes are available before determining that a write operation should be cancelled because a quorum cannot be reached. This wait period is determined by the timeout query parameter (the default is 1 minute). You can modify this setting by using the timeout query parameter. The example below creates a new document and waits for a maximum of 5 seconds for the quorum to respond before cancelling: + + ```http + PUT /my_index/my_data/104?timeout=5s + { + "name": "Sid", + "age": 27 + } + ``` + + Elasticsearch also lets you use your own version numbers [generated externally](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#_version_types). +* Consider disabling the *\_source* field of an index. This field contains a copy of the original JSON document that was used when a document is stored. Saving this field incurs additional storage costs and disk I/O. However, these costs may be marginal depending on the document structure, and you should also be aware that disabling the *\_source* field prevents a client from being able to perform the following operations: + + * Using the Update API to modify a document. 
+ * Performing on-the-fly highlighting when running queries. + * Reindexing data. + * Debugging queries and aggregations by viewing the original document. + + The following example disables the *\_source* field for the *logs* type in the *systembase* index. + + ```http + PUT /systembase + { + "settings" : { + ... + }, + "logs" : { + "_source": { + "enabled": false + }, + ..., + ... + } + } + ``` + +## General guidelines for conducting data ingestion performance testing with Elasticsearch +The following points highlight some of the items you should consider when running performance tests with Elasticsearch and analyzing the results. + +* Performance testing is necessarily time consuming and costly. At a minimum, gather statistics that measure transfer rates on disk and network, CPU utilization, CPU wait times and disk latency (if possible). This can give you fast feedback on your test effort with good return on investment. +* Leverage any scripting capabilities provided by your load testing tool to collect metrics that might not otherwise be available. For example, Linux has a variety of reliable performance statistics that you can gather using utilities such as *vmstat* and *iostat*. You can use scripting with JMeter to capture this data as part of a test plan. +* Performance engineering is mostly about analyzing statistics based on reliable and repeatable data. Do not stop at high-level metrics that will not give the necessary insights. Educate yourself with the data and make performance engineering a DevOps process with a fast feedback loop. Always look at the statistics comparing trends and past results/configurations. By doing this on a regular basis you will generate data that you will understand, is repeatable with your workloads, and with which you will be able to assess the effects of changes in configuration and deployment. +* Use a tool such as Marvel to monitor cluster and node performance while testing to gain additional insights. 
JMeter can be effective for capturing raw data for subsequent analysis, but using Marvel can give you a real-time feel for how performance is faring and the possible causes of glitches and slow downs. Additionally, many load test tools do not provide visibility to the inner metrics of Elasticsearch. Use and compare indexing throughput rates, merge segment counts, garbage collector (GC) statistics, and throttling times available in index statistics. Repeat this analysis on a regular basis. +* Compare your load test tool statistics with node statistics in Marvel (disk and network traffic, CPU utilization, memory and thread pool usage) to understand the correlation pattern between the figures reported by the infrastructure and specific Elasticsearch statistics. +* As a general rule consider *one node one shard* as the baseline for performance testing and assess application costs by adding nodes. However, do not depend completely on extrapolating performance based on a small number of nodes and shards. Synchronization and communication costs in the cluster can have unpredictable effects the larger the number of nodes and shards. +* Look at the shard allocation across nodes to compare the statistics. Some nodes will have fewer replicas and shards which will create an imbalance of resource utilization. +* If you are performing load testing, increase the number of threads that your test tool uses to submit work to the cluster until errors occur. For sustainable throughput testing, consider maintaining your test level below the expected peak load. If the error rate exceeds the ceiling, errors will incur cost on backend resources due to recoverability. In these situations, throughput will inevitably diminish. +* To simulate how your system reacts to an unexpectedly large burst of activity, consider running tests that generate an error rate that exceeds your expected peak load. 
This will give you throughput figures not only in terms of capacity but also the cost of recoverability. +* Use a document count to assess your performance profile, and recycle documents following your workload patterns. Consider that as more documents are added, the performance profile might change. +* Be aware of the SLAs for IOPS and transfer rate limits for the storage you are using. Different storage types (SSD, spinning media) have different transfer rates. +* Remember that CPU performance can drop not only because of disk and network activity, but because backend applications can use locking and communication mechanisms with distributed processing that may cause underutilization of the processor. +* Run performance tests for at least two hours (not a few minutes). Indexing can affect performance in ways which may not be visible immediately. For example, JVM garbage collection statistics and indexing merges can change the performance profile over time. +* Consider how index refreshes might impact data ingestion throughput and throttling with a cluster. + +## Summary +It is important to understand how to scale your solution as the volumes of data and the number of requests increase. Elasticsearch running on Azure enables vertical and horizontal scaling. You can run on bigger VMs with more resources, and you can distribute an Elasticsearch cluster across a network of VMs. The range of options can be confusing. Is it more cost-effective to implement a cluster on a large number of small VMs, on a cluster with a small number of large VMs, or somewhere in the middle? Also, how many shards should each index contain, and what are the tradeoffs concerning data ingestion versus query performance? The way in which shards are distributed across nodes can have a significant impact on the data ingestion throughput. 
Using more shards can reduce the amount of internal contention that occurs within a shard, but you must balance this benefit with the overhead that using many shards can impose on a cluster. To answer these questions effectively, you should be prepared to test your system to determine the most appropriate strategy. + +For data ingestion workloads, the performance of the disk I/O subsystem is a critical factor. Using SSDs can boost throughput by reducing the disk latency of write operations. If you don't need vast amounts of disk space on a node, consider using standard VMs with ephemeral storage in place of more expensive VMs that support premium storage. + +## Appendix: the bulk load data ingestion performance test +This appendix describes the performance test performed against the Elasticsearch cluster. The tests were run by using JMeter running on a separate set of VMs. Details of the configuration of the test environment are described in [Creating a performance testing environment for Elasticsearch on Azure][Creating a performance testing environment for Elasticsearch on Azure]. To perform your own testing, you can create your own JMeter test plan manually, or you can use the automated test scripts available separately. See [Running the automated Elasticsearch performance tests][Running the automated Elasticsearch performance tests] for further information. + +The data ingestion workload performed a large-scale upload of documents by using the bulk insert API. The purpose of this index was to simulate a repository receiving log data representing system events for subsequent search and analysis. Each document was stored in a single index named *systembase*, and had the type *logs*. 
All documents had the same fixed schema described by the following table: + +| Field | Datatype | Example | +| --- | --- | --- | +| @timestamp |datetime |2013-12-11T08:01:45.000Z | +| name |string |checkout.payment | +| message |string |Incoming request message | +| severityCode |integer |1 | +| severity |string |info | +| hostname |string |sixshot | +| hostip |string (ip address) |10.0.0.4 | +| pid |int |123 | +| tid |int |4325 | +| appId |string (uuid) |{00000000-0000-0000-000000000000} | +| appName |string |mytestapp | +| appVersion |string |0.1.0.1234 | +| type |int |5 | +| subtype |int |1 | +| correlationId |guid |{00000000-0000-0000-000000000000} | +| os |string |Linux | +| osVersion |string |4.1.1 | +| parameters |[ ] |{key:value,key:value} | + +You can use the following request to create the index. The *number\_of\_replicas*, *refresh\_interval*, and *number\_of\_shards* settings varied from the values shown below in many of the tests. + +> [!IMPORTANT] +> The index was dropped and recreated prior to each test run. 
+> +> + +```http +PUT /systembase +{ + "settings" : { + "number_of_replicas": 1, + "refresh_interval": "30s", + "number_of_shards": "5" + }, + "logs" : { + "properties" : { + "@timestamp": { + "type": "date", + "index" : "not_analyzed" + }, + "name": { + "type": "string", + "index" : "not_analyzed" + }, + "message": { + "type": "string", + "index" : "not_analyzed" + }, + "severityCode": { + "type": "integer", + "index" : "not_analyzed" + }, + "severity": { + "type": "string", + "index" : "not_analyzed" + }, + "hostname": { + "type": "string", + "index" : "not_analyzed" + }, + "hostip": { + "type": "string", + "index" : "not_analyzed" + }, + "pid": { + "type": "integer", + "index" : "not_analyzed" + }, + "tid": { + "type": "integer", + "index" : "not_analyzed" + }, + "appId": { + "type": "string", + "index" : "not_analyzed" + }, + "appName": { + "type": "string", + "index" : "not_analyzed" + }, + "appVersion": { + "type": "integer", + "index" : "not_analyzed" + }, + "type": { + "type": "integer", + "index" : "not_analyzed" + }, + "subtype": { + "type": "integer", + "index" : "not_analyzed" + }, + "correlationId": { + "type": "string", + "index" : "not_analyzed" + }, + "os": { + "type": "string", + "index" : "not_analyzed" + }, + "osVersion": { + "type": "string", + "index" : "not_analyzed" + }, + "parameters": { + "type": "string", + "index" : "not_analyzed" + } + } + } +} +``` + +Each bulk insert batch contained 1000 documents. Each document was generated based on a combination of random values for the *severityCode*, *hostname*, *hostip*, *pid*, *tid*, *appName*, *appVersion*, *type*, *subtype*, and *correlationId* fields, and a random selection of text from a fixed set of terms for the *name*, *message*, *severity*, *os*, *osVersion*, *parameters*, *data1*, and *data2* fields. The number of client application instances used to upload data was carefully selected to maximize successful input volume. 
Tests ran for two hours to enable the cluster to settle and reduce the influence of any temporary glitches in the overall results. In this time, some tests uploaded nearly 1.5 billion documents. + +The data was generated dynamically by using a custom JUnit request sampler that was added to a thread group in a JMeter test plan. The JUnit code was created by using the JUnit test case template in the Eclipse IDE. + +> [!NOTE] +> For information on how to create a JUnit test for JMeter, see [Deploying a JMeter JUnit sampler for testing Elasticsearch performance][Deploying a JMeter JUnit sampler for testing Elasticsearch performance]. +> +> + +The following snippet shows the Java code for testing Elasticsearch 1.7.3. Note that the JUnit test class in this example is named *ElasticsearchLoadTest2*: + +```java +/* Java */ +package elasticsearchtest2; + + import static org.junit.Assert.*; + + import org.junit.*; + + import java.util.*; + + import java.io.*; + + import org.elasticsearch.action.bulk.*; + import org.elasticsearch.common.transport.*; + import org.elasticsearch.client.transport.*; + import org.elasticsearch.common.settings.*; + import org.elasticsearch.common.xcontent.*; + + public class ElasticsearchLoadTest2 { + + private String [] names={"checkout","order","search","payment"}; + private String [] messages={"Incoming request from code","incoming operation succeeded with code","Operation completed time","transaction performed"}; + private String [] severity={"info","warning","transaction","verbose"}; + private String [] apps={"4D24BD62-20BF-4D74-B6DC-31313ABADB82","5D24BD62-20BF-4D74-B6DC-31313ABADB82","6D24BD62-20BF-4D74-B6DC-31313ABADB82","7D24BD62-20BF-4D74-B6DC-31313ABADB82"}; + + private String hostname = ""; + private String indexstr = ""; + private String typestr = ""; + private int port = 0; + private int itemsPerInsert = 0; + private String clustername = ""; + private static Random rand=new Random(); + + @Before + public void setUp() throws Exception 
{ + } + + public ElasticsearchLoadTest2(String paras) { + /* Paras is a string containing a set of comma separated values for: + hostname + indexstr + typestr + port + clustername + node + itemsPerInsert + */ + + // Note: No checking/validation is performed + + String delims = "[ ]*,[ ]*"; // comma surrounded by zero or more spaces + String[] items = paras.split(delims); + + hostname = items[0]; + indexstr = items[1]; + typestr = items[2]; + port = Integer.parseInt(items[3]); + clustername = items[4]; + itemsPerInsert = Integer.parseInt(items[5]); + + if (itemsPerInsert == 0) + itemsPerInsert = 1000; + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void BulkBigInsertTest() throws IOException { + + Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", clustername).build(); + + TransportClient client; + client = new TransportClient(settings); + + try { + client.addTransportAddress(new InetSocketTransportAddress(hostname, port)); + BulkRequestBuilder bulkRequest = client.prepareBulk(); + Random random = new Random(); + char[] exmarks = new char[12000]; + Arrays.fill(exmarks, 'x'); + String dataString = new String(exmarks); + + for(int i=1; i < itemsPerInsert; i++){ + random.nextInt(10); + int host=random.nextInt(20); + + bulkRequest.add(client.prepareIndex(indexstr, typestr).setSource(XContentFactory.jsonBuilder().startObject() + .field("@timestamp", new Date()) + .field("name", names[random.nextInt(names.length)]) + .field("message", messages[random.nextInt(messages.length)]) + .field("severityCode", random.nextInt(10)) + .field("severity", severity[random.nextInt(severity.length)]) + .field("hostname", "Hostname"+host) + .field("hostip", "10.1.0."+host) + .field("pid",random.nextInt(10)) + .field("tid",random.nextInt(10)) + .field("appId", apps[random.nextInt(apps.length)]) + .field("appName", "application" + host) + .field("appVersion", random.nextInt(5)) + .field("type", random.nextInt(6)) + .field("subtype", 
random.nextInt(6)) + .field("correlationId", UUID.randomUUID().toString()) + .field("os", "linux") + .field("osVersion", "14.1.5") + .field("parameters", "{key:value,key:value}") + .field("data1",dataString) + .field("data2",dataString) + .endObject())); + } + + BulkResponse bulkResponse = bulkRequest.execute().actionGet(); + assertFalse(bulkResponse.hasFailures()); + } + finally { + client.close(); + } + } + + @Test + public void BulkDataInsertTest() throws IOException { + Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", clustername).build(); + + TransportClient client; + client = new TransportClient(settings); + + try { + client.addTransportAddress(new InetSocketTransportAddress(hostname, port)); + BulkRequestBuilder bulkRequest = client.prepareBulk(); + + for(int i=1; i< itemsPerInsert; i++){ + rand.nextInt(10); + int host=rand.nextInt(20); + + bulkRequest.add(client.prepareIndex(indexstr, typestr).setSource(XContentFactory.jsonBuilder().startObject() + .field("@timestamp", new Date()) + .field("name", names[rand.nextInt(names.length)]) + .field("message", messages[rand.nextInt(messages.length)]) + .field("severityCode", rand.nextInt(10)) + .field("severity", severity[rand.nextInt(severity.length)]) + .field("hostname", "Hostname" + host) + .field("hostip", "10.1.0."+host) + .field("pid",rand.nextInt(10)) + .field("tid",rand.nextInt(10)) + .field("appId", apps[rand.nextInt(apps.length)]) + .field("appName", "application"+host) + .field("appVersion", rand.nextInt(5)) + .field("type", rand.nextInt(6)) + .field("subtype", rand.nextInt(6)) + .field("correlationId", UUID.randomUUID().toString()) + .field("os", "linux") + .field("osVersion", "14.1.5") + .field("parameters", "{key:value,key:value}") + .endObject())); + } + + BulkResponse bulkResponse = bulkRequest.execute().actionGet(); + assertFalse(bulkResponse.hasFailures()); + } + finally { + client.close(); + } + } + } +``` + +The private *String* arrays *names*, *messages*, *severity*, 
and *apps* contain a small set of values that items are selected from at random. The remaining data items for each document are generated at runtime. + +The constructor that takes the *String* parameter is invoked from JMeter, and the values passed in the string are specified as part of the JUnit Request sampler configuration. For this JUnit test, the *String* parameter is expected to contain the following information: + +* **Hostname**. This is the name or IP address of the Azure load balancer. The load balancer attempts to distribute the request across the data nodes in the cluster. If you are not using a load balancer you can specify the address of a node in the cluster, but all requests will be directed to that node and that might result in it becoming a bottleneck. +* **Indexstr**. This is the name of the index that the data generated by the JUnit test is added to. If you created the index as described above, this value should be *systembase*. +* **Typestr**. This is the type in the index where the data is stored. If you created the index as described above, this value should be *logs*. +* **Port**. This is the port to connect to on the host. In most cases this should be set to 9300 (the port used by Elasticsearch to listen for client API requests; port 9200 is only used for HTTP requests). +* **Clustername**. This is the name of the Elasticsearch cluster containing the index. +* **ItemsPerInsert**. This is a numeric parameter indicating the number of documents to add in each bulk insert batch. The default batch size is 1000. + +You specify the data for the constructor string in the JUnit Request page used to configure the JUnit sampler in JMeter. The following image shows an example: + +![JUnit Request UI](./images/data-ingestion-image22.png) + +The *BulkDataInsertTest* and *BulkBigInsertTest* methods perform the actual work of generating and uploading the data. Both methods are very similar. 
They connect to the Elasticsearch cluster and then create a batch of documents (as determined by the *ItemsPerInsert* constructor string parameter). The documents are added to the index using the Elasticsearch Bulk API. The difference between the two methods is that the *data1* and *data2* string fields in each document are omitted from the upload in the *BulkDataInsertTest* method, but are filled in with strings of 12000 characters in the *BulkBigInsertTest* method. Note that you select which of these methods to run using the *Test Method* box in the JUnit Request page in JMeter (highlighted in the previous figure). + +> [!NOTE] +> The sample code presented here uses the Elasticsearch 1.7.3 Transport Client library. If you are using Elasticsearch 2.0.0 or later, you must use the appropriate library for the version selected. For more information about the Elasticsearch 2.0.0 Transport Client library, see the [Transport Client](https://www.elastic.co/guide/en/elasticsearch/client/java-api/2.0/transport-client.html) page on the Elasticsearch website. 
+> +> + +[Configuring Resilience and Recovery on Elasticsearch on Azure]: resilience-and-recovery.md +[Creating a Performance Testing Environment for Elasticsearch on Azure]: performance-testing-environment.md +[Running the Automated Elasticsearch Performance Tests]: automated-performance-tests.md +[Deploying a JMeter JUnit Sampler for Testing Elasticsearch Performance]: jmeter-junit-sampler.md diff --git a/docs/elasticsearch/images/data-ingestion-image1.png b/docs/elasticsearch/images/data-ingestion-image1.png new file mode 100644 index 00000000000..dd388728393 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image1.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image10.png b/docs/elasticsearch/images/data-ingestion-image10.png new file mode 100644 index 00000000000..f03e4d3dbff Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image10.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image11.png b/docs/elasticsearch/images/data-ingestion-image11.png new file mode 100644 index 00000000000..4bc967fd39a Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image11.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image12.png b/docs/elasticsearch/images/data-ingestion-image12.png new file mode 100644 index 00000000000..fd2d7485a34 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image12.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image13.png b/docs/elasticsearch/images/data-ingestion-image13.png new file mode 100644 index 00000000000..09baaba1c5b Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image13.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image14.png b/docs/elasticsearch/images/data-ingestion-image14.png new file mode 100644 index 00000000000..467841fae82 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image14.png differ diff --git 
a/docs/elasticsearch/images/data-ingestion-image15.png b/docs/elasticsearch/images/data-ingestion-image15.png new file mode 100644 index 00000000000..7bc79e0ca07 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image15.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image16.png b/docs/elasticsearch/images/data-ingestion-image16.png new file mode 100644 index 00000000000..1c3e07d7f01 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image16.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image17.png b/docs/elasticsearch/images/data-ingestion-image17.png new file mode 100644 index 00000000000..156eb6c265f Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image17.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image18.png b/docs/elasticsearch/images/data-ingestion-image18.png new file mode 100644 index 00000000000..4c7bd46c9cf Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image18.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image19.png b/docs/elasticsearch/images/data-ingestion-image19.png new file mode 100644 index 00000000000..a34da0318c1 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image19.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image2.png b/docs/elasticsearch/images/data-ingestion-image2.png new file mode 100644 index 00000000000..817a46e81ed Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image2.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image20.png b/docs/elasticsearch/images/data-ingestion-image20.png new file mode 100644 index 00000000000..820a7ebb037 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image20.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image21.png b/docs/elasticsearch/images/data-ingestion-image21.png new file mode 100644 index 00000000000..640405c6ad7 Binary 
files /dev/null and b/docs/elasticsearch/images/data-ingestion-image21.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image22.png b/docs/elasticsearch/images/data-ingestion-image22.png new file mode 100644 index 00000000000..e0a7300d1a3 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image22.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image3.png b/docs/elasticsearch/images/data-ingestion-image3.png new file mode 100644 index 00000000000..0444b19af23 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image3.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image4.png b/docs/elasticsearch/images/data-ingestion-image4.png new file mode 100644 index 00000000000..19e196733ab Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image4.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image5.png b/docs/elasticsearch/images/data-ingestion-image5.png new file mode 100644 index 00000000000..caf4e335757 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image5.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image6.png b/docs/elasticsearch/images/data-ingestion-image6.png new file mode 100644 index 00000000000..f5d87b753d2 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image6.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image7.png b/docs/elasticsearch/images/data-ingestion-image7.png new file mode 100644 index 00000000000..c47bffc7032 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image7.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image8.png b/docs/elasticsearch/images/data-ingestion-image8.png new file mode 100644 index 00000000000..f724b1af157 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image8.png differ diff --git a/docs/elasticsearch/images/data-ingestion-image9.png 
b/docs/elasticsearch/images/data-ingestion-image9.png new file mode 100644 index 00000000000..4a4bf3bef51 Binary files /dev/null and b/docs/elasticsearch/images/data-ingestion-image9.png differ diff --git a/docs/elasticsearch/images/general-clientappinstances.png b/docs/elasticsearch/images/general-clientappinstances.png new file mode 100644 index 00000000000..4d03f6f21c2 Binary files /dev/null and b/docs/elasticsearch/images/general-clientappinstances.png differ diff --git a/docs/elasticsearch/images/general-cluster1.png b/docs/elasticsearch/images/general-cluster1.png new file mode 100644 index 00000000000..8c90ea546da Binary files /dev/null and b/docs/elasticsearch/images/general-cluster1.png differ diff --git a/docs/elasticsearch/images/general-cluster2.png b/docs/elasticsearch/images/general-cluster2.png new file mode 100644 index 00000000000..0d986bedfb5 Binary files /dev/null and b/docs/elasticsearch/images/general-cluster2.png differ diff --git a/docs/elasticsearch/images/general-developmentconfiguration.png b/docs/elasticsearch/images/general-developmentconfiguration.png new file mode 100644 index 00000000000..bd3b2ca539c Binary files /dev/null and b/docs/elasticsearch/images/general-developmentconfiguration.png differ diff --git a/docs/elasticsearch/images/general-startingpoint.png b/docs/elasticsearch/images/general-startingpoint.png new file mode 100644 index 00000000000..81494100d79 Binary files /dev/null and b/docs/elasticsearch/images/general-startingpoint.png differ diff --git a/docs/elasticsearch/images/general-threenodecluster.png b/docs/elasticsearch/images/general-threenodecluster.png new file mode 100644 index 00000000000..226bd0215c9 Binary files /dev/null and b/docs/elasticsearch/images/general-threenodecluster.png differ diff --git a/docs/elasticsearch/images/general-tribenode.png b/docs/elasticsearch/images/general-tribenode.png new file mode 100644 index 00000000000..7cdf3cc3428 Binary files /dev/null and 
b/docs/elasticsearch/images/general-tribenode.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy10.png b/docs/elasticsearch/images/jmeter-deploy10.png new file mode 100644 index 00000000000..8c724172770 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy10.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy11.png b/docs/elasticsearch/images/jmeter-deploy11.png new file mode 100644 index 00000000000..b02d566dfb9 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy11.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy12.png b/docs/elasticsearch/images/jmeter-deploy12.png new file mode 100644 index 00000000000..cefba123422 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy12.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy13.png b/docs/elasticsearch/images/jmeter-deploy13.png new file mode 100644 index 00000000000..5d7494105b1 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy13.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy14.png b/docs/elasticsearch/images/jmeter-deploy14.png new file mode 100644 index 00000000000..a8da78815d0 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy14.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy15.png b/docs/elasticsearch/images/jmeter-deploy15.png new file mode 100644 index 00000000000..4cf73bf5f0b Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy15.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy16.png b/docs/elasticsearch/images/jmeter-deploy16.png new file mode 100644 index 00000000000..7ae40cf588f Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy16.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy17.png b/docs/elasticsearch/images/jmeter-deploy17.png new file mode 100644 index 00000000000..34698852f42 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy17.png differ diff --git 
a/docs/elasticsearch/images/jmeter-deploy18.png b/docs/elasticsearch/images/jmeter-deploy18.png new file mode 100644 index 00000000000..1f3698acbae Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy18.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy19.png b/docs/elasticsearch/images/jmeter-deploy19.png new file mode 100644 index 00000000000..71faf595f60 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy19.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy20.png b/docs/elasticsearch/images/jmeter-deploy20.png new file mode 100644 index 00000000000..826dfb60134 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy20.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy21.png b/docs/elasticsearch/images/jmeter-deploy21.png new file mode 100644 index 00000000000..75688517a5f Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy21.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy22.png b/docs/elasticsearch/images/jmeter-deploy22.png new file mode 100644 index 00000000000..acb45f53de5 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy22.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy23.png b/docs/elasticsearch/images/jmeter-deploy23.png new file mode 100644 index 00000000000..49b7d1bea91 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy23.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy24.png b/docs/elasticsearch/images/jmeter-deploy24.png new file mode 100644 index 00000000000..0375bb5d522 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy24.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy25.png b/docs/elasticsearch/images/jmeter-deploy25.png new file mode 100644 index 00000000000..62a72829029 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy25.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy26.png 
b/docs/elasticsearch/images/jmeter-deploy26.png new file mode 100644 index 00000000000..f06e99fa12f Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy26.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy27.png b/docs/elasticsearch/images/jmeter-deploy27.png new file mode 100644 index 00000000000..5e3a618446d Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy27.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy28.png b/docs/elasticsearch/images/jmeter-deploy28.png new file mode 100644 index 00000000000..d4c2218562d Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy28.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy29.png b/docs/elasticsearch/images/jmeter-deploy29.png new file mode 100644 index 00000000000..3b0ebbafccc Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy29.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy30.png b/docs/elasticsearch/images/jmeter-deploy30.png new file mode 100644 index 00000000000..fe6f7b608ac Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy30.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy31.png b/docs/elasticsearch/images/jmeter-deploy31.png new file mode 100644 index 00000000000..2567bb13f67 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy31.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy32.png b/docs/elasticsearch/images/jmeter-deploy32.png new file mode 100644 index 00000000000..ec9664db439 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy32.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy7.png b/docs/elasticsearch/images/jmeter-deploy7.png new file mode 100644 index 00000000000..2cde426b7b0 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy7.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy8.png b/docs/elasticsearch/images/jmeter-deploy8.png new file mode 100644 index 
00000000000..4c5b2820bfc Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy8.png differ diff --git a/docs/elasticsearch/images/jmeter-deploy9.png b/docs/elasticsearch/images/jmeter-deploy9.png new file mode 100644 index 00000000000..47ced30e982 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-deploy9.png differ diff --git a/docs/elasticsearch/images/jmeter-testing1.png b/docs/elasticsearch/images/jmeter-testing1.png new file mode 100644 index 00000000000..759692370a0 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-testing1.png differ diff --git a/docs/elasticsearch/images/jmeter-testing2.png b/docs/elasticsearch/images/jmeter-testing2.png new file mode 100644 index 00000000000..99b69049228 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-testing2.png differ diff --git a/docs/elasticsearch/images/jmeter-testing3.png b/docs/elasticsearch/images/jmeter-testing3.png new file mode 100644 index 00000000000..3c457ff4484 Binary files /dev/null and b/docs/elasticsearch/images/jmeter-testing3.png differ diff --git a/docs/elasticsearch/images/jmeter-testing4.png b/docs/elasticsearch/images/jmeter-testing4.png new file mode 100644 index 00000000000..4c7e067723a Binary files /dev/null and b/docs/elasticsearch/images/jmeter-testing4.png differ diff --git a/docs/elasticsearch/images/performance-image17.png b/docs/elasticsearch/images/performance-image17.png new file mode 100644 index 00000000000..889bb597f1e Binary files /dev/null and b/docs/elasticsearch/images/performance-image17.png differ diff --git a/docs/elasticsearch/images/performance-image18.png b/docs/elasticsearch/images/performance-image18.png new file mode 100644 index 00000000000..bc7f3c7b3f1 Binary files /dev/null and b/docs/elasticsearch/images/performance-image18.png differ diff --git a/docs/elasticsearch/images/performance-image19.png b/docs/elasticsearch/images/performance-image19.png new file mode 100644 index 00000000000..c31c56fe643 Binary files 
/dev/null and b/docs/elasticsearch/images/performance-image19.png differ diff --git a/docs/elasticsearch/images/performance-image20.png b/docs/elasticsearch/images/performance-image20.png new file mode 100644 index 00000000000..ec1b24694cf Binary files /dev/null and b/docs/elasticsearch/images/performance-image20.png differ diff --git a/docs/elasticsearch/images/performance-image21.png b/docs/elasticsearch/images/performance-image21.png new file mode 100644 index 00000000000..271e9a7da8c Binary files /dev/null and b/docs/elasticsearch/images/performance-image21.png differ diff --git a/docs/elasticsearch/images/performance-image22.png b/docs/elasticsearch/images/performance-image22.png new file mode 100644 index 00000000000..3bc65d29016 Binary files /dev/null and b/docs/elasticsearch/images/performance-image22.png differ diff --git a/docs/elasticsearch/images/performance-structure.png b/docs/elasticsearch/images/performance-structure.png new file mode 100644 index 00000000000..c0f1c94fc11 Binary files /dev/null and b/docs/elasticsearch/images/performance-structure.png differ diff --git a/docs/elasticsearch/images/performance-telnet-server.png b/docs/elasticsearch/images/performance-telnet-server.png new file mode 100644 index 00000000000..8d98aa0326d Binary files /dev/null and b/docs/elasticsearch/images/performance-telnet-server.png differ diff --git a/docs/elasticsearch/images/performance-tests-image1.png b/docs/elasticsearch/images/performance-tests-image1.png new file mode 100644 index 00000000000..57bb8332c95 Binary files /dev/null and b/docs/elasticsearch/images/performance-tests-image1.png differ diff --git a/docs/elasticsearch/images/query-performance1.png b/docs/elasticsearch/images/query-performance1.png new file mode 100644 index 00000000000..60f783e391d Binary files /dev/null and b/docs/elasticsearch/images/query-performance1.png differ diff --git a/docs/elasticsearch/images/query-performance10.png b/docs/elasticsearch/images/query-performance10.png new 
file mode 100644 index 00000000000..93ab6ab90c6 Binary files /dev/null and b/docs/elasticsearch/images/query-performance10.png differ diff --git a/docs/elasticsearch/images/query-performance11.png b/docs/elasticsearch/images/query-performance11.png new file mode 100644 index 00000000000..98724f20998 Binary files /dev/null and b/docs/elasticsearch/images/query-performance11.png differ diff --git a/docs/elasticsearch/images/query-performance12.png b/docs/elasticsearch/images/query-performance12.png new file mode 100644 index 00000000000..3ee940889e8 Binary files /dev/null and b/docs/elasticsearch/images/query-performance12.png differ diff --git a/docs/elasticsearch/images/query-performance13.png b/docs/elasticsearch/images/query-performance13.png new file mode 100644 index 00000000000..91cca55a7f1 Binary files /dev/null and b/docs/elasticsearch/images/query-performance13.png differ diff --git a/docs/elasticsearch/images/query-performance14.png b/docs/elasticsearch/images/query-performance14.png new file mode 100644 index 00000000000..f3988e0a08a Binary files /dev/null and b/docs/elasticsearch/images/query-performance14.png differ diff --git a/docs/elasticsearch/images/query-performance15.png b/docs/elasticsearch/images/query-performance15.png new file mode 100644 index 00000000000..abe7126390f Binary files /dev/null and b/docs/elasticsearch/images/query-performance15.png differ diff --git a/docs/elasticsearch/images/query-performance16.png b/docs/elasticsearch/images/query-performance16.png new file mode 100644 index 00000000000..6f2692c28a8 Binary files /dev/null and b/docs/elasticsearch/images/query-performance16.png differ diff --git a/docs/elasticsearch/images/query-performance17.png b/docs/elasticsearch/images/query-performance17.png new file mode 100644 index 00000000000..ac4150927ad Binary files /dev/null and b/docs/elasticsearch/images/query-performance17.png differ diff --git a/docs/elasticsearch/images/query-performance18.png 
b/docs/elasticsearch/images/query-performance18.png new file mode 100644 index 00000000000..551ab2ef67e Binary files /dev/null and b/docs/elasticsearch/images/query-performance18.png differ diff --git a/docs/elasticsearch/images/query-performance19.png b/docs/elasticsearch/images/query-performance19.png new file mode 100644 index 00000000000..4c1a6123143 Binary files /dev/null and b/docs/elasticsearch/images/query-performance19.png differ diff --git a/docs/elasticsearch/images/query-performance2.png b/docs/elasticsearch/images/query-performance2.png new file mode 100644 index 00000000000..abd44bbe47b Binary files /dev/null and b/docs/elasticsearch/images/query-performance2.png differ diff --git a/docs/elasticsearch/images/query-performance3.png b/docs/elasticsearch/images/query-performance3.png new file mode 100644 index 00000000000..a4be04dc35c Binary files /dev/null and b/docs/elasticsearch/images/query-performance3.png differ diff --git a/docs/elasticsearch/images/query-performance4.png b/docs/elasticsearch/images/query-performance4.png new file mode 100644 index 00000000000..98724f20998 Binary files /dev/null and b/docs/elasticsearch/images/query-performance4.png differ diff --git a/docs/elasticsearch/images/query-performance5.png b/docs/elasticsearch/images/query-performance5.png new file mode 100644 index 00000000000..3ee940889e8 Binary files /dev/null and b/docs/elasticsearch/images/query-performance5.png differ diff --git a/docs/elasticsearch/images/query-performance6.png b/docs/elasticsearch/images/query-performance6.png new file mode 100644 index 00000000000..f0a2ec6628a Binary files /dev/null and b/docs/elasticsearch/images/query-performance6.png differ diff --git a/docs/elasticsearch/images/query-performance7.png b/docs/elasticsearch/images/query-performance7.png new file mode 100644 index 00000000000..3ead02d59c1 Binary files /dev/null and b/docs/elasticsearch/images/query-performance7.png differ diff --git 
a/docs/elasticsearch/images/query-performance8.png b/docs/elasticsearch/images/query-performance8.png new file mode 100644 index 00000000000..12814041e36 Binary files /dev/null and b/docs/elasticsearch/images/query-performance8.png differ diff --git a/docs/elasticsearch/images/query-performance9.png b/docs/elasticsearch/images/query-performance9.png new file mode 100644 index 00000000000..a1f2c42bd45 Binary files /dev/null and b/docs/elasticsearch/images/query-performance9.png differ diff --git a/docs/elasticsearch/images/resilience-testing1.png b/docs/elasticsearch/images/resilience-testing1.png new file mode 100644 index 00000000000..782674263ef Binary files /dev/null and b/docs/elasticsearch/images/resilience-testing1.png differ diff --git a/docs/elasticsearch/images/resilience-testing2.png b/docs/elasticsearch/images/resilience-testing2.png new file mode 100644 index 00000000000..22cdb05b5a0 Binary files /dev/null and b/docs/elasticsearch/images/resilience-testing2.png differ diff --git a/docs/elasticsearch/index.md b/docs/elasticsearch/index.md new file mode 100644 index 00000000000..fb507da5cee --- /dev/null +++ b/docs/elasticsearch/index.md @@ -0,0 +1,897 @@ +--- +title: Get started with Elasticsearch on Azure +description: 'How to install, configure, and run Elasticsearch on Azure.' +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.next: data-ingestion-performance +ms.assetid: 1d539b39-3c85-4e4b-98ae-ab06aed72fca +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Run Elasticsearch on Azure + +This article describes how to implement an Elasticsearch cluster using Azure. 
It touches on best +practices for deploying an Elasticsearch cluster, concentrating on the various functional performance +and management requirements of your system, and considering how your requirements should drive the +configuration and topology that you select. + +> [!NOTE] +> This guidance assumes some basic familiarity with [Elasticsearch][Elasticsearch]. +> +> + +## The structure of Elasticsearch +Elasticsearch is a document database highly optimized to act as a search engine. Documents are +serialized in JSON format. Data is held in indexes, implemented by using [Apache Lucene][Apache Lucene], although +the details are abstracted from view and it is not necessary to fully understand Lucene in order to use +Elasticsearch. + +### Clusters, nodes, indexes, and shards +Elasticsearch implements a clustered architecture that uses sharding to distribute data across +multiple nodes, and replication to provide high availability. Documents are stored in indexes. The user can specify which fields in a document are used to uniquely identify it within an index, or the system can generate a key field and values automatically. The index is used to physically organize documents and is the principal means for locating documents. + +An index contains a set of shards. Documents are evenly dispersed across shards using a hashing +mechanism based on the index key values and the number of shards in the index. + +Indexes can be replicated. In this case each shard in the index is copied. Elasticsearch ensures that +each original shard for an index (referred to as a “primary shard”) and its replica always reside on +different nodes. When a document is added or modified, all write operations are performed on the primary shard first and +then at each replica. + +The figure below shows the essential aspects of an Elasticsearch cluster containing three nodes. An +index has been created that consists of two primary shards with two replicas for each shard (six shards +in all). 
+ +![Three-node cluster](./images/general-cluster1.png) + +*A simple Elasticsearch cluster containing two primary shards and two sets of replicas* + +In this cluster, primary shard 1 and primary shard 2 are located on separate nodes to help balance the +load across them. The replicas are similarly distributed. If a single node fails, the remaining nodes +have sufficient information to enable the system to continue functioning. If necessary, Elasticsearch +will promote a replica shard to become a primary shard if the corresponding primary shard is unavailable. + +### Node roles +The nodes in an Elasticsearch cluster can perform the following roles: + +* A **data node** which can hold one or more shards that contain index data. +* A **client node** that does not hold index data but that forwards incoming requests made by client + applications to the appropriate data node. +* A **master node** that does not hold index data but that performs cluster management operations, such + as maintaining and distributing routing information around the cluster (the list of which nodes contain + which shards), determining which nodes are available, relocating shards as nodes appear and disappear, + and coordinating recovery after node failure. Multiple nodes can be configured as masters, but only + one will actually be elected to perform the master functions. If this node fails, another election + takes place and one of the other eligible master nodes will be elected and take over. + +> [!NOTE] +> The elected master node is critical to the well-being of the cluster. The other nodes +> ping it regularly to ensure that it is still available. If the elected master node is also acting as +> a data node, there is a chance that the node can become busy and fail to respond to these pings. In +> this situation, the master is deemed to have failed and one of the other master nodes is elected in its +> place.
+> +> + + The figure below shows a topology containing a mixture of dedicated master, client, and data nodes in an Elasticsearch cluster. + +![Cluster with dedicated master, client, and data nodes](./images/general-cluster2.png) + +*An Elasticsearch cluster showing different types of nodes* + +### Costs and benefits of using client nodes +When an application submits a query to an Elasticsearch cluster, the node to which the application +connects is responsible for directing the query process. The node forwards the request to each data node +and gathers the results, returning the accumulated information to the application. If a query involves +aggregations and other computations, the node to which the application connects performs the necessary +operations after retrieving the data from each of the other nodes. This scatter/gather process can +consume considerable processing and memory resources. + +Using dedicated client nodes to perform these tasks allows data nodes to focus on managing and storing +data. The result is that many scenarios that involve complex queries and aggregations can benefit from +using dedicated client nodes. However, the impact of using dedicated client nodes will likely vary +depending on your scenario, workload, and cluster size. + +> [!NOTE] +> Refer to [Tuning Data Aggregation and Query Performance for Elasticsearch on Azure][Tuning Data Aggregation and Query Performance for Elasticsearch on Azure] for more information on the tuning process. +> +> + +### Connecting to a cluster +Elasticsearch exposes a series of REST APIs for building client applications and sending requests to a +cluster. If you are developing applications using the .NET Framework, two higher-level APIs are +available – [Elasticsearch.Net & NEST][Elasticsearch.Net & NEST]. + +If you are building client applications using Java, you can use the [Node Client API][Node Client API] to create client +nodes dynamically and add them to the cluster.
Creating client nodes dynamically is convenient if your +system uses a relatively small number of long-lived connections. Client nodes created by using the Node +API are provided with the cluster routing map (the details of which nodes contain which shards) by the +master node. This information enables the Java application to connect directly to the appropriate nodes +when indexing or querying data, reducing the number of hops that may be necessary when using other APIs. + +The cost of this approach is the overhead of enrolling the client node into the cluster. If a large +number of client nodes appear and disappear quickly, the impact of maintaining and distributing the +cluster routing map can become significant. + +The figure below shows a configuration that uses a load balancer to route requests to a set of client nodes, although the same strategy can be used to connect directly to data nodes if client nodes are not used. + +![Cluster with load balancer](./images/general-clientappinstances.png) + +*Client application instances connecting to an Elasticsearch cluster through the Azure Load Balancer* + +> [!NOTE] +> You can use the [Azure Load Balancer][Azure Load Balancer] to expose the cluster to the public Internet, or +> you can use an [internal load balancer][internal load balancer] if the client applications and cluster are contained entirely +> within the same private virtual network (VNet). +> +> + +### Node discovery +Elasticsearch is based on peer-to-peer communications, so discovering other nodes in a cluster is an +important part in the lifecycle of a node. Node discovery enables new data nodes to be added dynamically +to a cluster, which in turn allows the cluster to scale out transparently. Additionally, if a data node +fails to respond to communications requests from other nodes, a master node can decide that the data node +has failed and take the necessary steps to reallocate the shards that it was holding to other operational +data nodes. 
+ +Elasticsearch node discovery is handled by using a discovery module. The discovery module is a plugin +that can be switched to use a different discovery mechanism. The default discovery module ([Zen][Zen]) +causes a node to issue ping requests to find other nodes on the same network. If other nodes respond, +they gossip to exchange information. A master node can then distribute shards to the new node (if it is a +data node) and rebalance the cluster. The Zen discovery module also handles the master election process and the protocol for detecting node failure. + +Additionally, if you are running Elasticsearch nodes as Azure virtual +machines (VMs), multicast messaging is not supported. For this reason, you should configure the Zen +discovery to use unicast messaging and provide a list of valid contact nodes in the elasticsearch.yml +configuration file. + +If you are hosting an Elasticsearch cluster within an Azure virtual network, you can specify that the private +DHCP-assigned IP addresses given to each VM in the cluster should remain allocated (static). You can +configure Zen discovery unicast messaging using these static IP addresses. If you are using VMs with +dynamic IP addresses, keep in mind that if a VM stops and restarts it could be allocated a new IP address +making discovery more difficult. To handle this scenario, you can swap the Zen discovery module for the +[Azure Cloud Plugin][Azure Cloud Plugin]. This plugin uses the Azure API to implement the discovery mechanism, which is +based on Azure subscription information. + +> [!NOTE] +> The current version of the Azure Cloud Plugin requires you to install the management +> certificate for your Azure subscription in the Java keystore on the Elasticsearch node, and provide the +> location and credentials for accessing the keystore in the elasticsearch.yml file.
This file is held in +> clear text, so it is vitally important that you ensure this file is only accessible by the account +> running the Elasticsearch service. +> +> Additionally, this approach may not be compatible with Azure Resource Manager deployments. For +> these reasons, it is recommended that you use static IP addresses for master nodes, and use these nodes +> to implement Zen discovery unicast messaging across the cluster. In the following configuration +> (taken from the elasticsearch.yml file for a sample data node), the host IP addresses reference +> master nodes in the cluster: +> +> + +```yaml +discovery.zen.ping.multicast.enabled: false +discovery.zen.ping.unicast.hosts: ["10.0.0.10","10.0.0.11","10.0.0.12"] +``` + +## General system guidelines +Elasticsearch can run on a variety of computers, ranging from a single laptop to a cluster of high-end +servers. However, the more resources in terms of memory, computing power, and fast disks that are +available the better the performance. The following sections summarize the basic hardware and software +requirements for running Elasticsearch. + +### Memory requirements +Elasticsearch attempts to store data in-memory for speed. A production server hosting a node for a +typical enterprise or moderate-sized commercial deployment on Azure should have between 14GB and 28GB of +RAM (D3 or D4 VMs). **Spread the load across more nodes rather than creating nodes with more memory**. +(Experiments have shown that using larger nodes with more memory can cause extended recovery times in the +event of a failure.) However, although creating clusters with a very large number of small nodes can +increase availability and throughput, it also escalates the effort involved in managing and maintaining +such a system. + +**Allocate 50% of the available memory on a server to the Elasticsearch heap**. If you are using Linux, set +the ES_HEAP_SIZE environment variable before running Elasticsearch.
Alternatively, if you are using +Windows or Linux, you can specify memory size in the `Xmx` and `Xms` parameters when you start +Elasticsearch. Set both of these parameters to the same value to avoid the Java Virtual Machine (JVM) +resizing the heap at runtime. However, **do not allocate more than 30GB**. Use the remaining memory for the operating system file cache. + +> [!NOTE] +> Elasticsearch utilizes the Lucene library to create and manage indexes. Lucene structures +> use a disk-based format, and caching these structures in the file system cache will greatly enhance +> performance. +> +> + +Note that the maximum optimal heap size for Java on a 64-bit machine is just above 30GB. Above this size +Java switches to using an extended mechanism for referencing objects on the heap, which increases the +memory requirements for each object and reduces performance. + +The default Java garbage collector (Concurrent Mark and Sweep) may also perform sub-optimally if the heap +size is above 30GB. It is not currently recommended to switch to a different garbage collector as +Elasticsearch and Lucene have only been tested against the default. + +Do not overcommit memory as swapping main memory to disk will severely impact performance. If possible, +disable swapping completely (the details depend on the operating system). If this is not possible then +enable the *mlockall* setting in the Elasticsearch configuration file (elasticsearch.yml) as follows: + +```yaml +bootstrap.mlockall: true +``` + +This configuration setting causes the JVM to lock its memory and prevents it being swapped out by the operating system. + +### Disk and file system requirements +Use data disks backed by premium storage for storing shards. Disks should be sized to hold the maximum +amount of data anticipated in your shards, although it is possible to add further disks later. You can +extend a shard across multiple disks on a node.
+ +> [!NOTE] +> Elasticsearch compresses the data for stored fields by using the LZ4 algorithm, and in +> Elasticsearch 2.0 onwards you can change the compression type. You can switch the compression algorithm +> to DEFLATE as used by the *zip* and *gzip* utilities. This compression technique can be more resource +> intensive, but you should consider using it for archived log data. This approach +> can help to reduce index size. +> +> + +It is not essential that all nodes in a cluster have the same disk layout and capacity. However, a node +with a very large disk capacity compared to other nodes in a cluster will attract more data and will +require increased processing power to handle this data. Consequently the node can become "hot" compared +to other nodes, and this can, in turn, affect performance. + +If possible, use RAID 0 (striping). Other forms of RAID that implement parity and mirroring are +unnecessary as Elasticsearch provides its own high availability solution in the form of replicas. + +> [!NOTE] +> Prior to Elasticsearch 2.0.0, you could also implement striping at the software level by +> specifying multiple directories in the *path.data* configuration setting. In Elasticsearch 2.0.0, this +> form of striping is no longer supported. Instead, different shards may be allocated to different paths, +> but all of the files in a single shard will be written to the same path. If you require striping, you +> should stripe data at the operating system or hardware level. +> +> + +To maximize storage throughput, each **VM should have a dedicated premium storage account**. + +The Lucene library can use a large number of files to store index data, and Elasticsearch can open a +significant number of sockets for communicating between nodes and with clients. Make sure that the +operating system is configured to support an adequate number of open file descriptors (up to 64000 if +sufficient memory is available).
Note that the default configuration for many Linux distributions limits +the number of open file descriptors to 1024, which is much too small. + +Elasticsearch uses a combination of memory mapped (mmap) I/O and Java New I/O (NIO) to optimize +concurrent access to data files and indexes. If you are using Linux, you should configure the operating +system to ensure that there is sufficient virtual memory available with space for 256K memory map areas. + +> [!NOTE] +> Many Linux distributions default to using the completely fair queuing (CFQ) scheduler when +> arranging to write data to disk. This scheduler is not optimized for SSDs. Consider reconfiguring the +> operating system to use either the NOOP scheduler or the deadline scheduler, both of which are more +> effective for SSDs. +> +> + +### CPU requirements +Azure VMs are available in a variety of CPU configurations, supporting between 1 and 32 cores. For a +data node, a good starting point is a standard DS-series VM; select either the DS3 (4 +cores) or DS4 (8 cores) SKU. The DS3 also provides 14GB of RAM, while the DS4 includes 28GB. + +The GS-series (for premium storage) and G-series (for standard storage) use Xeon E5 V3 processors which may be useful for workloads that are heavily compute-intensive, such as large-scale aggregations. For the latest information, visit [Sizes for virtual machines][Sizes for virtual machines]. + +### Network requirements +Elasticsearch requires a network bandwidth of between 1 and 10Gbps, depending on the size and volatility +of the clusters that it implements. Elasticsearch migrates shards between nodes as more nodes are added +to a cluster. Elasticsearch assumes that the communication time between all nodes is roughly equivalent +and does not consider the relative locations of shards held on those nodes. Additionally, replication can +incur significant network I/O between shards. For these reasons, **avoid creating clusters on nodes that +are in different regions**. 
+ +### Software Requirements +You can run Elasticsearch on Windows or on Linux. The Elasticsearch service is deployed as a Java jar +library and has dependencies on other Java libraries that are included in the Elasticsearch package. You +must install the Java 7 (update 55 or later) or Java 8 (update 20 or later) JVM to run Elasticsearch. + +> [!NOTE] +> Other than the *Xmx* and *Xms* memory parameters (specified as command line options to +> the Elasticsearch engine – see [Memory requirements][Memory requirements]) do not modify the default JVM configuration +> settings. Elasticsearch has been designed using the defaults; changing them can cause Elasticsearch to +> become detuned and perform poorly. +> +> + +### Deploying Elasticsearch on Azure +Although it is not difficult to deploy a single instance of Elasticsearch, creating a number of nodes and +installing and configuring Elasticsearch on each one can be a time-consuming and error-prone process. If +you are considering running Elasticsearch on Azure VMs, you have three options that can help to reduce the +chances of errors. + +* Using the Azure Resource Manager [template](https://azure.microsoft.com/marketplace/partners/elastic/elasticsearchelasticsearch/) in Azure marketplace. This template is created by Elastic. It allows you to add commercial enhancements such as Shield, Marvel, Watcher, and so on. +* Using the Azure quickstart [template](https://github.com/Azure/azure-quickstart-templates/tree/master/elasticsearch) to build the cluster. This template can create a cluster based on Windows Server 2012 or Ubuntu Linux 14.04. It allows you to use experimental features such as Azure File Storage. This template is used for the research and testing tasks in this document. +* Using scripts that can be automated or run unattended. 
Scripts that can create and deploy an Elasticsearch cluster are available on the [GitHub repository][elasticsearch-scripts] + +## Cluster and node sizing and scalability +Elasticsearch enables a number of deployment topologies, designed to support differing requirements and levels of scale. This section discusses some common topologies, and describes the considerations for implementing clusters based on these topologies. + +### Elasticsearch topologies +The figure below illustrates a starting point for designing an Elasticsearch topology for Azure: + +![Suggested starting point for building an Elasticsearch cluster with Azure](./images/general-startingpoint.png) + +*Suggested starting point for building an Elasticsearch cluster with Azure* + +This topology contains six data nodes together with three client nodes and three master nodes (only one master node is elected, the other two are available for election should the elected master fail.) Each node is implemented as a separate VM. Azure web applications are directed to client nodes via a load balancer. + +In this example, all nodes and the web applications reside in the same virtual network which effectively isolates them from the outside world. If the cluster needs to be available externally (possibly as part of a hybrid solution incorporating on-premises clients), then you can use the Azure Load Balancer to provide a public IP address, but you will need to take additional security precautions to prevent unauthorized access to the cluster. + +The optional "Jump Box" is a VM that is only available to administrators. This VM has a network connection to the virtual network, but also an outward facing network connection to permit administrator logon from an external network (this logon should be protected by using a strong password or certificate). An administrator can log on to the Jump Box, and then connect from there directly to any of the nodes in the cluster. 
+ +Alternative approaches include using a site-to-site VPN between an organization and the virtual network, or using [ExpressRoute][ExpressRoute] circuits to connect to the virtual network. These mechanisms permit administrative access to the cluster without exposing the cluster to the public internet. + +To maintain VM availability, the data nodes are grouped into the same Azure availability set. Similarly, the client nodes are held in another availability set and the master nodes are stored in a third availability set. + +This topology is relatively easy to scale out; simply add more nodes of the appropriate type and ensure that they are configured with the same cluster name in the elasticsearch.yml file. Client nodes also need to be added to the backend pool for the Azure load balancer. + +**Geo-locating clusters** + +**Don’t spread nodes in a cluster across regions as this can impact the performance of inter-node communication** (see [Network requirements][Network requirements]). Geo-locating data close to users in different regions requires creating multiple clusters. In this situation, you need to consider how (or even whether) to synchronize clusters. Possible solutions include: + +[Tribe nodes][Tribe nodes] are similar to client nodes except that they can participate in multiple Elasticsearch +clusters and view them all as one big cluster. Data is still managed locally by each cluster (updates are +not propagated across cluster boundaries), but all data is visible. A tribe node can query, create, and +manage documents in any cluster. + +The primary restrictions are that a tribe node cannot be used to create a new index, and index names must +be unique across all clusters. Therefore it is important that you consider how indexes will be named when +you design clusters intended to be accessed from tribe nodes. 
+ +Using this mechanism, each cluster can contain the data that is most likely to be accessed by local +client applications, but these clients can still access and modify remote data although with possible +extended latency. The figure below shows an example of this topology. The tribe node in Cluster 1 is +highlighted; the other clusters can also have tribe nodes although these are not shown on the diagram: + +![A client application accessing multiple clusters through a tribe node](./images/general-tribenode.png) + +*A client application accessing multiple clusters through a tribe node* + +In this example, the client application connects to the tribe node in Cluster 1 (co-located in the same region), but this node is configured to be able to access Cluster 2 and Cluster 3, which might be located in different regions. The client application can send requests that retrieve or modify data in any of the clusters. + +> [!NOTE] +> Tribe nodes require multicast discovery to connect to clusters, which may present a +> security concern. See the section [Node discovery][Node discovery] for more details. +> +> + +* Implementing geo-replication between clusters. In this approach, changes made at each cluster are + propagated in near real-time to clusters located in other data centers. Third-party plugins are available + for Elasticsearch that support this functionality, such as the [PubNub Changes Plugin][PubNub Changes Plugin]. +* Using the [Elasticsearch Snapshot and Restore module][Elasticsearch Snapshot and Restore module]. If the data is very slow-moving and is + modified only by a single cluster, you can consider using snapshots to take a periodic copy of the data + and then restore these snapshots in other clusters (snapshots can be stored in Azure Blob Storage if you + have installed the [Azure Cloud Plugin][Azure Cloud Plugin]). However, this solution does not work well for rapidly + changing data or if data can be changed in more than one cluster. 
+ +**Small-scale topologies** + +Large-scale topologies comprising clusters of dedicated master, client, and data nodes might not be +appropriate for every scenario. If you are building a small-scale production or development system, +consider the 3-node cluster shown in the figure below. + +Client applications connect directly to any available data node in the cluster. The cluster contains +three shards labelled P1-P3 (to allow for growth) plus replicas labelled R1-R3. Using three nodes allows +Elasticsearch to distribute the shards and replicas so that if any single node fails no data will be lost. + +![A 3-node cluster with 3 shards and replicas](./images/general-threenodecluster.png) + +*A 3-node cluster with 3 shards and replicas* + +If you are running a development installation on a standalone machine you can configure a cluster with a +single node that acts as master, client, and data storage. Alternatively, you can start multiple nodes +running as a cluster on the same computer by starting more than one instance of Elasticsearch. The figure +below shows an example. + +![Development configuration](./images/general-developmentconfiguration.png) + +*A development configuration running multiple Elasticsearch nodes on the same machine* + +Note that neither of these standalone configurations are recommended for a production environment as +they can cause contention unless your development machine has a significant amount of memory and several +fast disks. Additionally, they do not provide any high availability guarantees. If the machine fails, all +nodes are lost. + +### Scaling a cluster and data nodes +Elasticsearch can scale in two dimensions: vertically (using bigger, more powerful machines) and +horizontally (spreading the load across machines). + +**Scaling Elasticsearch data nodes vertically** + +If you are hosting an Elasticsearch cluster by using Azure VMs, each node can correspond to a VM. 
The +limit of vertical scalability for a node is largely governed by the SKU of the VM and the overall +restrictions applied to individual storage accounts and Azure subscriptions. + +The page [Azure subscription and service limits, quotas, and constraints](/azure/azure-subscription-service-limits/) +describes these limits in detail, but as far as building an Elasticsearch cluster is concerned, the items +in the following list are the most pertinent. + +* Each storage account is restricted to 20,000 IOPS. Each VM in the cluster should leverage a + dedicated (preferably premium) storage account. +* The number of data nodes in a virtual network. If you are not using the Azure Resource Manager, there is a + limit of 2048 VM instances per virtual network. While this should prove sufficient for many cases, if you have a + very large configuration with thousands of nodes this could be a limitation. +* Number of storage accounts per subscription per region. You can create up to 100 storage accounts per + Azure subscription in each region. Storage accounts are used to hold virtual disks, and each storage + account has a limit of 500TB of space. +* Number of cores per subscription. The default limit is 20 cores per subscription, but this can be increased up to 10,000 cores by requesting a limit increase through a support ticket. +* The amount of memory per VM size. Smaller size VMs have limited amounts of memory available (D1 + machines have 3.5GB, and D2 machines have 7GB). These machines might not be suitable for scenarios that + require Elasticsearch to cache significant amounts of data to achieve good performance (aggregating data, + or analyzing a large number of documents during data ingestion, for example). +* The maximum number of disks per VM size. This restriction can limit the size and performance of a + cluster. Fewer disks means that less data can be held, and performance can be reduced by having fewer + disks available for striping. 
+* The number of update domains / fault domains per availability set. If you create VMs using the Azure Resource Manager, + each availability set can be allocated up to 3 fault domains and 20 update domains. This limitation can + impact the resilience of a large cluster that is subjected to frequent rolling updates. + +Additionally, you should probably not consider using VMs with more than 64GB of memory. As described in +the section [Memory requirements][Memory requirements], you should not allocate more than 30GB of RAM on each VM to the JVM +and allow the operating system to utilize the remaining memory for I/O buffering. + +With these restrictions in mind, you should always spread the virtual disks for the VMs in a cluster +across storage accounts to reduce the chances of I/O throttling. In a very large cluster, you may need to +redesign your logical infrastructure and split it into separate functional partitions. For example, you +might need to split the cluster across subscriptions, although this process can lead to further +complications because of the need to connect virtual networks. + +**Scaling an Elasticsearch cluster horizontally** + +Internally within Elasticsearch, the limit of horizontal scalability is determined by the number of +shards defined for each index. Initially, many shards can be allocated to the same node in a cluster, but +as the volume of data grows additional nodes can be added and shards can be distributed across these +nodes. In theory, only when the number of nodes reaches the number of shards will the system cease to +scale horizontally. + +As with vertical scaling, there are some issues that you should consider when contemplating implementing +horizontal scaling, including: + +* The maximum number of VMs that you can connect in an Azure virtual network. This can limit the horizontal + scalability for a very large cluster. 
You can create a cluster of nodes that spans more than one virtual network to + circumvent this limit, but this approach can lead to reduced performance due to the lack of locality of + each node with its peers. +* The number of disks per VM Size. Different series and SKUs support different numbers of attached disks. + Additionally, you can also consider using the ephemeral storage included with the VM to provide a limited + amount of faster data storage, although there are resiliency and recovery implications that you should + consider (see [Configuring resilience and recovery on Elasticsearch on Azure][elasticsearch-resilience-recovery] for + more information). The D-series, DS-series, Dv2-series, and GS-series of VMs use SSDs for ephemeral + storage. + +You could consider using [Virtual Machine Scale Sets][vmss] to start and stop VMs as demand dictates. However, this approach might not be +appropriate for an Elasticsearch cluster for the following reasons: + +* This approach is best suited for stateless VMs. Each time you add or remove a node from an + Elasticsearch cluster, shards are reallocated to balance the load, and this process can generate + considerable volumes of network traffic and disk I/O and can severely impact data ingestion rates. You + must assess whether this overhead is worth the benefit of the additional processing and memory resources + that become available by dynamically starting more VMs. +* VM startup does not happen instantaneously, and it may take several minutes before additional VMs + become available or they are shut down. Scaling in this way should only be used to handle sustained + changes in demand. +* After scaling out, do you actually need to consider scaling back? Removing a VM from an Elasticsearch + cluster can be a resource intensive process requiring that Elasticsearch recovers the shards and replicas + that are located on that VM and recreates them on one or more of the remaining nodes. 
Removing several + VMs at the same time could compromise the integrity of the cluster, making recovery difficult. Furthermore, + many Elasticsearch implementations grow over time, but the nature of the data is such that it tends not + to shrink in volume. It is possible to delete documents manually, and documents can also be configured + with a TTL (time to live) after which they expire and get removed, but in most cases it is likely that + the space previously allocated will be quickly reused by new or modified documents. Fragmentation within + an index might occur when documents are removed or changed, in which case you can use the Elasticsearch + HTTP [Optimize][Optimize] API (Elasticsearch 2.0.0 and earlier) or the [Force Merge][Force Merge] API (Elasticsearch 2.1.0 + and later) to perform defragmentation. + +### Determining the number of shards for an index +The number of nodes in a cluster can vary over time, but the number of shards in an index is fixed once +the index has been created. To add or remove shards requires reindexing the data – a process of creating +a new index with the required number of shards and then copying the data from the old index to the new +(you can use aliases to insulate users from the fact that data has been reindexed – see [Tuning data aggregation and query performance for Elasticsearch on Azure][Tuning data aggregation and query performance for Elasticsearch on Azure] for more details). +Therefore, it is important to determine the number of shards that you are likely to require in advance of creating the first index in your cluster. You can perform the following steps to establish this number: + +* Create a single-node cluster using the same hardware configuration that you intend to deploy in + production. +* Create an index that matches the structure that you plan to use in production. Give this index a single + shard and no replicas. +* Add a specific quantity of realistic production data to the index. 
+* Perform typical queries, aggregations, and other workloads against the index and measure the throughput + and response time. +* If the throughput and response time are within acceptable limits, then repeat the process from step 3 + (add more data). +* When you appear to have reached the capacity of the shard (response times and throughput start becoming + unacceptable), make a note of the volume of documents. +* Extrapolate from the capacity of a single shard to the anticipated number of documents in production to + calculate the required number of shards (you should include some margin of error in these calculations as + extrapolation is not a precise science). + +> [!NOTE] +> Remember that each shard is implemented as a Lucene index that consumes memory, CPU power, +> and file handles. The more shards you have, the more of these resources you will require. +> +> + +Additionally, creating more shards may increase scalability (depending on your workloads and scenario) +and can increase data ingestion throughput, but it might reduce the efficiency of many queries. By +default, a query will interrogate every shard used by an index (you can use [custom routing][custom routing] to modify +this behavior if you know which shards the data you require is located on). + +Following this process can only generate an estimate for the number of shards, and the volume of +documents expected in production might not be known. In this case, you should determine the initial +volume (as above) and the predicted growth rate. Create an appropriate number of shards that can handle +the growth of data for the period until you are willing to reindex the database. + +Other strategies used for scenarios such as event management and logging include using rolling indexes. +Create a new index for the data ingested each day and access this index through an alias that is switched +daily to point to the most recent index. 
This approach enables you to more easily age-out old data (you +can delete indexes containing information that is no longer required) and keeps the volume of data +manageable. + +Keep in mind that the number of nodes does not have to match the number of shards. For example, if you +create 50 shards, you can spread them across 10 nodes initially, and then add more nodes to scale the +system out as the volume of work increases. Avoid creating an exceptionally large number of shards on a +small number of nodes (1000 shards spread across 2 nodes, for example). Although the system could +theoretically scale to 1000 nodes with this configuration, running 500 shards on a single node risks +crippling the performance of the node. + +> [!NOTE] +> For systems that are data-ingestion heavy, consider using a prime number of shards. The +> default algorithm that Elasticsearch uses for routing documents to shards produces a more even spread +> in this case. +> +> + +### Security +By default, Elasticsearch implements minimal security and does not provide any means of authentication +and authorization. These aspects require configuring the underlying operating system and network, and +using plugins and third-party utilities. Examples include [Shield][Shield], and [Search Guard][Search Guard]. + +> [!NOTE] +> Shield is a plugin provided by Elastic for user authentication, data encryption, +> role-based access control, IP filtering, and auditing. It may be necessary to configure the underlying +> operating system to implement further security measures, such as disk encryption. +> +> + +In a production system, you should consider how to: + +* Prevent unauthorized access to the cluster. +* Identify and authenticate users. +* Authorize the operations that authenticated users can perform. +* Protect the cluster from rogue or damaging operations. +* Protect the data from unauthorized access. +* Meet regulatory requirements for commercial data security (if appropriate). 
+ +### Securing access to the cluster +Elasticsearch is a network service. The nodes in an Elasticsearch cluster listen for incoming client requests using HTTP, and communicate with each other using a TCP channel. You should take steps to prevent unauthorized clients or services from being able to send requests over both the HTTP and TCP paths. Consider the following items. + +* Define network security groups to limit the inbound and outbound network traffic for a virtual network or VM to + specific ports only. +* Change the default ports used for client web access (9200) and programmatic network access (9300). Use + a firewall to protect each node from malicious Internet traffic. +* Depending on the location and connectivity of clients, place the cluster on a private subnet with no + direct access to the Internet. If the cluster must be exposed outside the subnet, route all requests + through a bastion server or proxy sufficiently hardened to protect the cluster. + +If you must provide direct access to nodes, use an [nginx](http://nginx.org/en/) proxy server and configure +HTTPS authentication. + +> [!NOTE] +> Using a proxy server such as nginx, you can also restrict access to functionality. For +> example, you can configure nginx to only allow requests to the \_search endpoint if you need to prevent +> clients from performing other operations. +> +> + +If you require more comprehensive network access security, use the Shield or Search Guard plugins. + +### Identifying and authenticating users +All requests made by clients to the cluster should be authenticated. Additionally, you should prevent +unauthorized nodes from joining the cluster as these can provide a backdoor into the system that bypasses +authentication. + +Elasticsearch plugins are available that can perform different types of authentication, including: + +* **HTTP basic authentication**. Usernames and passwords are included in each request. 
All requests must be + encrypted by using SSL/TLS or an equivalent level of protection. +* **LDAP and Active Directory integration**. This approach requires that clients are assigned roles + in LDAP or AD groups. +* **Native authentication**. Uses identities defined within the Elasticsearch cluster itself. +* **TLS authentication**. Use TLS authentication within a cluster to authenticate all nodes. +* **IP filtering**. Use IP filtering to prevent clients from unauthorized subnets from connecting, and also preventing nodes from these subnets joining the cluster. + +### Authorizing client requests +Authorization depends on the Elasticsearch plugin used to provide this service. For example, a plugin that provides basic authentication typically provides features that define the level of authentication, whereas a plugin that uses LDAP or AD will typically associate clients with roles, and then assign access rights to those roles. When using any plugin, you should consider the following points: + +* Do you need to restrict the operations that a client can perform? For example, should a client be able + to monitor the status of the cluster, or create and delete indexes? +* Should the client be restricted to specific indexes? This is useful in a multitenant situation where + tenants may be assigned their own specific set of indexes, and these indexes should be inaccessible to + other tenants. +* Should the client be able to read and write data to an index? A client may be able to perform searches + that retrieve data using an index but must be prevented from adding or deleting data from that index, for + example. + +Currently, most security plugins scope operations to the cluster or index level, and not to subsets of +documents within indexes. This is for efficiency reasons. It is therefore not easy to limit requests to +specific documents within a single index. 
If you require this level of granularity, save documents in +separate indexes and use aliases that group indexes together. + +For example, in a personnel system, if user A requires access to all documents that contain information +about employees in department X, user B requires access to all documents that contain information about +employees in department Y, and user C requires access to all documents that contain information about +employees in both departments, create two indexes (for department X and department Y), and an alias that +references both indexes. Grant user A read access to the first index, grant user B read access to the +second index, and grant user C read access to both indexes through the alias. For more information, see +[Faking Index per User with Aliases][Faking Index per User with Aliases]. + +### Protecting the cluster +The cluster can become vulnerable to misuse if it is not protected carefully. + +**Disable dynamic query scripting in Elasticsearch** queries as they can lead to security vulnerabilities. Use native scripts in preference to query scripting; a native script is an Elasticsearch plugin written in Java and compiled into a JAR file. + +Dynamic query scripting is now disabled by default; do not reenable it unless you have a very good reason to do so. + +**Avoid exposing query-string searches to users** as this type of searching allows users to perform +resource-intensive queries unhindered. These searches could severely impact the performance of the +cluster and can render the system open to DOS attacks. Additionally, query-string searching can expose +potentially private information. + +**Prevent operations from consuming a lot of memory** as these can cause out-of-memory exceptions +resulting in Elasticsearch failing on a node. Long-running resource intensive operations can also be used +to implement DOS attacks. 
Examples include: + +Avoid Search requests that attempt to load very large fields into memory (if a query sorts, scripts, or facets on these fields), such as: + +* Searches that query multiple indexes at the same time. +* Searches that retrieve a large number of fields. These searches can exhaust memory by causing a vast amount of field data to be cached. By default, the field data cache is unlimited in size, but you can set the [indices.fielddata.cache.*](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-fielddata.html) properties in the elasticsearch.yml configuration file to limit the resources available. You can also configure the [field data circuit breaker][field data circuit breaker] to help prevent the cached data from a single field from exhausting memory, and the [request circuit breaker][request circuit breaker] to stop individual queries from monopolizing memory. The cost of setting these parameters is the increased likelihood of some queries failing or timing out. + +> [!NOTE] +> Using Doc Values can reduce the memory requirements of indexes by saving field data to +> disk rather than loading it into memory. This can help to reduce the chances of memory exhaustion on a +> node but with a reduction in speed. +> +> Elasticsearch always assumes that it has enough memory to perform its current workload. If +> this is not the case, then the Elasticsearch service can crash. Elasticsearch provides endpoints that +> return information about resource usage (the HTTP [cat APIs][cat APIs]), and you should monitor this +> information carefully. +> +> + +**Waiting for too long to flush an in-progress memory segment**. This can exhaust in-memory buffer space. +If necessary, [configure the translog][configure the translog] to reduce the thresholds at which data is flushed to disk. + +**Creating indexes with large amounts of metadata**. An index that contains documents with a large +variation in field names can consume a lot of memory. 
For more information, see [Mapping Explosion][Mapping Explosion]. + +The definition of a long-running or query intensive operation is highly scenario-specific. The workload typically expected by one cluster might have a completely different profile from the workload on another. Determining which operations are unacceptable requires significant research and testing of your applications. + +Be proactive: detect and stop malicious activities before they cause significant damage or data loss. +Consider using a security monitoring and notification system that can quickly detect unusual patterns of +data access and raise alerts when, for example, user login requests fail, unexpected nodes join or leave +the cluster, or operations are taking longer than expected. Tools that can perform these tasks include +Elasticsearch [Watcher][Watcher]. + +### Protecting the data +You can protect data in flight by using SSL/TLS, but Elasticsearch does not provide any built-in form of +data encryption for information that is stored on disk. Remember that this information is held in +ordinary disk files, and any user with access to these files may be able to compromise the data that they +hold, for example by copying them to their own cluster. Consider the following points: + +* Protect the files used by Elasticsearch to hold the data. Do not allow arbitrary read or write access + to identities other than the Elasticsearch service. +* Encrypt the data held in these files by using an encrypting file system. + +> [!NOTE] +> Azure now supports disk encryption for Linux and Windows VMs. For more information, see +> [Azure Disk Encryption for Windows and Linux IaaS VMs Preview](/azure/security/azure-security-disk-encryption/). 
+> +> + +### Meeting regulatory requirements +Regulatory requirements are primarily concerned with auditing operations to maintain a history of events, +and ensuring the privacy of these operations to help prevent them being monitored (and replayed) by an +external agency. In particular, you should consider how to: + +* Track all requests (successful or not), and all attempts to access the system. +* Encrypt communications made by clients to the cluster as well as node-to-node communications performed + by the cluster. You should implement SSL/TLS for all cluster communications. Elasticsearch also supports + pluggable ciphers if your organization has requirements distinct from those available through SSL/TLS. +* Store all audit data securely. The volume of audit information can grow very rapidly and must be + protected robustly to prevent tampering of audit information. +* Safely archive audit data. + +### Monitoring +Monitoring is important both at the operating system level and at the Elasticsearch level. + +You can perform monitoring at the operating system level using operating-system specific tools. Under +Windows, this includes items such as Performance Monitor with the appropriate performance counters, while +under Linux you can use tools such as *vmstat*, *iostat*, and *top*. The key items to monitor at the +operating system level include CPU utilization, disk I/O volumes, disk I/O wait times, and network traffic. +In a well-tuned Elasticsearch cluster, CPU utilization by the Elasticsearch process should be high, and +disk I/O wait times should be minimal. + +At the software level, you should monitor the throughput and response times of requests, together with +the details of requests that fail. Elasticsearch provides a number of APIs that you can use to examine +the performance of different aspects of a cluster. The two most important APIs are *_cluster/health* and +*_nodes/stats*. 
The *_cluster/health* API can be used to provide a snapshot of the overall health of the +cluster, as well as providing detailed information for each index, as shown in the following example: + +`GET _cluster/health?level=indices` + +The example output shown below was generated using this API: + +```json +{ + "cluster_name": "elasticsearch", + "status": "green", + "timed_out": false, + "number_of_nodes": 6, + "number_of_data_nodes": 3, + "active_primary_shards": 10, + "active_shards": 20, + "relocating_shards": 0, + "initializing_shards": 0, + "unassigned_shards": 0, + "delayed_unassigned_shards": 0, + "number_of_pending_tasks": 0, + "number_of_in_flight_fetch": 0, + "indices": { + "systwo": { + "status": "green", + "number_of_shards": 5, + "number_of_replicas": 1, + "active_primary_shards": 5, + "active_shards": 10, + "relocating_shards": 0, + "initializing_shards": 0, + "unassigned_shards": 0 + }, + "sysfour": { + "status": "green", + "number_of_shards": 5, + "number_of_replicas": 1, + "active_primary_shards": 5, + "active_shards": 10, + "relocating_shards": 0, + "initializing_shards": 0, + "unassigned_shards": 0 + } + } +} +``` + +This cluster contains two indexes named *systwo* and *sysfour*. Key statistics to monitor for each index are the status, active_shards, and unassigned_shards. The status should be green, the number of active_shards should reflect the number_of_shards and number_of_replicas, and unassigned_shards should be zero. + +If the status is red, then part of the index is missing or has become corrupt. You can verify this if the *active_shards* setting is less than *number_of_shards* \* (*number_of_replicas* + 1) and unassigned_shards is non-zero. Note that a status of yellow indicates that an index is in a transitional state, either as the result of adding more replicas or shards being relocated. The status should switch to green when the transition has completed. 
+ +If it stays yellow for an extended period or changes to red, you should look to see whether any significant I/O events (such as a disk or network failure) have occurred at the operating system level. + +The \_nodes/stats API emits extensive information about each node in the cluster: + +`GET _nodes/stats` + +The output generated includes details about how indexes are stored on each node (including the sizes and +numbers of documents), time spent performing indexing, querying, searching, merging, caching, +operating system and process information, statistics about the JVM (including garbage collection +performance), and thread pools. For more information, see [Monitoring Individual Nodes][Monitoring Individual Nodes]. + +If a significant proportion of Elasticsearch requests are failing with *EsRejectedExecutionException* +error messages, then Elasticsearch is failing to keep up with the work being sent its way. In this +situation, you need to identify the bottleneck that is causing Elasticsearch to fall behind. Consider the +following items: + +* If the bottleneck is due to a resource constraint, such as insufficient memory allocated to the JVM + causing an excessive number of garbage collections, then consider allocating additional resources (in + this case, configure the JVM to use more memory, up to 50% of the available memory on the node – see + [Memory requirements][Memory requirements]). +* If the cluster is showing large I/O wait times and the merge statistics gathered for an index by using + the \_nodes/stats API contain large values then the index is write-heavy. Revisit the points raised in [Optimizing resources for indexing operations](./data-ingestion-performance.md#optimizing-resources-for-indexing-operations) to + tune indexing performance. +* Throttle client applications that are performing data ingestion operations and determine the effect + that this has on performance. 
If this approach shows significant improvement, then consider either + retaining the throttle, or scaling out by spreading the load for write-heavy indexes across more nodes. + For more information, see [Tuning data ingestion performance for Elasticsearch on Azure][Tuning data ingestion performance for Elasticsearch on Azure]. +* If the searching statistics for an index indicate that queries are taking a long time then consider how + the queries are optimized. Note that you can use the *query_time_in_millis* and *query_total* values reported by the search statistics to calculate a rough guide to query efficiency; the equation *query_time_in_millis* / *query_total* will give you an average time for each query. + +### Tools for monitoring Elasticsearch +A variety of tools are available for performing everyday monitoring of Elasticsearch in production. These +tools typically use the underlying Elasticsearch APIs to gather information and present the details in a +manner that is easier to observe than the raw data. Common examples include [Elasticsearch-Head][Elasticsearch-Head], +[Bigdesk][Bigdesk], [Kopf][Kopf], and [Marvel][Marvel]. + +Elasticsearch-Head, Bigdesk, and Kopf run as plugins for the Elasticsearch software. More recent versions +of Marvel can run independently, but require [Kibana][Kibana] to provide a data capture and hosting +environment. The advantage of using Marvel with Kibana is that you can implement monitoring in a separate +environment from the Elasticsearch cluster, enabling you to explore problems with Elasticsearch that +might not be possible if the monitoring tools run as part of the Elasticsearch software. For example, if +Elasticsearch repeatedly fails or is running very slowly, tools that run as Elasticsearch plugins will +also be affected, making monitoring and diagnosis more difficult. 
+ +At the operating system level, you can use tools such as the Log Analytics feature of [Azure Operations Management Suite][Azure Operations Management Suite] or [Azure Diagnostics with the Azure Portal][Azure Diagnostics with the Azure Portal] to capture performance data for VMs +hosting Elasticsearch nodes. Another approach is to use [Logstash][Logstash] to capture performance and log data, +store this information in a separate Elasticsearch cluster (don't use the same cluster that you are using + for your application), and then use Kibana to visualize the data. For more information, see [Microsoft Azure Diagnostics with ELK][Microsoft Azure Diagnostics with ELK]. + +### Tools for testing Elasticsearch performance +Other tools are available if you are benchmarking Elasticsearch or subjecting a cluster to performance +testing. These tools are intended to be used in a development or test environment rather than production. +A frequently-used example is [Apache JMeter][Apache JMeter]. + +JMeter was used to perform benchmarking and other load tests described in documents related to this +guidance. [Creating a performance testing environment for Elasticsearch on Azure][Creating a performance testing environment for Elasticsearch on Azure] describes in detail how JMeter was configured and used. 
+ +[Running Elasticsearch on Azure]: index.md +[Tuning Data Ingestion Performance for Elasticsearch on Azure]: data-ingestion-performance.md +[Creating a Performance Testing Environment for Elasticsearch on Azure]: performance-testing-environment.md +[Implementing a JMeter Test Plan for Elasticsearch]: jmeter-test-plan.md +[Deploying a JMeter JUnit Sampler for Testing Elasticsearch Performance]: jmeter-junit-sampler.md +[Tuning Data Aggregation and Query Performance for Elasticsearch on Azure]: data-aggregation-and-query-performance.md +[Configuring Resilience and Recovery on Elasticsearch on Azure]: resilience-and-recovery.md +[Running the Automated Elasticsearch Resiliency Tests]: resilience-and-recovery + +[Apache JMeter]: http://jmeter.apache.org/ +[Apache Lucene]: https://lucene.apache.org/ +[Azure Disk Encryption for Windows and Linux IaaS VMs Preview]: /azure/azure-security-disk-encryption/ +[Azure Load Balancer]: /azure/load-balancer/load-balancer-overview/ +[ExpressRoute]: /azure/expressroute/expressroute-introduction/ +[internal load balancer]: /azure/load-balancer/load-balancer-internal-overview/ +[Sizes for Virtual Machines]: /azure/virtual-machines/virtual-machines-linux-sizes/ + +[Memory Requirements]: #memory-requirements +[Network Requirements]: #network-requirements +[Node Discovery]: #node-discovery +[Query Tuning]: #query-tuning + +[elasticsearch-scripts]: https://github.com/mspnp/azure-guidance/tree/master/scripts/ps +[A Highly Available Cloud Storage Service with Strong Consistency]: http://blogs.msdn.com/b/windowsazurestorage/archive/2011/11/20/windows-azure-storage-a-highly-available-cloud-storage-service-with-strong-consistency.aspx +[Azure Cloud Plugin]: https://www.elastic.co/blog/azure-cloud-plugin-for-elasticsearch +[Azure Diagnostics with the Azure Portal]: https://azure.microsoft.com/blog/windows-azure-virtual-machine-monitoring-with-wad-extension/ +[Azure Operations Management Suite]: 
https://www.microsoft.com/server-cloud/operations-management-suite/overview.aspx +[Azure Quickstart Templates]: https://azure.microsoft.com/documentation/templates/ +[Bigdesk]: http://bigdesk.org/ +[cat APIs]: https://www.elastic.co/guide/en/elasticsearch/reference/1.7/cat.html +[configure the translog]: https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-translog.html +[custom routing]: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-routing-field.html +[Elasticsearch]: https://www.elastic.co/products/elasticsearch +[Elasticsearch-Head]: https://mobz.github.io/elasticsearch-head/ +[Elasticsearch.Net & NEST]: http://nest.azurewebsites.net/ +[elasticsearch-resilience-recovery]: resilience-and-recovery.md +[Elasticsearch Snapshot and Restore module]: https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html +[Faking Index per User with Aliases]: https://www.elastic.co/guide/en/elasticsearch/guide/current/faking-it.html +[field data circuit breaker]: https://www.elastic.co/guide/en/elasticsearch/reference/current/circuit-breaker.html#fielddata-circuit-breaker +[Force Merge]: https://www.elastic.co/guide/en/elasticsearch/reference/2.1/indices-forcemerge.html +[gossiping]: https://en.wikipedia.org/wiki/Gossip_protocol +[Kibana]: https://www.elastic.co/downloads/kibana +[Kopf]: https://github.com/lmenezes/elasticsearch-kopf +[Logstash]: https://www.elastic.co/products/logstash +[Mapping Explosion]: https://www.elastic.co/blog/found-crash-elasticsearch#mapping-explosion +[Marvel]: https://www.elastic.co/products/marvel +[Microsoft Azure Diagnostics with ELK]: http://aka.ms/AzureDiagnosticsElk +[Monitoring Individual Nodes]: https://www.elastic.co/guide/en/elasticsearch/guide/current/_monitoring_individual_nodes.html#_monitoring_individual_nodes +[nginx]: http://nginx.org/en/ +[Node Client API]: https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/client.html +[Optimize]: 
https://www.elastic.co/guide/en/elasticsearch/reference/1.7/indices-optimize.html +[PubNub Changes Plugin]: http://www.pubnub.com/blog/quick-start-realtime-geo-replication-for-elasticsearch/ +[request circuit breaker]: https://www.elastic.co/guide/en/elasticsearch/reference/current/circuit-breaker.html#request-circuit-breaker +[Search Guard]: https://github.com/floragunncom/search-guard +[Shield]: https://www.elastic.co/products/shield +[Transport Client API]: https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/transport-client.html +[tribe nodes]: https://www.elastic.co/blog/tribe-node +[vmss]: https://azure.microsoft.com/documentation/services/virtual-machine-scale-sets/ +[Watcher]: https://www.elastic.co/products/watcher +[Zen]: https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html diff --git a/docs/elasticsearch/jmeter-junit-sampler.md b/docs/elasticsearch/jmeter-junit-sampler.md new file mode 100644 index 00000000000..4b19dee6427 --- /dev/null +++ b/docs/elasticsearch/jmeter-junit-sampler.md @@ -0,0 +1,230 @@ +--- +title: Deploy a JMeter JUnit sampler to test Elasticsearch performance +description: >- + How to use a JUnit sampler to generate and upload data to an Elasticsearch + cluster. +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: implement-jmeter-test-plan +pnp.series.next: automated-resilience-tests +ms.assetid: 6824878e-ee95-4763-b3ef-58af9a7220d5 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Deploy a JMeter JUnit sampler for testing performance +[!INCLUDE [header](../_includes/header.md)] + +This document describes how to create and use a JUnit sampler that can generate and upload data to an Elasticsearch cluster as part of a JMeter test plan. 
This provides a highly flexible approach to load testing that can generate large quantities of test data without depending on external data files. + +> [!NOTE] +> The load tests used to assess the performance of data ingestion described in [Tuning data ingestion performance for Elasticsearch](./data-ingestion-performance.md) were constructed using this approach. The details of the JUnit code are described in that document. +> +> + +For testing data ingestion performance, the JUnit code was developed using Eclipse (Mars), and dependencies were resolved using Maven. The following procedures describe the step by step process for installing Eclipse, configuring Maven, creating a JUnit test, and deploying this test as a JUnit Request sampler in a JMeter test. + +> [!NOTE] +> For detailed information on the structure and configuration of the test environment, see [Creating a performance testing environment for Elasticsearch on Azure][Creating a performance testing environment for Elasticsearch on Azure]. +> +> + +## Installing prerequisites +You will need the [Java Runtime Environment](http://www.java.com/en/download/ie_manual.jsp) on your development machine. +You will also need to install the [Eclipse IDE for Java Developers](https://www.eclipse.org/downloads/index.php?show_instructions=TRUE). + +> [!NOTE] +> If you are using the JMeter master VM described in [Creating a performance testing environment for Elasticsearch on Azure][Creating a performance testing environment for Elasticsearch on Azure] as your development environment, download the Windows 32 Bit version of the Eclipse Installer. +> +> + +## Creating a JUnit test project for load testing Elasticsearch +Start the Eclipse IDE if it is not already running, and then close the **Welcome** page. On the **File** menu, +click **New**, and then click **Java Project**. 
+ +![Eclipse menu](./images/jmeter-deploy7.png) + +In the **New Java Project** window, enter a project name, select **Use default JRE**, and then click **Finish**. + +![New Java Project dialog](./images/jmeter-deploy8.png) + +In the **Package Explorer** window, expand the node named after your project. Verify that it contains a +folder named **src** and a reference to the JRE you specified. + +![Eclipse Package Explorer](./images/jmeter-deploy9.png) + +Right-click the **src** folder, click **New**, and then click **JUnit Test Case**. + +![New JUnit Test Case menu item](./images/jmeter-deploy10.png) + +In the **New JUnit Test Case** window, select **New Junit 4 test**, enter a name for the package (this can +be the same as the name of the project, although by convention it should start with a lower case letter), +a name for the test class, and select the options that generate the method stubs required for your test. +Leave the **Class under test** box empty, and then click **Finish**. + +![New JUnit Test Case dialog](./images/jmeter-deploy11.png) + +If the following **New JUnit Test Case** dialog box appears, select the option to add the JUnit 4 library +to the build path and then click **OK**. + +![New JUnit Test Case dialog](./images/jmeter-deploy12.png) + +Verify that the skeleton code for the JUnit test is generated and displayed in the Java editor window. + +![Java editor window](./images/jmeter-deploy13.png) + +In the **Package Explorer**, right-click the node for your project, click **Configure**, and then click +**Convert to Maven Project**. + +> [!NOTE] +> Using Maven enables you to more easily manage external dependencies (such as the +> Elasticsearch Java client libraries) a project depends on. +> +> + +![Convert to Maven Project menu item](./images/jmeter-deploy14.png) + +In the **Create new POM** dialog box, in the **Packaging** drop-down list, select **jar**, and then click +**Finish**. 
+ +![Create new POM dialog](./images/jmeter-deploy15.png) + +The pane that appears below the project object model (POM) editor might display the warning "Build path specifies execution environment J2SE-1.5. There are no JREs installed in the workspace that are strictly compatible with this environment", depending on which version of Java is installed on your development machine. If you have a version of Java that is later than version 1.5 you can safely ignore this warning. + +![Eclipse warnings](./images/jmeter-deploy16.png) + +In the POM editor, expand **Properties** and then click **Create**. + +![POM editor](./images/jmeter-deploy17.png) + +In the **Add Property** dialog box, in the **Name** box type *es.version*, in the **Value** box type *1.7.2*, +and then click **OK**. This is the version of the Elasticsearch Java client library to use (this version +may be superseded in the future, and defining the version as a POM property and referencing this property elsewhere within the project enables the version to be changed quickly.) + +![Add Property dialog](./images/jmeter-deploy18.png) + +Click the **Dependencies** tab at the base of the POM editor, and then click **Add** next to the **Dependencies** +list. + +![Dependencies tab](./images/jmeter-deploy19.png) + +In the **Select Dependency** dialog box, in the **Group Id** box type *org.elasticsearch*, in the **Artifact Id** +box type *elasticsearch*, in the **Version** box type *\${es.version}*, and then click **OK**. Information +about the Java Elasticsearch client library is held in the online Maven Central repository, and this +configuration will automatically download the library and its dependencies when the project is built. + +![Select Dependency dialog](./images/jmeter-deploy20.png) + +On the **File** menu, click **Save All**. This action will save and build the project, downloading the +dependencies specified by Maven. Verify that the Maven Dependencies folder appears in Package Explorer. 
+Expand this folder to view the jar files downloaded to support the Elasticsearch Java client library. + +![Package Explorer](./images/jmeter-deploy21.png) + +## Importing an existing JUnit test project into Eclipse +This procedure assumes that you have downloaded a Maven project that was previously created by using +Eclipse. + +Start the Eclipse IDE. On the **File** menu, click **Import**. + +![Import menu item](./images/jmeter-deploy22.png) + +In the **Select** window, expand the **Maven** folder, click **Existing Maven Projects**, and then click **Next**. + +![Import menu](./images/jmeter-deploy23.png) + +In the **Maven Projects** window, specify the folder holding the project (the folder containing the +pom.xml file), click **Select All**, and then click **Finish**. + +![Import Maven Projects dialog](./images/jmeter-deploy24.png) + +In the **Package Explorer** window, expand the node corresponding to your project. Verify that the project +contains a folder named **src**. This folder contains the source code for the JUnit test. The project can +be compiled and deployed following the instructions below. + +![Package explorer](./images/jmeter-deploy25.png) + +## Deploying a JUnit test to JMeter +This procedure assumes you have created a project named LoadTest containing a JUnit Test class named +`BulkLoadTest.java` that accepts configuration parameters passed in as a single string to a +constructor (this is the mechanism that JMeter expects). + +In the Eclipse IDE, in **Package Explorer**, right-click the project node, and then click **Export**. + +![Export menu item](./images/jmeter-deploy26.png) + +In the **Export Wizard**, on the **Select** page, expand the **Java** node, click **JAR file**, and then click +**Next**. + +![Export dialog](./images/jmeter-deploy27.png) + +On the **JAR File Specification** page, in the **Select the resources to export** box, expand the project node, +deselect **.project**, and deselect **pom.xml**. 
In the **JAR file** box, provide a file name and location +for the JAR (it should be given the .jar file extension), and then click **Finish**. + +![JAR Export dialog](./images/jmeter-deploy28.png) + +Using Windows Explorer, copy the JAR file you have just created to the JMeter master VM and save it +in the apache-jmeter-2.13\\lib\\junit folder underneath the folder where you have installed JMeter +(see the procedure "Creating the JMeter master virtual machine" in [Creating a performance testing environment for Elasticsearch on Azure](./performance-testing-environment.md) for more information.) + +Return to Eclipse, expand the **Package Explorer** window and make a note of all the JAR files and their +locations listed in the Maven Dependencies folder for the project. Note that the files displayed +in the following image might vary, depending on which version of Elasticsearch you are using: + +![Package Explorer](./images/jmeter-deploy29.png) + +Using Windows Explorer, copy each JAR file referenced in the Maven Dependencies folder to the +apache-jmeter-2.13\\lib\\junit folder on the JMeter master VM. + +If the lib\\junit folder already contains older versions of these JAR files then remove them. +If you leave them in place then the JUnit test might not work as references could be resolved to the +wrong JARs. + +On the JMeter master VM, stop JMeter if it is currently running. Start JMeter. In JMeter, right-click +**Test Plan**, click **Add**, click **Threads (Users)**, and then click **Thread Group**. + +![Thread Group menu item](./images/jmeter-deploy30.png) + +Under the **Test Plan** node, right-click **Thread Group**, click **Add**, click **Sampler**, and then click +**JUnit Request**. + +![JUnit Request menu item](./images/jmeter-deploy31.png) + +On the **JUnit Request** page, select **Search for JUnit4 annotations (instead of JUnit 3)**. 
In the +**Classname** drop-down list, select your JUnit load test class (it will be listed in the form +*<package>.<class>*), in the **Test Method** drop-down list select the JUnit test method +(this is the method that actually performs the work associated with the test and should have been marked +with the *@Test* annotation in the Eclipse project), and enter any values to be passed to the constructor +in the **Constructor String Label** box. The details shown in the following image are just examples; +your **Classname**, **Test Method**, and **Constructor String Label** will probably differ from those shown. + +![JUnit Request UI](./images/jmeter-deploy32.png) + +If your class does not appear in the **Classname** drop-down list, it probably means that the JAR was +not exported properly or has not been placed in the lib\\junit folder, or some of the dependent JARs +are missing from the lib\\junit folder. If this occurs, export the project from Eclipse again and +ensure that you have selected the **src** resource, copy the JAR to the lib\\junit folder, and then +verify that you have copied all of the dependent JARs listed by Maven to the lib\\junit folder. + +Close JMeter. There is no need to save the test plan. Copy the JAR file containing the JUnit test class +to the /home/<username>/apache-jmeter-2.13/lib/junit folder on each of the JMeter subordinate +VMs (*<username>* is the name of the administrative user you specified when you created the VM, +see the procedure "Creating the JMeter subordinate virtual machines" in [Creating a performance testing environment for Elasticsearch on Azure](./performance-testing-environment.md) for more information.) + +Copy the dependent JAR files required by the JUnit test class to the +/home/<username>/apache-jmeter-2.13/lib/junit folder on each of the JMeter subordinate VMs. +Make sure to remove any older versions of JAR files from this folder first. + +You can use the `pscp` utility to copy files from a Windows computer to Linux. 
+ +[Creating a Performance Testing Environment for Elasticsearch on Azure]: ./performance-testing-environment.md diff --git a/docs/elasticsearch/jmeter-test-plan.md b/docs/elasticsearch/jmeter-test-plan.md new file mode 100644 index 00000000000..07d7e1bdbe3 --- /dev/null +++ b/docs/elasticsearch/jmeter-test-plan.md @@ -0,0 +1,161 @@ +--- +title: Implement a JMeter test plan for Elasticsearch +description: How to run performance tests for Elasticsearch with JMeter. +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: performance-testing-environment +pnp.series.next: deploy-jmeter-junit-sampler +ms.assetid: 6901e64f-267d-473d-8478-d8c8b94ea106 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Implement a JMeter test plan +[!INCLUDE [header](../_includes/header.md)] + +The performance tests conducted against Elasticsearch were implemented using JMeter test plans +together with Java code incorporated as a JUnit test for performing tasks such as uploading data into +the cluster. The test plans and JUnit code are described in [Tuning data ingestion performance for Elasticsearch on Azure][Tuning data ingestion performance for Elasticsearch on Azure], and +[Tuning data aggregation and query performance for Elasticsearch on Azure][Tuning data aggregation and query performance for Elasticsearch on Azure]. + +The purpose of this document is to summarize the key experience gained from constructing and running +these test plans. The [JMeter Best Practices](http://jmeter.apache.org/usermanual/best-practices.html) +page on the Apache JMeter website contains more generalized advice on using JMeter effectively. 
+ +## Implementing a JMeter test plan +The following list summarizes the items you should consider when creating a JMeter test plan: + +* Create a separate thread group for each test you wish to perform. A test can contain multiple steps, + including logic controllers, timers, pre- and post-processors, samplers, and listeners. +* Avoid creating too many threads in a thread group. An excessive number of threads will cause + JMeter to fail with "Out of memory" exceptions. It is better to add more JMeter subordinate servers + each running a smaller number of threads than attempt to run a large number of threads on a single + JMeter server. + +![Thread group UI](./images/jmeter-testing1.png) + +* To assess the performance of the cluster, incorporate the + [Perfmon Metrics Collector](http://jmeter-plugins.org/wiki/PerfMon/) plugin into the test plan. This is a JMeter listener that is available as one of the standard JMeter plugins. Save the raw performance data to a set of files in comma-separated values (CSV) format and process them when the test is complete. This is more efficient and imposes less strain on JMeter than attempting to process the data as it is captured. + +![PerfMon Metrics Collector UI](./images/jmeter-testing2.png) + +You can use a tool such as Excel to import the data and generate a range of graphs for analytical purposes. + +Consider capturing the following information: + +* CPU utilization for every node in the Elasticsearch cluster. +* The number of bytes read per second from disk for every node. +* If possible, the percentage of CPU time spent waiting for I/O to be performed on each node. 
+ This is not always possible for Windows VMs, but for Linux you can create a custom metric (an EXEC metric) + that runs the following shell command to invoke *vmstat* on a node: + +```Shell +sh:-c:vmstat 1 5 | awk 'BEGIN { line=0;total=0;}{line=line+1;if(line>1){total=total+$16;}}END{print total/4}' +``` + +Field 16 in the output from *vmstat* contains the CPU time spent waiting for I/O. For more information +about how this statement works, see the [vmstat command](http://linuxcommand.org/man_pages/vmstat8.html). + +* The number of bytes sent and received across the network to each node. +* Use separate Aggregate Report listeners to record the performance and frequency of successful and + failed operations. Capture success and failure data to different files. + +![Aggregate Report UI](./images/jmeter-testing3.png) + +* Keep each JMeter test case as simple as possible to enable you to directly correlate performance + with specific test actions. For test cases that require complex logic, consider encapsulating + this logic in a JUnit test and use the JUnit Request sampler in JMeter to run the test. +* Use the HTTP Request sampler to perform HTTP operations, such as GET, POST, PUT, or DELETE. + For example, you can run Elasticsearch searches by using a POST query and providing the query + details in the *Body Data* box: + +![HTTP Request UI](./images/jmeter-testing4.png) + +* For ease of repeatability and reuse, parameterize JMeter test plans. You can then use + scripting to automate the running of test plans. + +## Implementing a JUnit test +You can incorporate complex code into a JMeter test plan by creating one or more JUnit tests. +You can write a JUnit test by using a Java integrated development environment (IDE) such as Eclipse. [Deploying a JMeter JUnit sampler for testing Elasticsearch performance][Deploying a JMeter JUnit sampler for testing Elasticsearch performance] +provides information on how to set up an appropriate development environment. 
+ +The following list summarizes some best practices you should follow when writing the code for a +JUnit test: + +* Use the test class constructor to pass initialization parameters into the test. JMeter can use a + constructor that takes a single string argument. In the constructor, parse this argument into its + individual elements, as shown by the following code example: + +```Java +private String hostName = ""; +private String indexName = ""; +private String typeName = ""; +private int port = 0; +private String clusterName = ""; +private int itemsPerBatch = 0; + +/* JUnit test class constructor */ +public ElasticsearchLoadTest2(String params) { + /* params is a string containing a set of comma separated values for: + hostName + indexName + typeName + port + clustername + itemsPerBatch + */ + + /* Parse the parameter string into an array of string items */ + String delims = "[ ]*,[ ]*"; // comma surrounded by zero or more spaces + String[] items = params.split(delims); + + /* Note: Parameter validation code omitted */ + + /* Use the parameters to populate variables used by the test */ + hostName = items[0]; + indexName = items[1]; + typeName = items[2]; + port = Integer.parseInt(items[3]); + clusterName = items[4]; + itemsPerBatch = Integer.parseInt(items[5]); + + if(itemsPerBatch == 0) + itemsPerBatch = 1000; +} +``` + +* Avoid I/O operations or other time-consuming operations in the constructor or setup test class, because they execute each time the JUnit test runs. (The same JUnit test can run many thousands of times for each performance test executed from JMeter.) +* Consider using one-time setup for expensive test case initialization. +* If the test requires a large number of input parameters, store test configuration information + in a separate configuration file and pass the location of this file into the constructor. +* Avoid hard coding file paths in the load test code. 
These can cause failures due to differences + between operating systems such as Windows and Linux. +* Use assertions to indicate failures in JUnit test methods so that you can track them with + JMeter and use them as a business metrics. If possible, pass back information concerning the cause + of the failure, as shown in bold in the following code example: + +```Java +@Test +public void bulkInsertTest() throws IOException { + ... + BulkResponse bulkResponse = bulkRequest.execute().actionGet(); + assertFalse( + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures()); + ... +} +``` + + +[Running Elasticsearch on Azure]: index.md +[Tuning Data Ingestion Performance for Elasticsearch on Azure]: data-ingestion-performance.md +[Deploying a JMeter JUnit Sampler for Testing Elasticsearch Performance]: jmeter-junit-sampler.md +[Tuning Data Aggregation and Query Performance for Elasticsearch on Azure]: data-aggregation-and-query-performance.md diff --git a/docs/elasticsearch/performance-testing-environment.md b/docs/elasticsearch/performance-testing-environment.md new file mode 100644 index 00000000000..1eb8d7b9bc4 --- /dev/null +++ b/docs/elasticsearch/performance-testing-environment.md @@ -0,0 +1,501 @@ +--- +title: Create a performance testing environment for Elasticsearch +description: >- + How to set up an environment for testing the performance of an Elasticsearch + cluster. 
+services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: resilience-and-recovery +pnp.series.next: implement-jmeter-test-plan +ms.assetid: fc696fdd-f50f-49b4-8263-f0ef077febd7 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Create a performance testing environment +[!INCLUDE [header](../_includes/header.md)] + +This document describes how to set up an environment for testing the performance of an Elasticsearch +cluster. This configuration was used to test the performance of data ingestion and query workloads, +as described in [Tuning data ingestion performance for Elasticsearch on Azure][Tuning data ingestion performance for Elasticsearch on Azure]. + +The performance testing process used [Apache JMeter](http://jmeter.apache.org/), with the +[standard set](http://jmeter-plugins.org/wiki/StandardSet/) of plugins installed in a master/subordinate +configuration using a set of dedicated VMs (not part of the Elasticsearch cluster) specifically configured +for the purpose. + +The [PerfMon Server Agent](http://jmeter-plugins.org/wiki/PerfMonAgent/) was installed on each +Elasticsearch node. The following sections provide instructions for recreating the test environment to +enable you to conduct your own performance testing with JMeter. These instructions assume that you have +already created an Elasticsearch cluster with nodes connected using an Azure virtual network. + +Note that the test environment also runs as a set of Azure VMs managed by using a single Azure +resource group. + +[Marvel](https://www.elastic.co/products/marvel) was also installed and configured to enable the +internal aspects of the Elasticsearch cluster to be monitored and analyzed more easily. 
+If the JMeter statistics showed a peak or trough in performance, information available through Marvel +can be invaluable to help determine the cause of the fluctuations. + +The following image shows the structure of the entire system. + +![Elasticsearch architecture](./images/performance-structure.png) + +Note the following points: + +* The JMeter master VM runs Windows Server to provide the GUI environment for the JMeter console. + The JMeter Master VM provides the GUI (the *jmeter* application) to enable a tester to create tests, + run tests, and visualize the results. This VM coordinates with the JMeter server VMs that actually + send the requests that constitute the tests. +* The JMeter subordinate VMs run Ubuntu Server (Linux), there is no GUI requirement for these VMs. + The JMeter server VMs run the JMeter server software (the *jmeter-server* application) to send + requests to the Elasticsearch cluster. +* Dedicated client nodes were not used, although dedicated master nodes were. +* The number of data nodes in the cluster can vary, depending on the scenario being tested. +* All nodes in the Elasticsearch cluster run Marvel to observe performance at runtime, and the + JMeter Server Agent to collect monitoring data for later analysis. +* When testing Elasticsearch 2.0.0 and later, one of the data nodes also runs Kibana. This is + required by the version of Marvel that runs on Elasticsearch 2.0.0 and later. + +## Creating an Azure resource group for the virtual machines +The JMeter master needs to be able to connect directly to each of the nodes in the Elasticsearch cluster +to gather performance data. If the JMeter VNet is distinct from the Elasticsearch cluster VNet, then this +entails configuring each Elasticsearch node with a public IP address. 
If this is a problem with your +Elasticsearch configuration, then consider implementing the JMeter VMs in the same VNet as the +Elasticsearch cluster by using the same resource group, in which case you can omit this first procedure. + +First, [create a resource group](/azure/azure-resource-manager/resource-group-template-deploy-portal/). +This document assumes that your resource group is named *JMeterPerformanceTest*. If you wish to run the +JMeter VMs in the same VNet as the Elasticsearch cluster, use the same resource group as that cluster +instead of creating a new one. + +## Creating the JMeter master virtual machine +Next [create a Windows VM](/azure/virtual-machines/virtual-machines-windows-hero-tutorial/?toc=%2fazure%2fvirtual-machines%2fwindows%2ftoc.json) using the +*Windows Server 2008 R2 SP1* image. We recommend selecting a VM size with sufficient cores and memory +to run the performance tests. Ideally this will be a machine with at least 2 cores and 3.5GB of RAM +(A2 Standard or bigger). + + + +We recommend that you disable the diagnostics. When creating the VM in the portal, this is done on the +*Settings* blade in the *Monitoring* section under *Diagnostics*. Leave the other settings at their +default values. + +Verify that the VM and all the associated resources have been created successfully by +[examining the resource group](/azure/azure-resource-manager/resource-group-portal/#manage-resource-groups) in the portal. +The resources listed should consist of a VM, a network security group, and a public IP address all with +the same name, and network interface and storage account with names based on that of the VM. + +## Creating the JMeter subordinate virtual machines +Now [create a Linux VM](/azure/virtual-machines/virtual-machines-linux-quick-create-portal/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json) using the +*Ubuntu Server 14.04 LTS* image. 
As with the JMeter master VM, select a VM size with sufficient cores +and memory to run the performance tests. Ideally this will be a machine with at least 2 cores, and at +least 3.5GB of RAM (Standard A2 or bigger). + +Again, we recommend that you disable the diagnostics. + +You can create as many subordinate VMs as you wish. + +## Installing JMeter server on the JMeter subordinate VMs +The JMeter subordinate VMs are running Linux and by default you cannot connect to them by opening a +remote desktop connection (RDP). Instead, you can +[use PuTTY to open a command line window](/azure/virtual-machines/virtual-machines-linux-mac-create-ssh-keys/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json) on each VM. + +Once you've connected to one of the subordinate VMs, we'll use Bash to setup JMeter. + +First, install the Java Runtime Environment required to run JMeter. + +```bash +sudo add-apt-repository ppa:webupd8team/java +sudo apt-get update +sudo apt-get install oracle-java8-installer +``` + +Now, download the JMeter software packaged as a zip file. + +```bash +wget http://apache.mirror.anlx.net/jmeter/binaries/apache-jmeter-2.13.zip +``` + +Install the unzip command, then use it to expand the JMeter software. The software is copied to a folder +named **apache-jmeter-2.13**. + +```bash +sudo apt-get install unzip +unzip apache-jmeter-2.13.zip +``` + +Change to the *bin* directory holding the JMeter executables, and make the *jmeter-server* and +*jmeter* programs executable. + +```bash +cd apache-jmeter-2.13/bin +chmod u+x jmeter-server +chmod u+x jmeter +``` + +Now, we need to edit the file `jmeter.properties` located in the current folder (use the text +editor you're most familiar with, such as *vi* or *vim*). Locate the following lines: + +```yaml +... +client.rmi.localport=0 +... +server.rmi.localport=4000 +... +``` + +Uncomment (remove the leading \## characters) and modify these lines as shown below, then save the +file and close the editor: + +```yaml +... 
+client.rmi.localport=4441 +... +server.rmi.localport=4440 +``` + +Now, run the following commands to open port 4441 to incoming TCP traffic (this is the port you have just configured *jmeter-server* to listen on): + +```bash +sudo iptables -A INPUT -m state --state NEW -m tcp -p tcp --dport 4441 -j ACCEPT +``` + +Download the zip file containing the standard collection of plugins for JMeter (these plugins provide +performance monitoring counters) and then unzip the file to the **apache-jmeter-2.13** folder. Unzipping the file in this location places +the plugins in the correct folder. + +If you are prompted to replace the LICENSE file, type A (for all): + +```bash +wget http://jmeter-plugins.org/downloads/file/JMeterPlugins-Standard-1.3.0.zip +unzip JMeterPlugins-Standard-1.3.0.zip +``` + +Use `nohup` to launch the JMeter server in the background. It should respond by displaying a +process ID and a message indicating that it has created a remote object and is ready to start +receiving commands. Run the following command in the ~/apache-jmeter-2.13/bin directory. + +```bash +nohup jmeter-server & +``` + +> [!NOTE] +> If the VM is shutdown then the JMeter server program is terminated. You will need to +> connect to the VM and restart it again manually. Alternatively, you can configure the system to run +> the *jmeter-server* command automatically on startup by adding the following commands to the +> `/etc/rc.local` file (before the *exit 0* command): +> +> + +```bash +sudo -u bash << eoc +cd /home//apache-jmeter-2.13/bin +nohup ./jmeter-server & +eoc +``` + +Replace `` with your login name. + +You may find it useful to keep the terminal window open so that you can monitor the progress of the +JMeter server while testing is in progress. + +You will need to repeat these steps for each JMeter subordinate VM. + +## Installing the JMeter Server Agent on the Elasticsearch nodes +This procedure assumes that you have login access to the Elasticsearch nodes. 
If you have created the +cluster using the Resource Manager template, you can connect to each node through the jump box VM, as illustrated +in the Elasticsearch topology section of [Running Elasticsearch on Azure](./index.md). You can connect to the jump box using PuTTY as well. + +From there, you can use the *ssh* command to log in to each of the nodes in the Elasticsearch cluster. + +Log in to one of the Elasticsearch nodes as an administrator. At the Bash command prompt, enter the +following commands to create a folder for holding the JMeter Server Agent and move to that folder: + +```bash +mkdir server-agent +cd server-agent +``` + +Run the following commands to install the *unzip* command (if it is not already installed), +download the JMeter Server Agent software, and unzip it: + +```bash +sudo apt-get install unzip +wget http://jmeter-plugins.org/downloads/file/ServerAgent-2.2.1.zip +unzip ServerAgent-2.2.1.zip +``` + +Run the following command to configure the firewall and enable TCP traffic to pass through +port 4444 (this is the port used by the JMeter Server Agent): + +```bash +sudo iptables -A INPUT -m state --state NEW -m tcp -p tcp --dport 4444 -j ACCEPT +``` + +Run the following command to start the JMeter Server Agent in the background: + +```bash +nohup ./startAgent.sh & +``` + +The JMeter Server Agent should respond with messages indicating that it has started and is +listening on port 4444. Press Enter to obtain a command prompt, and then run the following command. + +```bash +telnet 4444 +``` + +Replace `` with the name of your node. (You can find the name of your node by running the `hostname` command.) This command opens a telnet connection to port 4444 on your local machine. You can use this +connection to verify that the JMeter Server Agent is running correctly. 
+ +If the JMeter Server Agent is not running, you will receive the response + +`telnet: Unable to connect to remote host: Connection refused`. + +If the JMeter Server Agent is running and port 4444 has been configured correctly, you should see +the following response: + +![JMeter Server Agent](./images/performance-telnet-server.png) + +> [!NOTE] +> The telnet session does not provide any sort of prompt once it has connected. +> +> + +In the telnet session, type the following command: + +``` +test +``` + +If the JMeter Server Agent is configured and listening correctly, it should indicate that it +received the command and respond with the message *Yep*. + +> [!NOTE] +> You can type in other commands to obtain performance monitoring data. For example, +> the command `metric-single:cpu:idle` will give you the current proportion of the time that the CPU +> is idle (this is a snapshot). For a complete list of commands, visit the +> [PerfMon Server Agent](http://jmeter-plugins.org/wiki/PerfMonAgent/) page. +> +> + +In the telnet session, type the following command to quit the session and return to the Bash +command prompt: + +``` +exit +``` + +> [!NOTE] +> As with the JMeter subordinate VMs, if you log out, or if this machine is shut down +> and restarted then the JMeter Server Agent will need to be restarted manually by using the +> `startAgent.sh` command. If you want the JMeter Server Agent to start automatically, add the +> following command to the end of the `/etc/rc.local` file, before the *exit 0* command. +> Replace `<username>` with your login name: +> +> + +```bash +sudo -u <username> bash << eoc +cd /home/<username>/server-agent +nohup ./startAgent.sh & +eoc +``` + +You can now either repeat this entire process for every other node in the Elasticsearch cluster, or +you can use the `scp` command to copy the server-agent folder and contents to every +other node and use the `ssh` command to start the JMeter Server Agent as shown below.
+Replace `<username>` with your username, and `<nodename>` with the name of the node where you +wish to copy and run the software (you may be asked to provide your password as you run each command): + +```bash +scp -r ~/server-agent <username>@<nodename>:~ +ssh <nodename> sudo iptables -A INPUT -m state --state NEW -m tcp -p tcp --dport 4444 -j ACCEPT +ssh <nodename> -n -f 'nohup ~/server-agent/startAgent.sh' +``` + +## Installing and configuring JMeter on the JMeter master VM +In the Azure portal, click **Resource groups**. In the **Resource groups** blade, click the resource group containing the JMeter master and subordinate VMs. In the **Resource group** blade, click the **JMeter master VM**. In the virtual machine blade, on the toolbar, click **Connect**. Open the RDP file when prompted by the web +browser. Windows creates a remote desktop connection to your VM. Enter the username and password for the VM when prompted. + +In the VM, using Internet Explorer, go to the [Download Java for Windows](http://www.java.com/en/download/ie_manual.jsp) +page. Follow the instructions to download and run the Java installer. + +In the web browser, go to the [Download Apache JMeter](http://jmeter.apache.org/download_jmeter.cgi) +page and download the zip containing the most recent binary. Save the zip in a convenient location on your VM. + +Go to the [Custom JMeter Plugins](http://jmeter-plugins.org/) site and download the Standard Set of plugins. +Save the zip in the same folder as the JMeter download from the previous step. + +In Windows Explorer, go to the folder containing the apache-jmeter-*xxx* zip file, where *xxx* is the +current version of JMeter. Extract the files into the current folder. + +Extract the files in the JMeterPlugins-Standard-*yyy*.zip file, where *yyy* is the current version of the plugins, into the apache-jmeter-*xxx* folder. This will add the plugins to the correct folder for +JMeter. You can safely merge the lib folders, and overwrite the license and readme files if prompted.
+ +Go to the apache-jmeter-*xxx*/bin folder and edit the jmeter.properties file using Notepad. In the +`jmeter.properties` file, find the section labelled *Remote hosts and RMI configuration*. In this +section of the file, find the following line: + +```yaml +remote_hosts=127.0.0.1 +``` + +Change this line and replace the IP address 127.0.0.1 with a comma separated list of IP addresses or +host names for each of the JMeter subordinate servers. For example: + +```yaml +remote_hosts=JMeterSub1,JMeterSub2 +``` + +Find the following line, then remove the `#` character at the start of this line, and modify the value +of the client.rmi.localport settings from: + +```yaml +#client.rmi.localport=0 +``` + +to: + +```yaml +client.rmi.localport=4440 +``` + +Save the file and close Notepad. + +In the Windows toolbar, click **Start**, click **Administrative Tools**, and then click +**Windows Firewall with Advanced Security**. In the Windows Firewall with Advanced Security window, +in the left pane, right-click **Inbound Rules**, and then click **New Rule**. + +In the **New Inbound Rule Wizard**, on the **Rule Type** page, select **Port**, and then click **Next**. On +the Protocols and Ports page, select **TCP**, select **Specific local ports**, in the text box type +`4440-4444`, and then click **Next**. On the Action page, select **Allow the connection**, and then click **Next**. On the Profile page, leave all options checked and then click **Next**. On the Name page, in the **Name** text box type *JMeter*, and then click **Finish**. Close the Windows Firewall with Advanced Security window. + +In Windows Explorer, in the apache-jmeter-*xx*/bin folder, double-click the *jmeter* Windows batch +file to start the GUI. 
The user interface should appear: + +![Apache JMeter UI](./images/performance-image17.png) + +In the menu bar, click **Run**, click **Remote Start**, and verify that the two JMeter subordinate +machines are listed: + +![Remote Start menu item](./images/performance-image18.png) + +You are now ready to begin performance testing. + +## Installing and configuring Marvel +The Elasticsearch Quickstart Template for Azure will install and configure the appropriate version of +Marvel automatically if you set the MARVEL and KIBANA parameters to true ("yes") when building the cluster: + +![Elasticsearch quickstart parameters](./images/performance-image19.png) + +If you are adding Marvel to an existing cluster you need to perform the installation manually, and the process is different depending on whether you are using Elasticsearch version 1.7.x or 2.x, as described in the following procedures. + +### Installing Marvel with Elasticsearch 1.7.3 or earlier +If you are using Elasticsearch 1.7.3 or earlier, perform the following steps *on every node* in the +cluster: + +* Log in to the node and move to the Elasticsearch home directory. On Linux, the typical home directory + is `/usr/share/elasticsearch`. +* Run the following command to download and install the Marvel plugin for Elasticsearch: + +```bash +sudo bin/plugin -i elasticsearch/marvel/latest +``` + +* Stop and restart Elasticsearch on the node: + +```bash +sudo service elasticsearch restart +``` + +* To verify that Marvel was installed correctly, open a web browser and go to the + URL `http://<server>:9200/_plugin/marvel`. Replace `<server>` with the name or IP address of + any Elasticsearch server in the cluster.
Verify that a page similar to that shown below appears: + +![Marvel UI](./images/performance-image20.png) + +### Installing Marvel with Elasticsearch 2.0.0 or later +If you are using Elasticsearch 2.0.0 or later, perform the following tasks *on every node* in the cluster: + +Log in to the node and move to the Elasticsearch home directory (typically `/usr/share/elasticsearch`) Run +the following commands to download and install the Marvel plugin for Elasticsearch: + +```bash +sudo bin/plugin install license +sudo bin/plugin install marvel-agent +``` + +Stop and restart Elasticsearch on the node: + +```bash +sudo service elasticsearch restart +``` + +In the following procedure, replace `` with 4.2.2 if you are using Elasticsearch 2.0.0 +or Elasticsearch 2.0.1, or with 4.3.1 if you are using Elasticsearch 2.1.0 or later. Replace +`` with 2.0.0 if you are using Elasticsearch 2.0.0 or Elasticsearch 2.0.1, or +with 2.1.0 if you are using Elasticsearch 2.1.0 or later. Perform the following tasks *on one node* +in the cluster: + +Log in to the node and download the appropriate build of Kibana for your version of Elasticsearch +from the [Elasticsearch download web site](https://www.elastic.co/downloads/past-releases), then extract +the package: + +```bash +wget https://download.elastic.co/kibana/kibana/kibana--linux-x64.tar.gz +tar xvzf kibana--linux-x64.tar.gz +``` + +Open port 5601 to accept incoming requests: + +```bash +sudo iptables -A INPUT -m state --state NEW -m tcp -p tcp --dport 5601 -j ACCEPT +``` + +Move to the Kibana config folder (`kibana--linux-x64/config`), edit the `kibana.yml` +file, and add the following line. 
Replace `` with the name or IP address of an Elasticsearch +server in the cluster: + +```yaml +elasticsearch.url: "http://:9200" +``` + +Move to the Kibana bin folder (`kibana--linux-x64/bin`), and run the following +command to integrate the Marvel plugin into Kibana: + +```bash +sudo ./kibana plugin --install elasticsearch/marvel/ +``` + +Start Kibana: + +```bash +sudo nohup ./kibana & +``` + +To verify the Marvel installation, open a web browser and go to the URL +`http://:5601/app/marvel`. Replace `` with the name or IP address +of the server running Kibana. + +Verify that a page similar to that shown below appears (the name of your cluster will likely +vary from that shown in the image). + +![Marvel](./images/performance-image21.png) + +Click the link that corresponds to your cluster (elasticsearch210 in the image above). A page +similar to that shown below should appear: + +![Elasticsearch cluster](./images/performance-image22.png) + +[Tuning Data Ingestion Performance for Elasticsearch on Azure]: ./data-ingestion-performance.md diff --git a/docs/elasticsearch/resilience-and-recovery.md b/docs/elasticsearch/resilience-and-recovery.md new file mode 100644 index 00000000000..3816b5d0b8d --- /dev/null +++ b/docs/elasticsearch/resilience-and-recovery.md @@ -0,0 +1,325 @@ +--- +title: Configure resilience and recovery on Elasticsearch on Azure +description: Considerations related to resiliency and recovery for Elasticsearch. 
+services: '' +documentationcenter: na +author: dragon119 +manager: bennage +editor: '' +tags: '' +pnp.series.title: Elasticsearch on Azure +pnp.series.prev: data-aggregation-and-query-performance +pnp.series.next: performance-testing-environment +ms.assetid: 2da4d716-5bba-4ae8-bedf-d40c49f4c2c7 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 09/22/2016 +ms.author: masashin +--- +# Configure resilience and recovery +[!INCLUDE [header](../_includes/header.md)] + +A key feature of Elasticsearch is the support that it provides for resiliency in the event of node failures and/or network partition events. Replication is the most obvious way in which you can improve the resiliency of any cluster, enabling Elasticsearch to ensure that more than one copy of any data item is available on different nodes in case one node should become inaccessible. If a node becomes temporarily unavailable, other nodes containing replicas of data from the missing node can serve the missing data until the problem is resolved. In the event of a longer term issue, the missing node can be replaced with a new one, and Elasticsearch can restore the data to the new node from the replicas. + +Here we summarize the resiliency and recovery options available with Elasticsearch when hosted in Azure, and describe some important aspects of an Elasticsearch cluster that you should consider to minimize the chances of data loss and extended data recovery times. + +This article also illustrates some sample tests that were performed to show the effects of different types of failures on an Elasticsearch cluster, and how the system responds as it recovers. + +An Elasticsearch cluster uses replicas to maintain availability and improve read performance. Replicas should be stored on different VMs from the primary shards that they replicate. 
The intention is that if the VM hosting a data node fails or becomes unavailable, the system can continue functioning using the VMs holding the replicas. + +## Using dedicated master nodes +One node in an Elasticsearch cluster is elected as the master node. The purpose of this node is to perform cluster management operations such as: + +* Detecting failed nodes and switching over to replicas. +* Relocating shards to balance node workload. +* Recovering shards when a node is brought back online. + +You should consider using dedicated master nodes in critical clusters, and ensure that there are 3 dedicated nodes whose only role is to be master. This configuration reduces the amount of resource intensive work that these nodes have to perform (they do not store data or handle queries) and helps to improve cluster stability. Only one of these nodes will be elected, but the others will contain a copy of the system state and can take over should the elected master fail. + +## Controlling high availability with Azure – update domains and fault domains +Different VMs can share the same physical hardware. In an Azure datacenter, a single rack can host a number of VMs, and all of these VMs share a common power source and network switch. A single rack-level failure can therefore impact a number of VMs. Azure uses the concept of fault domains to try and spread this risk. A fault domain roughly corresponds to a group of VMs that share the same rack. To ensure that a rack-level failure does not crash a node and the nodes holding all of its replicas simultaneously, you should ensure that the VMs are distributed across fault domains. + +Similarly, VMs can be taken down by the [Azure Fabric Controller](https://azure.microsoft.com/documentation/videos/fabric-controller-internals-building-and-updating-high-availability-apps/) to perform planned maintenance and operating system upgrades. Azure allocates VMs to update domains. 
When a planned maintenance event occurs, only VMs in a single update domain are affected at any one time. VMs in other update domains are left running until the VMs in the update domain being updated are brought back online. Therefore, you also need to ensure that VMs hosting nodes and their replicas belong to different update domains wherever possible. + +> [!NOTE] +> For more information about fault domains and update domains, see [Manage the availability of virtual machines](/azure/virtual-machines/virtual-machines-linux-manage-availability/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). +> +> + +You cannot explicitly allocate a VM to a specific update domain and fault domain. This allocation is controlled by Azure when VMs are created. However, you can specify that VMs should be created as part of an availability set. VMs in the same availability set will be spread across update domains and fault domains. If you create VMs manually, Azure creates each availability set with two fault domains and five update domains. VMs are allocated to these fault domains and update domains, cycling round as further VMs are provisioned, as follows: + +| VM | Fault domain | Update domain | +| --- | --- | --- | +| 1 |0 |0 | +| 2 |1 |1 | +| 3 |0 |2 | +| 4 |1 |3 | +| 5 |0 |4 | +| 6 |1 |0 | +| 7 |0 |1 | + +> [!IMPORTANT] +> If you create VMs using the Azure Resource Manager, each availability set can be allocated up to 3 fault domains and 20 update domains. This is a compelling reason for using the Resource Manager. +> +> + +In general, place all VMs that serve the same purpose in the same availability set, but create different availability sets for VMs that perform different functions. With Elasticsearch this means that you should consider creating at least separate availability sets for: + +* VMs hosting data nodes. +* VMs hosting client nodes (if you are using them). +* VMs hosting master nodes.
+ +Additionally, you should ensure that each node in a cluster is aware of the update domain and fault domain it belongs to. This information can help to ensure that Elasticsearch does not create shards and their replicas in the same fault and update domains, minimizing the possibility of a shard and its replicas being taken down at the same time. You can configure an Elasticsearch node to mirror the hardware distribution of the cluster by configuring [shard allocation awareness](https://www.elastic.co/guide/en/elasticsearch/reference/current/allocation-awareness.html#allocation-awareness). For example, you could define a pair of custom node attributes called *faultDomain* and *updateDomain* in the elasticsearch.yml file, as follows: + +```yaml +node.faultDomain: ${FAULTDOMAIN} +node.updateDomain: ${UPDATEDOMAIN} +``` + +In this case, the attributes are set using the values held in the *\${FAULTDOMAIN}* and *\${UPDATEDOMAIN}* environment variables when Elasticsearch is started. You also need to add the following entries to the elasticsearch.yml file to indicate that *faultDomain* and *updateDomain* are allocation awareness attributes, and specify the sets of acceptable values for these attributes: + +```yaml +cluster.routing.allocation.awareness.force.updateDomain.values: 0,1,2,3,4 +cluster.routing.allocation.awareness.force.faultDomain.values: 0,1 +cluster.routing.allocation.awareness.attributes: updateDomain, faultDomain +``` + +You can use shard allocation awareness in conjunction with [shard allocation filtering](https://www.elastic.co/guide/en/elasticsearch/reference/2.0/shard-allocation-filtering.html#shard-allocation-filtering) to specify explicitly which nodes can host shards for any given index. + +If you need to scale beyond the number of fault domains and update domains in an availability set, you can create VMs in additional availability sets.
However, you need to understand that nodes in different availability sets can be taken down for maintenance simultaneously. Try to ensure that each shard and at least one of its replicas are contained within the same availability set. + +> [!NOTE] +> There is currently a limit of 100 VMs per availability set. For more information, see [Azure subscription and service limits, quotas, and constraints](/azure/azure-subscription-service-limits/). +> +> + +### Backup and restore +Using replicas does not provide complete protection from catastrophic failure (such as accidentally deleting the entire cluster). You should ensure that you back up the data in a cluster regularly, and that you have a tried and tested strategy for restoring the system from these backups. + +Use the Elasticsearch [snapshot and restore APIs](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html) to back up and restore indexes. Snapshots can be saved to a shared filesystem. Alternatively, plugins are available that can write snapshots to the Hadoop distributed file system (HDFS) (the [HDFS plugin](https://github.com/elasticsearch/elasticsearch-hadoop/tree/master/repository-hdfs)) or to Azure storage (the [Azure plugin](https://github.com/elasticsearch/elasticsearch-cloud-azure#azure-repository)). + +Consider the following points when selecting the snapshot storage mechanism: + +* You can use [Azure File storage](https://azure.microsoft.com/services/storage/files/) to implement a shared filesystem that is accessible from all nodes. +* Only use the HDFS plugin if you are running Elasticsearch in conjunction with Hadoop. +* The HDFS plugin requires you to disable the Java Security Manager running inside the Elasticsearch instance of the Java virtual machine (JVM). +* The HDFS plugin supports any HDFS-compatible file system provided that the correct Hadoop configuration is used with Elasticsearch.
+ +## Handling intermittent connectivity between nodes +Intermittent network glitches, VM reboots after routine maintenance at the datacenter, and other similar events can cause nodes to become temporarily inaccessible. In these situations, where the event is likely to be short lived, the overhead of rebalancing the shards twice in quick succession (once when the failure is detected and again when the node becomes visible to the master) can significantly impact performance. You can prevent temporary node inaccessibility from causing the master to rebalance the cluster by setting the *delayed\_timeout* property of an index, or for all indexes. The example below sets the delay to 5 minutes: + +```http +PUT /_all/_settings +{ + "settings": { + "index.unassigned.node_left.delayed_timeout": "5m" + } +} +``` + +For more information, see [Delaying allocation when a node leaves](https://www.elastic.co/guide/en/elasticsearch/reference/current/delayed-allocation.html). + +In a network that is prone to interruptions, you can also modify the parameters that configure a master to detect when another node is no longer accessible. These parameters are part of the [zen discovery](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html#modules-discovery-zen) module provided with Elasticsearch, and you can set them in the elasticsearch.yml file. For example, the *discovery.zen.fd.ping\_retries* parameter specifies how many times a master node will attempt to ping another node in the cluster before deciding that it has failed. This parameter defaults to 3, but you can modify it as follows: + +```yaml +discovery.zen.fd.ping_retries: 6 +``` + +## Controlling recovery +When connectivity to a node is restored after a failure, any shards on that node will need to be recovered to bring them up to date. By default, Elasticsearch recovers shards in the following order: + +* By reverse index creation date. 
Newer indexes are recovered before older indexes. +* By reverse index name. Indexes that have names that are alphanumerically greater than others will be restored first. + +If some indexes are more critical than others, but do not match these criteria, you can override the precedence of indexes by setting the *index.priority* property. Indexes with a higher value for this property will be recovered before indexes that have a lower value: + +```http +PUT low_priority_index +{ + "settings": { + "index.priority": 1 + } +} + +PUT high_priority_index +{ + "settings": { + "index.priority": 10 + } +} +``` + +For more information, see [Index Recovery Prioritization](https://www.elastic.co/guide/en/elasticsearch/reference/2.0/recovery-prioritization.html#recovery-prioritization). + +You can monitor the recovery process for one or more indexes using the *\_recovery* API: + +```http +GET /high_priority_index/_recovery?pretty=true +``` + +For more information, see [Indices Recovery](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-recovery.html#indices-recovery). + +> [!NOTE] +> A cluster with shards that require recovery will have a status of *yellow* to indicate that not all shards are currently available. When all the shards are available, the cluster status should revert to *green*. A cluster with a status of *red* indicates that one or more shards are physically missing; it may be necessary to restore data from a backup. +> +> + +## Preventing split brain +A split brain can occur if the connections between nodes fail. If a master node becomes unreachable to part of the cluster, an election will take place in the network segment that remains contactable and another node will become the master. In an ill-configured cluster, it is possible for each part of the cluster to have different masters resulting in data inconsistencies or corruption. This phenomenon is known as a *split brain*. 
+ +You can reduce the chances of a split brain by configuring the *minimum\_master\_nodes* property of the discovery module, in the elasticsearch.yml file. This property specifies how many nodes must be available to enable the election of a master. The following example sets the value of this property to 2: + +```yaml +discovery.zen.minimum_master_nodes: 2 +``` + +This value should be set to the lowest majority of the number of nodes that are able to fulfil the master role. For example, if your cluster has 3 master nodes, *minimum\_master\_nodes* should be set to 2. If you have 5 master nodes, *minimum\_master\_nodes* should be set to 3. Ideally, you should have an odd number of master nodes. + +> [!NOTE] +> It is possible for a split brain to occur if multiple master nodes in the same cluster are started simultaneously. While this occurrence is rare, you can prevent it by starting nodes serially with a short delay (5 seconds) between each one. +> +> + +## Handling rolling updates +If you are performing a software upgrade to nodes yourself (such as migrating to a newer release or performing a patch), you may need to perform work on individual nodes that requires taking them offline while keeping the remainder of the cluster available. In this situation, consider implementing the following process. + +1. Ensure that shard reallocation is delayed sufficiently to prevent the elected master from rebalancing shards from a missing node across the remainder of the cluster. By default, shard reallocation is delayed for 1 minute, but you can increase the duration if a node is likely to be unavailable for a longer period. The following example increases the delay to 5 minutes: + + ```http + PUT /_all/_settings + { + "settings": { + "index.unassigned.node_left.delayed_timeout": "5m" + } + } + ``` + + > [!IMPORTANT] + > You can also disable shard reallocation completely by setting the *cluster.routing.allocation.enable* of the cluster to *none*. 
However, you should avoid using this approach if new indexes are likely to be created while the node is offline as this can cause index allocation to fail resulting in a cluster with red status. + > + > +2. Stop Elasticsearch on the node to be maintained. If Elasticsearch is running as a service, you may be able to halt the process in a controlled manner by using an operating system command. The following example shows how to halt the Elasticsearch service on a single node running on Ubuntu: + + ```bash + service elasticsearch stop + ``` + + Alternatively, you can use the Shutdown API directly on the node: + + ```http + POST /_cluster/nodes/_local/_shutdown + ``` +3. Perform the necessary maintenance on the node +4. Restart the node and wait for it to join the cluster. +5. Re-enable shard allocation: + + ```http + PUT /_cluster/settings + { + "transient": { + "cluster.routing.allocation.enable": "all" + } + } + ``` + +> [!NOTE] +> If you need to maintain more than one node, repeat steps 2–4 on each node before re-enabling shard allocation. +> +> + +If you can, stop indexing new data during this process. This will help to minimize recovery time when nodes are brought back online and rejoin the cluster. + +Beware of automated updates to items such as the JVM (ideally, disable automatic updates for these items), especially when running Elasticsearch under Windows. The Java update agent can download the most recent version of Java automatically, but may require Elasticsearch to be restarted for the update to take effect. This can result in uncoordinated temporary loss of nodes, depending on how the Java Update agent is configured. This can also result in different instances of Elasticsearch in the same cluster running different versions of the JVM which may cause compatibility issues. 
+ +## Testing and analyzing Elasticsearch resilience and recovery +This section describes a series of tests that were performed to evaluate the resilience and recovery of an Elasticsearch cluster containing three data nodes and three master nodes. + +The following scenarios were tested: + +* Node failure and restart with no data loss. A data node is stopped and restarted after 5 minutes. Elasticsearch was configured not to reallocate missing shards in this interval, so no additional I/O is incurred in moving shards around. When the node restarts, the recovery process brings the shards on that node back up to date. +* Node failure with catastrophic data loss. A data node is stopped and the data that it holds is erased to simulate catastrophic disk failure. The node is then restarted (after 5 minutes), effectively acting as a replacement for the original node. The recovery process requires rebuilding the missing data for this node, and may involve relocating shards held on other nodes. +* Node failure and restart with no data loss, but with shard reallocation. A data node is stopped and the shards that it holds are reallocated to other nodes. The node is then restarted and more reallocation occurs to rebalance the cluster. +* Rolling updates. Each node in the cluster is stopped and restarted after a short interval to simulate machines being rebooted after a software update. Only one node is stopped at any one time. Shards are not reallocated while a node is down. + +Each scenario was subject to the same workload including a mixture of data ingestion tasks, aggregations, and filter queries while nodes were taken offline and recovered. The bulk insert operations in the workload each stored 1000 documents and were performed against one index while the aggregations and filter queries used a separate index containing several million documents. This was to enable the performance of queries to be assessed separately from the bulk inserts. 
Each index contained five shards and one replica. + +The following sections summarize the results of these tests, noting any degradation in performance while a node is offline or being recovered, and any errors that were reported. The results are presented graphically, highlighting the points at which one or more nodes are missing and estimating the time taken for the system to fully recover and achieve a similar level of performance that was present prior to the nodes being taken offline. + +> [!NOTE] +> The test harnesses used to perform these tests are available online. You can adapt and use these harnesses to verify the resilience and recoverability of your own cluster configurations. For more information, see [Running the automated Elasticsearch resiliency tests][Running the automated Elasticsearch resiliency tests]. +> +> + +## Node failure and restart with no data loss: results + + +The results of this test are shown in the file [ElasticsearchRecoveryScenario1.pdf](https://github.com/mspnp/azure-guidance/blob/master/figures/Elasticsearch/ElasticSearchRecoveryScenario1.pdf). The graphs show the performance profile of the workload and physical resources for each node in the cluster. The initial part of the graphs shows the system running normally for approximately 20 minutes, at which point node 0 is shut down for 5 minutes before being restarted. The statistics for a further 20 minutes are illustrated; the system takes approximately 10 minutes to recover and stabilize. This is illustrated by the transaction rates and response times for the different workloads. + +Note the following points: + +* During the test, no errors were reported. No data was lost, and all operations completed successfully. +* The transaction rates for all three types of operation (bulk insert, aggregate query, and filter query) dropped and the average response times increased while node 0 was offline. 
+* During the recovery period, the transaction rates and response times for the aggregate query and filter query operations were gradually restored. The performance for bulk insert recovered for a short while before diminishing. However, this is likely due to the volume of data causing the index used by the bulk insert to grow, and the transaction rates for this operation can be seen to slow down even before node 0 is taken offline. +* The CPU utilization graph for node 0 shows reduced activity during the recovery phase. This is due to the increased disk and network activity caused by the recovery mechanism; the node has to catch up with any data it missed while it was offline and update the shards that it contains. +* The shards for the indexes are not distributed exactly equally across all nodes. There are two indexes containing 5 shards and 1 replica each, making a total of 20 shards. One node will therefore contain 6 shards while the other two hold 7 each. This is evident in the CPU utilization graphs during the initial 20-minute period; node 0 is less busy than the other two. After recovery is complete, some switching seems to occur as node 2 appears to become the more lightly loaded node. + +## Node failure with catastrophic data loss: results + + +The results of this test are depicted in the file [ElasticsearchRecoveryScenario2.pdf](https://github.com/mspnp/azure-guidance/blob/master/figures/Elasticsearch/ElasticSearchRecoveryScenario2.pdf). As with the first test, the initial part of the graphs shows the system running normally for approximately 20 minutes, at which point node 0 is shut down for 5 minutes. During this interval, the Elasticsearch data on this node is removed, simulating catastrophic data loss, before being restarted. Full recovery appears to take 12-15 minutes before the levels of performance seen before the test are restored. + +Note the following points: + +* During the test, no errors were reported. 
No data was lost, and all operations completed successfully. +* The transaction rates for all three types of operation (bulk insert, aggregate query, and filter query) dropped and the average response times increased while node 0 was offline. At this point, the performance profile of the test is similar to the first scenario. This is not surprising as, to this point, the scenarios are the same. +* During the recovery period, the transaction rates and response times were restored, although during this time there was a lot more volatility in the figures. This is most probably due to the additional work that the nodes in the cluster are performing, providing the data to restore the missing shards. This additional work is evident in the CPU utilization, disk activity, and network activity graphs. +* The CPU utilization graphs for nodes 0 and 1 show reduced activity during the recovery phase. This is due to the increased disk and network activity caused by the recovery process. In the first scenario, only the node being recovered exhibited this behavior, but in this scenario it seems likely that most of the missing data for node 0 is being restored from node 1. +* The I/O activity for node 0 is actually reduced compared to the first scenario. This could be due to the I/O efficiencies of simply copying the data for an entire shard rather than the series of smaller I/O requests required to bring an existing shard up to date. +* The network activity for all three nodes indicates bursts of activity as data is transmitted and received between nodes. In scenario 1, only node 0 exhibited as much network activity, but this activity seemed to be sustained for a longer period. Again, this difference could be due to the efficiencies of transmitting the entire data for a shard as a single request rather than the series of smaller requests received when recovering a shard. 
+ +## Node failure and restart with shard reallocation: results + + +The file [ElasticsearchRecoveryScenario3.pdf](https://github.com/mspnp/azure-guidance/blob/master/figures/Elasticsearch/ElasticSearchRecoveryScenario3.pdf) illustrates the results of this test. As with the first test, the initial part of the graphs shows the system running normally for approximately 20 minutes, at which point node 0 is shut down for 5 minutes. At this point, the Elasticsearch cluster attempts to recreate the missing shards and rebalance the shards across the remaining nodes. After 5 minutes node 0 is brought back online, and once again the cluster has to rebalance the shards. Performance is restored after 12-15 minutes. + +Note the following points: + +* During the test, no errors were reported. No data was lost, and all operations completed successfully. +* The transaction rates for all three types of operation (bulk insert, aggregate query, and filter query) dropped and the average response times increased significantly while node 0 was offline compared to the previous two tests. This is due to the increased cluster activity recreating the missing shards and rebalancing the cluster as evidenced by the raised figures for disk and network activity for nodes 1 and 2 in this period. +* During the period after node 0 is brought back online, the transaction rates and response times remain volatile. +* The CPU utilization and disk activity graphs for node 0 show greatly reduced initial activity during the recovery phase. This is because at this point, node 0 is not serving any data. After a period of approximately 5 minutes, the node bursts into action, as shown by the sudden increase in network, disk, and CPU activity. This is most likely caused by the cluster redistributing shards across nodes. Node 0 then shows normal activity. 
+ +## Rolling updates: results + + +The results of this test, in the file [ElasticsearchRecoveryScenario4.pdf](https://github.com/mspnp/azure-guidance/blob/master/figures/Elasticsearch/ElasticSearchRecoveryScenario4.pdf), show how each node is taken offline and then brought back up again in succession. Each node is shut down for 5 minutes before being restarted at which point the next node in sequence is stopped. + +Note the following points: + +* While each node is cycled, the performance in terms of throughput and response times remains reasonably even. +* Disk activity increases for each node for a short time as it is brought back online. This is most probably due to the recovery process rolling forward any changes that have occurred while the node was down. +* When a node is taken offline, spikes in network activity occur in the remaining nodes. Spikes also occur when a node is restarted. +* After the final node is recycled, the system enters a period of significant volatility. This is most likely caused by the recovery process having to synchronize changes across every node and ensure that all replicas and their corresponding shards are consistent. At one point, this effort causes successive bulk insert operations to time out and fail. The errors reported in each case were: + +``` +Failure -- BulkDataInsertTest17(org.apache.jmeter.protocol.java.sampler.JUnitSampler$AnnotatedTestCase): java.lang.AssertionError: failure in bulk execution: +[1]: index [systwo], type [logs], id [AVEg0JwjRKxX_sVoNrte], message [UnavailableShardsException[[systwo][2] Primary shard is not active or isn't assigned to a known node. 
Timeout: [1m], request: org.elasticsearch.action.bulk.BulkShardRequest@787cc3cd]] + +``` + +Subsequent experimentation showed that introducing a delay of a few minutes between cycling each node eliminated this error, so it was most likely caused by contention between the recovery process attempting to restore several nodes simultaneously and the bulk insert operations trying to store thousands of new documents. + +## Summary +The tests performed indicated that: + +* Elasticsearch was highly resilient to the most common modes of failure likely to occur in a cluster. +* Elasticsearch can recover quickly if a well-designed cluster is subject to catastrophic data loss on a node. This can happen if you configure Elasticsearch to save data to ephemeral storage and the node is subsequently reprovisioned after a restart. These results show that even in this case, the risks of using ephemeral storage are most likely outweighed by the performance benefits that this class of storage provides. +* In the first three scenarios, no errors occurred in concurrent bulk insert, aggregation, and filter query workloads while a node was taken offline and recovered. +* Only the last scenario indicated potential data loss, and this loss only affected new data being added. It is good practice in applications performing data ingestion to mitigate this likelihood by retrying insert operations that have failed as the type of error reported is highly likely to be transient. +* The results of the last test also show that if you are performing planned maintenance of the nodes in a cluster, performance will benefit if you allow several minutes between cycling one node and the next. In an unplanned situation (such as the datacenter recycling nodes after performing an operating system update), you have less control over how and when nodes are taken down and restarted. 
The contention that arises when Elasticsearch attempts to recover the state of the cluster after sequential node outages can result in timeouts and errors. + +[Manage the Availability of Virtual Machines]: /azure/articles/virtual-machines/virtual-machines-linux-manage-availability/ +[Running the Automated Elasticsearch Resiliency Tests]: automated-resilience-tests.md diff --git a/docs/elasticsearch/toc.md b/docs/elasticsearch/toc.md new file mode 100644 index 00000000000..7e6abdc1943 --- /dev/null +++ b/docs/elasticsearch/toc.md @@ -0,0 +1,10 @@ +# Elasticsearch on Azure +## [Get started](./index.md) +## [Tune data ingestion performance](./data-ingestion-performance.md) +## [Tune data aggregation and query performance](./data-aggregation-and-query-performance.md) +## [Configure resilience and recovery](./resilience-and-recovery.md) +## [Create a performance testing environment](./performance-testing-environment.md) +## [Implement a JMeter test plan](./jmeter-test-plan.md) +## [Deploy a JMeter JUnit sampler for testing](./jmeter-junit-sampler.md) +## [Run the resiliency tests](./automated-resilience-tests.md) +## [Run the performance tests](./automated-performance-tests.md) \ No newline at end of file diff --git a/docs/index.liquid.md b/docs/index.liquid.md new file mode 100644 index 00000000000..b47abdfb440 --- /dev/null +++ b/docs/index.liquid.md @@ -0,0 +1,27 @@ +--- +title: {{ title }} +description: {{ description }} +layout: LandingPage +--- + +
    +
    +

    {{ title }}

    +
    {{ frontmatter }}
    + +
    +
      + {%- for item in series %} +
    • + {% include 'two-column-card' %} +
    • + {%- endfor %} +
    +
    +
    \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000000..b13a7faec91 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,207 @@ +--- +title: Azure Architecture Center +description: Azure Guidance from AzureCAT and patterns & practices +layout: LandingPage +--- + +
    +
    +

    Azure Architecture Center

    +
    + +
    + +
    +
    \ No newline at end of file diff --git a/docs/index.yml b/docs/index.yml new file mode 100644 index 00000000000..f86b461b215 --- /dev/null +++ b/docs/index.yml @@ -0,0 +1,59 @@ +--- +title: Azure Architecture Center +description: Azure Guidance from AzureCAT and patterns & practices +frontmatter: The Azure Architecture Center is a collection of best practices, reference implementations, and scenario-based guidance to help you build highly scalable and resilient solutions on Microsoft Azure. These resources are designed for an engineering audience, including developers, solution architects, and IT pros. + +featured: + +- title: Architectural Blueprints + description: A set of recommended architectures for the cloud. Each architecture includes best practices, prescriptive steps, and a deployable solution. + url: /azure/architecture/blueprints + thumbnail: azure-arch-1.svg + +- title: Cloud Design Patterns + description: Design patterns for developers and solution architects. Each pattern describes a problem, a pattern that addresses the problem, and an example based on Azure. + url: /azure/architecture/patterns/ + thumbnail: azure-arch-3.svg + +- title: Best Practices for Cloud Applications + description: Best practices for cloud applications, covering aspects such as auto-scaling, caching, data partitioning, API design, and others. + url: /azure/architecture/best-practices/ + thumbnail: azure-arch-4.svg + +series: + +- title: Design Review Checklists + description: Checklists to assist developers and solution architects during the design process. + url: /azure/architecture/checklist/ + thumbnail: checklist.svg + +- title: Designing for Resiliency + description: Learn how to design resilient applications for Azure. + url: /azure/architecture/resiliency + thumbnail: resiliency.svg + +- title: Run Elasticsearch on Azure + description: Learn how to deploy, manage, and tune an Elasticsearch cluster on Azure. 
+ url: /azure/architecture/elasticsearch + thumbnail: elasticsearch.svg + +- title: Azure for AWS Professionals + description: Leverage your AWS experiences in Microsoft Azure. + url: /azure/architecture/aws-professional + thumbnail: aws-professional.svg + +- title: Identity Management for Multitenant Applications + description: Understand the best practices for multitenancy, when using Azure AD for identity management. + url: /azure/architecture/multitenant-identity + thumbnail: multitenant-identity.svg + +- title: Azure Customer Advisory Team + description: The AzureCAT team's blog + url: https://blogs.msdn.microsoft.com/azurecat/ + thumbnail: azurecat.svg + +- title: SQL Server Customer Advisory Team + description: The SQLCAT team's blog + url: https://blogs.msdn.microsoft.com/sqlcat/ + thumbnail: sqlcat.svg +--- \ No newline at end of file diff --git a/docs/multitenant-identity/adfs.md b/docs/multitenant-identity/adfs.md new file mode 100644 index 00000000000..00de8fd1106 --- /dev/null +++ b/docs/multitenant-identity/adfs.md @@ -0,0 +1,201 @@ +--- +title: Federate with a customer's AD FS +description: How to federate with a customer's AD FS in a multitenant application +services: '' +documentationcenter: na +author: JohnPWSharp +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: be7470d8-b511-4757-93c9-2dee701a0bdc +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 06/02/2016 +ms.author: v-josha +pnp.series.prev: token-cache +pnp.series.next: client-assertion +--- +# Federate with a customer's AD FS + +This article describes how a multi-tenant SaaS application can support authentication via Active Directory Federation Services (AD FS), in order to federate with a customer's AD FS. + +## Overview +Azure Active Directory (Azure AD) makes it easy to sign in users from Azure AD tenants, including Office365 and Dynamics CRM Online customers. 
But what about customers who use on-premises Active Directory on a corporate intranet? + +One option is for these customers to sync their on-premises AD with Azure AD, using [Azure AD Connect]. However, some customers may be unable to use this approach, due to corporate IT policy or other reasons. In that case, another option is to federate through Active Directory Federation Services (AD FS). + +To enable this scenario: + +* The customer must have an Internet-facing AD FS farm. +* The SaaS provider deploys their own AD FS farm. +* The customer and the SaaS provider must set up [federation trust]. This is a manual process. + +There are three main roles in the trust relation: + +* The customer's AD FS is the [account partner], responsible for authenticating users from the customer's AD, and creating security tokens with user claims. +* The SaaS provider's AD FS is the [resource partner], which trusts the account partner and receives the user claims. +* The application is configured as a relying party (RP) in the SaaS provider's AD FS. + + ![Federation trust](./images/federation-trust.png) + +> [!NOTE] +> In this article, we assume the application uses OpenID Connect as the authentication protocol. Another option is to use WS-Federation. +> +> For OpenID Connect, the SaaS provider must use AD FS 4.0 running in Windows Server 2016, which is currently in Technical Preview. AD FS 3.0 does not support OpenID Connect. +> +> ASP.NET Core 1.0 does not include out-of-the-box support for WS-Federation. +> +> + +For an example of using WS-Federation with ASP.NET 4, see the [active-directory-dotnet-webapp-wsfederation sample][active-directory-dotnet-webapp-wsfederation]. + +## Authentication flow +1. When the user clicks "sign in", the application redirects to an OpenID Connect endpoint on the SaaS provider's AD FS. +2. The user enters his or her organizational user name ("`alice@corp.contoso.com`"). 
AD FS uses home realm discovery to redirect to the customer's AD FS, where the user enters their credentials. +3. The customer's AD FS sends user claims to the SaaS provider's AD FS, using WS-Federation (or SAML). +4. Claims flow from AD FS to the app, using OpenID Connect. This requires a protocol transition from WS-Federation. + +## Limitations +At the time of this writing, the application receives a limited set of claims in the OpenID id_token, as listed in the following table. AD FS 4.0 is still in preview, so this set might change. It is not currently possible to define additional claims: + +| Claim | Description | +| --- | --- | +| aud |Audience. The application for which the claims were issued. | +| authenticationinstant |[Authentication instant]. The time at which authentication occurred. | +| c_hash |Code hash value. This is a hash of the token contents. | +| exp |[Expiration time]. The time after which the token will no longer be accepted. | +| iat |Issued at. The time when the token was issued. | +| iss |Issuer. The value of this claim is always the resource partner's AD FS. | +| name |User name. Example: `john@corp.fabrikam.com`. | +| nameidentifier |[Name identifier]. The identifier for the name of the entity for which the token was issued. | +| nonce |Session nonce. A unique value generated by AD FS to help prevent replay attacks. | +| upn |User principal name (UPN). Example: `john@corp.fabrikam.com`. | +| pwd_exp |Password expiration period. The number of seconds until the user's password or a similar authentication secret, such as a PIN, expires. | + +> [!NOTE] +> The "iss" claim contains the AD FS of the partner (typically, this claim will identify the SaaS provider as the issuer). It does not identify the customer's AD FS. You can find the customer's domain as part of the UPN. +> +> + +The rest of this article describes how to set up the trust relationship between the RP (the app) and the account partner (the customer). 
+ +## AD FS deployment +The SaaS provider can deploy AD FS either on-premises or on Azure VMs. For security and availability, the following guidelines are important: + +* Deploy at least two AD FS servers and two AD FS proxy servers to achieve the best availability of the AD FS service. +* Domain controllers and AD FS servers should never be exposed directly to the Internet and should be in a virtual network with direct access to them. +* Web application proxies (previously AD FS proxies) must be used to publish AD FS servers to the Internet. + +To set up a similar topology in Azure requires the use of virtual networks, NSGs, Azure VMs, and availability sets. For more details, see [Guidelines for Deploying Windows Server Active Directory on Azure Virtual Machines][active-directory-on-azure]. + +## Configure OpenID Connect authentication with AD FS +The SaaS provider must enable OpenID Connect between the application and AD FS. To do so, add an application group in AD FS. You can find detailed instructions in this [blog post], under "Setting up a Web App for OpenId Connect sign in AD FS." + +Next, configure the OpenID Connect middleware. The metadata endpoint is `https://domain/adfs/.well-known/openid-configuration`, where domain is the SaaS provider's AD FS domain. + +Typically you might combine this with other OpenID Connect endpoints (such as AAD). You'll need two different sign-in buttons or some other way to distinguish them, so that the user is sent to the correct authentication endpoint. + +## Configure the AD FS Resource Partner +The SaaS provider must do the following for each customer that wants to connect via AD FS: + +1. Add a claims provider trust. +2. Add claims rules. +3. Enable home-realm discovery. + +Here are the steps in more detail. + +### Add the claims provider trust +1. In Server Manager, click **Tools**, and then select **AD FS Management**. +2. In the console tree, under **AD FS**, right-click **Claims Provider Trusts**. 
Select **Add Claims Provider Trust**. +3. Click **Start** to start the wizard. +4. Select the option "Import data about the claims provider published online or on a local network". Enter the URI of the customer's federation metadata endpoint. (Example: `https://contoso.com/FederationMetadata/2007-06/FederationMetadata.xml`.) You will need to get this from the customer. +5. Complete the wizard using the default options. + +### Edit claims rules +1. Right-click the newly added claims provider trust, and select **Edit Claims Rules**. +2. Click **Add Rule**. +3. Select "Pass Through or Filter an Incoming Claim" and click **Next**. + ![Add Transform Claim Rule Wizard](./images/edit-claims-rule.png) +4. Enter a name for the rule. +5. Under "Incoming claim type", select **UPN**. +6. Select "Pass through all claim values". + ![Add Transform Claim Rule Wizard](./images/edit-claims-rule2.png) +7. Click **Finish**. +8. Repeat steps 2 - 7, and specify **Anchor Claim Type** for the incoming claim type. +9. Click **OK** to complete the wizard. + +### Enable home-realm discovery +Run the following PowerShell script: + +``` +Set-ADFSClaimsProviderTrust -TargetName "name" -OrganizationalAccountSuffix @("suffix") +``` + +where "name" is the friendly name of the claims provider trust, and "suffix" is the UPN suffix for the customer's AD (for example, "corp.fabrikam.com"). + +With this configuration, end users can type in their organizational account, and AD FS automatically selects the corresponding claims provider. See [Customizing the AD FS Sign-in Pages], under the section "Configure Identity Provider to use certain email suffixes". + +## Configure the AD FS Account Partner +The customer must do the following: + +1. Add a relying party (RP) trust. +2. Add claims rules. + +### Add the RP trust +1. In Server Manager, click **Tools**, and then select **AD FS Management**. +2. In the console tree, under **AD FS**, right-click **Relying Party Trusts**. Select **Add Relying Party Trust**. 
+3. Select **Claims Aware** and click **Start**. +4. On the **Select Data Source** page, select the option "Import data about the claims provider published online or on a local network". Enter the URI of the SaaS provider's federation metadata endpoint. + ![Add Relying Party Trust Wizard](./images/add-rp-trust.png) +5. On the **Specify Display Name** page, enter any name. +6. On the **Choose Access Control Policy** page, choose a policy. You could permit everyone in the organization, or choose a specific security group. + ![Add Relying Party Trust Wizard](./images/add-rp-trust2.png) +7. Enter any parameters required in the **Policy** box. +8. Click **Next** to complete the wizard. + +### Add claims rules +1. Right-click the newly added relying party trust, and select **Edit Claim Issuance Policy**. +2. Click **Add Rule**. +3. Select "Send LDAP Attributes as Claims" and click **Next**. +4. Enter a name for the rule, such as "UPN". +5. Under **Attribute store**, select **Active Directory**. + ![Add Transform Claim Rule Wizard](./images/add-claims-rules.png) +6. In the **Mapping of LDAP attributes** section: + * Under **LDAP Attribute**, select **User-Principal-Name**. + * Under **Outgoing Claim Type**, select **UPN**. + ![Add Transform Claim Rule Wizard](./images/add-claims-rules2.png) +7. Click **Finish**. +8. Click **Add Rule** again. +9. Select "Send Claims Using a Custom Rule" and click **Next**. +10. Enter a name for the rule, such as "Anchor Claim Type". +11. Under **Custom rule**, enter the following: + + ``` + EXISTS([Type == "http://schemas.microsoft.com/ws/2014/01/identity/claims/anchorclaimtype"])=> + issue (Type = "http://schemas.microsoft.com/ws/2014/01/identity/claims/anchorclaimtype", + Value = "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/upn"); + ``` + + This rule issues a claim of type `anchorclaimtype`. The claim tells the relying party to use UPN as the user's immutable ID. +12. Click **Finish**. +13. Click **OK** to complete the wizard. 
+ + + +[Azure AD Connect]: /azure/active-directory/active-directory-aadconnect/ +[federation trust]: https://technet.microsoft.com/library/cc770993(v=ws.11).aspx +[account partner]: https://technet.microsoft.com/library/cc731141(v=ws.11).aspx +[resource partner]: https://technet.microsoft.com/library/cc731141(v=ws.11).aspx +[Authentication instant]: https://msdn.microsoft.com/library/system.security.claims.claimtypes.authenticationinstant%28v=vs.110%29.aspx +[Expiration time]: http://tools.ietf.org/html/draft-ietf-oauth-json-web-token-25#section-4.1. +[Name identifier]: https://msdn.microsoft.com/library/system.security.claims.claimtypes.nameidentifier(v=vs.110).aspx +[active-directory-on-azure]: https://msdn.microsoft.com/library/azure/jj156090.aspx +[blog post]: http://www.cloudidentity.com/blog/2015/08/21/OPENID-CONNECT-WEB-SIGN-ON-WITH-ADFS-IN-WINDOWS-SERVER-2016-TP3/ +[Customizing the AD FS Sign-in Pages]: https://technet.microsoft.com/library/dn280950.aspx +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps +[client assertion]: client-assertion.md +[active-directory-dotnet-webapp-wsfederation]: https://github.com/Azure-Samples/active-directory-dotnet-webapp-wsfederation diff --git a/docs/multitenant-identity/app-roles.md b/docs/multitenant-identity/app-roles.md new file mode 100644 index 00000000000..ba53b5bb61f --- /dev/null +++ b/docs/multitenant-identity/app-roles.md @@ -0,0 +1,176 @@ +--- +title: Application roles +description: How to perform authorization using application roles +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: fabacd3a-3549-4d60-8809-8ee5a5ed562c +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 02/16/2016 +ms.author: mwasson +pnp.series.prev: signup +pnp.series.next: authorize +--- +# Application roles + 
+[![GitHub](../_images/github.png) Sample code][sample application] + +Application roles are used to assign permissions to users. For example, the [Tailspin Surveys][Tailspin] application defines the following roles: + +* Administrator. Can perform all CRUD operations on any survey that belongs to that tenant. +* Creator. Can create new surveys. +* Reader. Can read any surveys that belong to that tenant. + +You can see that roles ultimately get translated into permissions, during [authorization]. But the first question is how to assign and manage roles. We identified three main options: + +* [Azure AD App Roles](#roles-using-azure-ad-app-roles) +* [Azure AD security groups](#roles-using-azure-ad-security-groups) +* [Application role manager](#roles-using-an-application-role-manager). + +## Roles using Azure AD App Roles +This is the approach that we used in the Tailspin Surveys app. + +In this approach, the SaaS provider defines the application roles by adding them to the application manifest. After a customer signs up, an admin for the customer's AD directory assigns users to the roles. When a user signs in, the user's assigned roles are sent as claims. + +> [!NOTE] +> If the customer has Azure AD Premium, the admin can assign a security group to a role, and members of the group will inherit the app role. This is a convenient way to manage roles, because the group owner doesn't need to be an AD admin. +> +> + +Advantages of this approach: + +* Simple programming model. +* Roles are specific to the application. The role claims for one application are not sent to another application. +* If the customer removes the application from their AD tenant, the roles go away. +* The application doesn't need any extra Active Directory permissions, other than reading the user's profile. + +Drawbacks: + +* Customers without Azure AD Premium cannot assign security groups to roles. For these customers, all user assignments must be done by an AD administrator. 
+* If you have a backend web API, which is separate from the web app, then role assignments for the web app don't apply to the web API. For more discussion of this point, see [Securing a backend web API]. + +### Implementation +**Define the roles.** The SaaS provider declares the app roles in the [application manifest]. For example, here is the manifest entry for the Surveys app: + +``` +"appRoles": [ + { + "allowedMemberTypes": [ + "User" + ], + "description": "Creators can create Surveys", + "displayName": "SurveyCreator", + "id": "1b4f816e-5eaf-48b9-8613-7923830595ad", + "isEnabled": true, + "value": "SurveyCreator" + }, + { + "allowedMemberTypes": [ + "User" + ], + "description": "Administrators can manage the Surveys in their tenant", + "displayName": "SurveyAdmin", + "id": "c20e145e-5459-4a6c-a074-b942bbd4cfe1", + "isEnabled": true, + "value": "SurveyAdmin" + } +], +``` + +The `value` property appears in the role claim. The `id` property is the unique identifier for the defined role. Always generate a new GUID value for `id`. + +**Assign users**. When a new customer signs up, the application is registered in the customer's AD tenant. At this point, an AD admin for that tenant can assign users to roles. + +> [!NOTE] +> As noted earlier, customers with Azure AD Premium can also assign security groups to roles. +> +> + +The following screenshot from the Azure portal shows three users. Alice was assigned directly to a role. Bob inherited a role as a member of a security group named "Surveys Admin", which is assigned to a role. Charles is not assigned to any role. + +![Assigned users](./images/role-assignments.png) + +> [!NOTE] +> Alternatively, the application can assign roles programmatically, using the Azure AD Graph API. However, this requires the application to obtain write permissions for the customer's AD directory. An application with those permissions could do a lot of mischief — the customer is trusting the app not to mess up their directory. 
Many customers might be unwilling to grant this level of access. +> +> + +**Get role claims**. When a user signs in, the application receives the user's assigned role(s) in a claim with type `http://schemas.microsoft.com/ws/2008/06/identity/claims/role`. + +A user can have multiple roles, or no role. In your authorization code, don't assume the user has exactly one role claim. Instead, write code that checks whether a particular claim value is present: + +```csharp +if (context.User.HasClaim(ClaimTypes.Role, "Admin")) { ... } +``` + +## Roles using Azure AD security groups +In this approach, roles are represented as AD security groups. The application assigns permissions to users based on their security group memberships. + +Advantages: + +* For customers who do not have Azure AD Premium, this approach enables the customer to use security groups to manage role assignments. + +Disadvantages: + +* Complexity. Because every tenant sends different group claims, the app must keep track of which security groups correspond to which application roles, for each tenant. +* If the customer removes the application from their AD tenant, the security groups are left in their AD directory. + +### Implementation +In the application manifest, set the `groupMembershipClaims` property to "SecurityGroup". This is needed to get group membership claims from AAD. + +``` +{ + // ... + "groupMembershipClaims": "SecurityGroup", +} +``` + +When a new customer signs up, the application instructs the customer to create security groups for the roles needed by the application. The customer then needs to enter the group object IDs into the application. The application stores these in a table that maps group IDs to application roles, per tenant. + +> [!NOTE] +> Alternatively, the application could create the groups programmatically, using the Azure AD Graph API. This would be less error prone. 
However, it requires the application to obtain "read and write all groups" permissions for the customer's AD directory. Many customers might be unwilling to grant this level of access. +> +> + +When a user signs in: + +1. The application receives the user's groups as claims. The value of each claim is the object ID of a group. +2. Azure AD limits the number of groups sent in the token. If the number of groups exceeds this limit, Azure AD sends a special "overage" claim. If that claim is present, the application must query the Azure AD Graph API to get all of the groups to which that user belongs. For details, see [Authorization in Cloud Applications using AD Groups], under the section titled "Groups claim overage". +3. The application looks up the object IDs in its own database, to find the corresponding application roles to assign to the user. +4. The application adds a custom claim value to the user principal that expresses the application role. For example: `survey_role` = "SurveyAdmin". + +Authorization policies should use the custom role claim, not the group claim. + +## Roles using an application role manager +With this approach, application roles are not stored in Azure AD at all. Instead, the application stores the role assignments for each user in its own DB — for example, using the **RoleManager** class in ASP.NET Identity. + +Advantages: + +* The app has full control over the roles and user assignments. + +Drawbacks: + +* More complex, harder to maintain. +* Cannot use AD security groups to manage role assignments. +* Stores user information in the application database, where it can get out of sync with the tenant's AD directory, as users are added or removed. + +There are many existing examples for this approach. For example, see [Create an ASP.NET MVC app with auth and SQL DB and deploy to Azure App Service]. 
+ +[**Next**][authorization] + + +[Tailspin]: tailspin.md + +[authorization]: authorize.md +[Securing a backend web API]: web-api.md +[Create an ASP.NET MVC app with auth and SQL DB and deploy to Azure App Service]: /azure/app-service-web/web-sites-dotnet-deploy-aspnet-mvc-app-membership-oauth-sql-database/ +[application manifest]: /azure/active-directory/active-directory-application-manifest/ +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/authenticate.md b/docs/multitenant-identity/authenticate.md new file mode 100644 index 00000000000..12127ba5605 --- /dev/null +++ b/docs/multitenant-identity/authenticate.md @@ -0,0 +1,252 @@ +--- +title: Authentication in multitenant applications +description: How a multitenant application can authenticate users from Azure AD +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: f5805ca8-d399-45fb-9f54-ca0766bbb31b +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 05/23/2016 +ms.author: mwasson +pnp.series.prev: tailspin +pnp.series.next: claims +--- +# Authenticate using Azure AD and OpenID Connect + +[![GitHub](../_images/github.png) Sample code][sample application] + +The Surveys application uses the OpenID Connect (OIDC) protocol to authenticate users with Azure Active Directory (Azure AD). The Surveys application is built with ASP.NET Core 1.0, which has built-in middleware for OIDC. The following diagram shows what happens when the user signs in, at a high level. + +![Authentication flow](./images/auth-flow.png) + +1. The user clicks the "sign in" button in the app. This action is handled by an MVC controller. +2. The MVC controller returns a **ChallengeResult** action. +3. 
The middleware intercepts the **ChallengeResult** and creates a 302 response, which redirects the user to the Azure AD sign-in page. +4. The user authenticates with Azure AD. +5. Azure AD sends an ID token to the application. +6. The middleware validates the ID token. At this point, the user is now authenticated inside the application. +7. The middleware redirects the user back to the application. + +## Register the app with Azure AD +To enable OpenID Connect, the SaaS provider registers the application inside their own Azure AD tenant. + +To register the application, follow the steps in [Integrating Applications with Azure Active Directory](/azure/active-directory/active-directory-integrating-applications/), in the section [Adding an Application](/azure/active-directory/active-directory-integrating-applications/#adding-an-application). + +In the **Configure** page: + +* Note the client ID. +* Under **Application is Multi-Tenant**, select **Yes**. +* Set **Reply URL** to a URL where Azure AD will send the authentication response. You can use the base URL of your app. + * Note: The URL path can be anything, as long as the host name matches your deployed app. + * You can set multiple reply URLs. During development, you can use a `localhost` address, for running the app locally. +* Generate a client secret: Under **keys**, click on the drop down that says **Select duration** and pick either 1 or 2 years. The key will be visible when you click **Save**. Be sure to copy the value, because it's not shown again when you reload the configuration page. + +## Configure the auth middleware +This section describes how to configure the authentication middleware in ASP.NET Core 1.0 for multitenant authentication with OpenID Connect. 
+ +In your startup class, add the OpenID Connect middleware: + +```csharp +app.UseOpenIdConnectAuthentication(options => +{ + options.AutomaticAuthenticate = true; + options.AutomaticChallenge = true; + options.ClientId = [client ID]; + options.Authority = "https://login.microsoftonline.com/common/"; + options.CallbackPath = [callback path]; + options.PostLogoutRedirectUri = [application URI]; + options.SignInScheme = CookieAuthenticationDefaults.AuthenticationScheme; + options.TokenValidationParameters = new TokenValidationParameters + { + ValidateIssuer = false + }; + options.Events = [event callbacks]; +}); +``` + +> [!NOTE] +> See [Startup.cs](https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Startup.cs). +> +> + +For more information about the startup class, see [Application Startup](https://docs.asp.net/en/latest/fundamentals/startup.html) in the ASP.NET Core 1.0 documentation. + +Set the following middleware options: + +* **ClientId**. The application's client ID, which you got when you registered the application in Azure AD. +* **Authority**. For a multitenant application, set this to `https://login.microsoftonline.com/common/`. This is the URL for the Azure AD common endpoint, which enables users from any Azure AD tenant to sign in. For more information about the common endpoint, see [this blog post](http://www.cloudidentity.com/blog/2014/08/26/the-common-endpoint-walks-like-a-tenant-talks-like-a-tenant-but-is-not-a-tenant/). +* In **TokenValidationParameters**, set **ValidateIssuer** to false. That means the app will be responsible for validating the issuer value in the ID token. (The middleware still validates the token itself.) For more information about validating the issuer, see [Issuer validation](claims.md#issuer-validation). +* **CallbackPath**. Set this equal to the path in the Reply URL that you registered in Azure AD. 
For example, if the reply URL is `http://contoso.com/aadsignin`, **CallbackPath** should be `aadsignin`. If you don't set this option, the default value is `signin-oidc`. +* **PostLogoutRedirectUri**. Specify a URL to redirect users after the sign out. This should be a page that allows anonymous requests — typically the home page. +* **SignInScheme**. Set this to `CookieAuthenticationDefaults.AuthenticationScheme`. This setting means that after the user is authenticated, the user claims are stored locally in a cookie. This cookie is how the user stays logged in during the browser session. +* **Events.** Event callbacks; see [Authentication events](#authentication-events). + +Also add the Cookie Authentication middleware to the pipeline. This middleware is responsible for writing the user claims to a cookie, and then reading the cookie during subsequent page loads. + +```csharp +app.UseCookieAuthentication(options => +{ + options.AutomaticAuthenticate = true; + options.AutomaticChallenge = true; + options.AccessDeniedPath = "/Home/Forbidden"; +}); +``` + +## Initiate the authentication flow +To start the authentication flow in ASP.NET MVC, return a **ChallengeResult** from the controller: + +```csharp +[AllowAnonymous] +public IActionResult SignIn() +{ + return new ChallengeResult( + OpenIdConnectDefaults.AuthenticationScheme, + new AuthenticationProperties + { + IsPersistent = true, + RedirectUri = Url.Action("SignInCallback", "Account") + }); +} +``` + +This causes the middleware to return a 302 (Found) response that redirects to the authentication endpoint. + +## User login sessions +As mentioned, when the user first signs in, the Cookie Authentication middleware writes the user claims to a cookie. After that, HTTP requests are authenticated by reading the cookie. + +By default, the cookie middleware writes a [session cookie][session-cookie], which gets deleted once the user closes the browser. 
The next time the user visits the site, they will have to sign in again. However, if you set **IsPersistent** to true in the **ChallengeResult**, the middleware writes a persistent cookie, so the user stays logged in after closing the browser. You can configure the cookie expiration; see [Controlling cookie options][cookie-options]. Persistent cookies are more convenient for the user, but may be inappropriate for some applications (say, a banking application) where you want the user to sign in every time. + +## About the OpenID Connect middleware +The OpenID Connect middleware in ASP.NET hides most of the protocol details. This section contains some notes about the implementation that may be useful for understanding the protocol flow. + +First, let's examine the authentication flow in terms of ASP.NET (ignoring the details of the OIDC protocol flow between the app and Azure AD). The following diagram shows the process. + +![Sign-in flow](./images/sign-in-flow.png) + +In this diagram, there are two MVC controllers. The Account controller handles sign-in requests, and the Home controller serves up the home page. + +Here is the authentication process: + +1. The user clicks the "Sign in" button, and the browser sends a GET request. For example: `GET /Account/SignIn/`. +2. The account controller returns a `ChallengeResult`. +3. The OIDC middleware returns an HTTP 302 response, redirecting to Azure AD. +4. The browser sends the authentication request to Azure AD. +5. The user signs in to Azure AD, and Azure AD sends back an authentication response. +6. The OIDC middleware creates a claims principal and passes it to the Cookie Authentication middleware. +7. The cookie middleware serializes the claims principal and sets a cookie. +8. The OIDC middleware redirects to the application's callback URL. +9. The browser follows the redirect, sending the cookie in the request. +10. 
The cookie middleware deserializes the cookie to a claims principal and sets `HttpContext.User` equal to the claims principal. The request is routed to an MVC controller. + +### Authentication ticket +If authentication succeeds, the OIDC middleware creates an authentication ticket, which contains a claims principal that holds the user's claims. You can access the ticket inside the **AuthenticationValidated** or **TicketReceived** event. + +> [!NOTE] +> Until the entire authentication flow is completed, `HttpContext.User` still holds an anonymous principal, *not* the authenticated user. The anonymous principal has an empty claims collection. After authentication completes and the app redirects, the cookie middleware deserializes the authentication cookie and sets `HttpContext.User` to a claims principal that represents the authenticated user. +> +> + +### Authentication events +During the authentication process, the OpenID Connect middleware raises a series of events: + +* **RedirectToAuthenticationEndpoint**. Called right before the middleware redirects to the authentication endpoint. You can use this event to modify the redirect URL; for example, to add request parameters. See [Adding the admin consent prompt](signup.md#adding-the-admin-consent-prompt) for an example. +* **AuthorizationResponseReceived**. Called after the middleware receives the authentication response from the identity provider (IDP), but before the middleware validates the response. +* **AuthorizationCodeReceived**. Called with the authorization code. +* **TokenResponseReceived**. Called after the middleware gets an access token from the IDP. Applies only to authorization code flow. +* **AuthenticationValidated**. Called after the middleware validates the ID token. At this point, the application has a set of validated claims about the user. You can use this event to perform additional validation on the claims, or to transform claims. See [Working with claims](claims.md). 
+* **UserInformationReceived**. Called if the middleware gets the user profile from the user info endpoint. Applies only to authorization code flow, and only when `GetClaimsFromUserInfoEndpoint = true` in the middleware options. +* **TicketReceived**. Called when authentication is completed. This is the last event, assuming that authentication succeeds. After this event is handled, the user is signed into the app. +* **AuthenticationFailed**. Called if authentication fails. Use this event to handle authentication failures — for example, by redirecting to an error page. + +To provide callbacks for these events, set the **Events** option on the middleware. There are two different ways to declare the event handlers: Inline with lambdas, or in a class that derives from **OpenIdConnectEvents**. + +Inline with lambdas: + +```csharp +app.UseOpenIdConnectAuthentication(options => +{ + // Other options not shown. + + options.Events = new OpenIdConnectEvents + { + OnTicketReceived = (context) => + { + // Handle event + return Task.FromResult(0); + }, + // other events + } +}); +``` + +Deriving from **OpenIdConnectEvents**: + +```csharp +public class SurveyAuthenticationEvents : OpenIdConnectEvents +{ + public override Task TicketReceived(TicketReceivedContext context) + { + // Handle event + return base.TicketReceived(context); + } + // other events +} + +// In Startup.cs: +app.UseOpenIdConnectAuthentication(options => +{ + // Other options not shown. + + options.Events = new SurveyAuthenticationEvents(); +}); +``` + +The second approach is recommended if your event callbacks have any substantial logic, so they don't clutter your startup class. Our reference implementation uses this approach; see [SurveyAuthenticationEvents.cs](https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Security/SurveyAuthenticationEvents.cs). 
+ +### OpenID connect endpoints +Azure AD supports [OpenID Connect Discovery](https://openid.net/specs/openid-connect-discovery-1_0.html), wherein the identity provider (IDP) returns a JSON metadata document from a [well-known endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfig). The metadata document contains information such as: + +* The URL of the authorization endpoint. This is where the app redirects to authenticate the user. +* The URL of the "end session" endpoint, where the app goes to log out the user. +* The URL to get the signing keys, which the client uses to validate the OIDC tokens that it gets from the IDP. + +By default, the OIDC middleware knows how to fetch this metadata. Set the **Authority** option in the middleware, and the middleware constructs the URL for the metadata. (You can override the metadata URL by setting the **MetadataAddress** option.) + +### OpenID connect flows +By default, the OIDC middleware uses hybrid flow with form post response mode. + +* *Hybrid flow* means the client can get an ID token and an authorization code in the same round-trip to the authorization server. +* *Form post response mode* means the authorization server uses an HTTP POST request to send the ID token and authorization code to the app. The values are form-urlencoded (content type = "application/x-www-form-urlencoded"). + +When the OIDC middleware redirects to the authorization endpoint, the redirect URL includes all of the query string parameters needed by OIDC. For hybrid flow: + +* client_id. This value is set in the **ClientId** option. +* scope = "openid profile", which means it's an OIDC request and we want the user's profile. +* response_type = "code id_token". This specifies hybrid flow. +* response_mode = "form_post". This specifies form post response. + +To specify a different flow, set the **ResponseType** property on the options. 
For example: + +```csharp +app.UseOpenIdConnectAuthentication(options => +{ + options.ResponseType = "code"; // Authorization code flow + + // Other options +}); +``` + +[**Next**][claims] + +[claims]: claims.md +[cookie-options]: https://docs.asp.net/en/latest/security/authentication/cookie.html#controlling-cookie-options +[session-cookie]: https://en.wikipedia.org/wiki/HTTP_cookie#Session_cookie +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/authorize.md b/docs/multitenant-identity/authorize.md new file mode 100644 index 00000000000..d714d210efb --- /dev/null +++ b/docs/multitenant-identity/authorize.md @@ -0,0 +1,274 @@ +--- +title: Authorization in multitenant applications +description: How to perform authorization in a multitenant application +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 84e4a9eb-c599-42b0-908d-cd88b69b5138 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 06/02/2016 +ms.author: mwasson +pnp.series.prev: app-roles +pnp.series.next: web-api +--- +# Role-based and resource-based authorization + +[![GitHub](../_images/github.png) Sample code][sample application] + +Our [reference implementation] is an ASP.NET Core 1.0 application. In this article we'll look at two general approaches to authorization, using the authorization APIs provided in ASP.NET Core 1.0. + +* **Role-based authorization**. Authorizing an action based on the roles assigned to a user. For example, some actions require an administrator role. +* **Resource-based authorization**. Authorizing an action based on a particular resource. For example, every resource has an owner. The owner can delete the resource; other users cannot. + +A typical app will employ a mix of both. 
For example, to delete a resource, the user must be the resource owner *or* an admin. + +## Role-Based Authorization +The [Tailspin Surveys][Tailspin] application defines the following roles: + +* Administrator. Can perform all CRUD operations on any survey that belongs to that tenant. +* Creator. Can create new surveys. +* Reader. Can read any surveys that belong to that tenant. + +Roles apply to *users* of the application. In the Surveys application, a user is either an administrator, creator, or reader. + +For a discussion of how to define and manage roles, see [Application roles]. + +Regardless of how you manage the roles, your authorization code will look similar. ASP.NET Core 1.0 introduces an abstraction called [authorization policies][policies]. With this feature, you define authorization policies in code, and then apply those policies to controller actions. The policy is decoupled from the controller. + +### Create policies +To define a policy, first create a class that implements `IAuthorizationRequirement`. It's easiest to derive from `AuthorizationHandler`. In the `Handle` method, examine the relevant claim(s). + +Here is an example from the Tailspin Surveys application: + +```csharp +public class SurveyCreatorRequirement : AuthorizationHandler<SurveyCreatorRequirement>, IAuthorizationRequirement +{ + protected override void Handle(AuthorizationContext context, SurveyCreatorRequirement requirement) + { + if (context.User.HasClaim(ClaimTypes.Role, Roles.SurveyAdmin) || + context.User.HasClaim(ClaimTypes.Role, Roles.SurveyCreator)) + { + context.Succeed(requirement); + } + } +} +``` + +> [!NOTE] +> See [SurveyCreatorRequirement.cs] +> +> + +This class defines the requirement for a user to create a new survey. The user must be in the SurveyAdmin or SurveyCreator role. + +In your startup class, define a named policy that includes one or more requirements. If there are multiple requirements, the user must meet *every* requirement to be authorized. 
The following code defines two policies: + +```csharp +services.AddAuthorization(options => +{ + options.AddPolicy(PolicyNames.RequireSurveyCreator, + policy => + { + policy.AddRequirements(new SurveyCreatorRequirement()); + policy.AddAuthenticationSchemes(CookieAuthenticationDefaults.AuthenticationScheme); + }); + + options.AddPolicy(PolicyNames.RequireSurveyAdmin, + policy => + { + policy.AddRequirements(new SurveyAdminRequirement()); + policy.AddAuthenticationSchemes(CookieAuthenticationDefaults.AuthenticationScheme); + }); +}); +``` + +> [!NOTE] +> See [Startup.cs] +> +> + +This code also sets the authentication scheme, which tells ASP.NET which authentication middleware should run if authorization fails. In this case, we specify the cookie authentication middleware, because the cookie authentication middleware can redirect the user to a "Forbidden" page. The location of the Forbidden page is set in the AccessDeniedPath option for the cookie middleware; see [Configuring the authentication middleware]. + +### Authorize controller actions +Finally, to authorize an action in an MVC controller, set the policy in the `Authorize` attribute: + +```csharp +[Authorize(Policy = "SurveyCreatorRequirement")] +public IActionResult Create() +{ + // ... +} +``` + +In earlier versions of ASP.NET, you would set the **Roles** property on the attribute: + +```csharp +// old way +[Authorize(Roles = "SurveyCreator")] + +``` + +This is still supported in ASP.NET Core 1.0, but it has some drawbacks compared with authorization policies: + +* It assumes a particular claim type. Policies can check for any claim type. Roles are just a type of claim. +* The role name is hard-coded into the attribute. With policies, the authorization logic is all in one place, making it easier to update or even load from configuration settings. +* Policies enable more complex authorization decisions (e.g., age >= 21) that can't be expressed by simple role membership. 
+ +## Resource based authorization +*Resource based authorization* occurs whenever the authorization depends on a specific resource that will be affected by an operation. In the Tailspin Surveys application, every survey has an owner and zero-to-many contributors. + +* The owner can read, update, delete, publish, and unpublish the survey. +* The owner can assign contributors to the survey. +* Contributors can read and update the survey. + +Note that "owner" and "contributor" are not application roles; they are stored per survey, in the application database. To check whether a user can delete a survey, for example, the app checks whether the user is the owner for that survey. + +In ASP.NET Core 1.0, implement resource-based authorization by deriving from **AuthorizationHandler** and overriding the **Handle** method. + +```csharp +public class SurveyAuthorizationHandler : AuthorizationHandler<OperationAuthorizationRequirement, Survey> +{ + protected override void Handle(AuthorizationContext context, OperationAuthorizationRequirement operation, Survey resource) + { + } +} +``` + +Notice that this class is strongly typed for Survey objects. Register the class for DI on startup: + +```csharp +services.AddSingleton<IAuthorizationHandler>(factory => +{ + return new SurveyAuthorizationHandler(); +}); +``` + +To perform authorization checks, use the **IAuthorizationService** interface, which you can inject into your controllers. The following code checks whether a user can read a survey: + +```csharp +if (await _authorizationService.AuthorizeAsync(User, survey, Operations.Read) == false) +{ + return new HttpStatusCodeResult(403); +} +``` + +Because we pass in a `Survey` object, this call will invoke the `SurveyAuthorizationHandler`. + +In your authorization code, a good approach is to aggregate all of the user's role-based and resource-based permissions, then check the aggregate set against the desired operation. +Here is an example from the Surveys app. 
The application defines several permission types: + +* Admin +* Contributor +* Creator +* Owner +* Reader + +The application also defines a set of possible operations on surveys: + +* Create +* Read +* Update +* Delete +* Publish +* Unpublish + +The following code creates a list of permissions for a particular user and survey. Notice that this code looks at both the user's app roles, and the owner/contributor fields in the survey. + +```csharp +protected override void Handle(AuthorizationContext context, OperationAuthorizationRequirement operation, Survey resource) +{ + var permissions = new List<UserPermissionType>(); + string userTenantId = context.User.GetTenantIdValue(); + int userId = ClaimsPrincipalExtensions.GetUserKey(context.User); + string user = context.User.GetUserName(); + + if (resource.TenantId == userTenantId) + { + // Admin can do anything, as long as the resource belongs to the admin's tenant. + if (context.User.HasClaim(ClaimTypes.Role, Roles.SurveyAdmin)) + { + context.Succeed(operation); + return; + } + + if (context.User.HasClaim(ClaimTypes.Role, Roles.SurveyCreator)) + { + permissions.Add(UserPermissionType.Creator); + } + else + { + permissions.Add(UserPermissionType.Reader); + } + + if (resource.OwnerId == userId) + { + permissions.Add(UserPermissionType.Owner); + } + } + if (resource.Contributors != null && resource.Contributors.Any(x => x.UserId == userId)) + { + permissions.Add(UserPermissionType.Contributor); + } + if (ValidateUserPermissions[operation](permissions)) + { + context.Succeed(operation); + } +} +``` + +> [!NOTE] +> See [SurveyAuthorizationHandler.cs]. +> +> + +In a multi-tenant application, you must ensure that permissions don't "leak" to another tenant's data. In the Surveys app, the Contributor permission is allowed across tenants — you can assign someone from another tenant as a contributor. The other permission types are restricted to resources that belong to that user's tenant. 
To enforce this requirement, the code checks the tenant ID before granting the permission. (The `TenantId` field is assigned when the survey is created.) + +The next step is to check the operation (read, update, delete, etc.) against the permissions. The Surveys app implements this step by using a lookup table of functions: + +```csharp +static readonly Dictionary<OperationAuthorizationRequirement, Func<List<UserPermissionType>, bool>> ValidateUserPermissions + = new Dictionary<OperationAuthorizationRequirement, Func<List<UserPermissionType>, bool>> + + { + { Operations.Create, x => x.Contains(UserPermissionType.Creator) }, + + { Operations.Read, x => x.Contains(UserPermissionType.Creator) || + x.Contains(UserPermissionType.Reader) || + x.Contains(UserPermissionType.Contributor) || + x.Contains(UserPermissionType.Owner) }, + + { Operations.Update, x => x.Contains(UserPermissionType.Contributor) || + x.Contains(UserPermissionType.Owner) }, + + { Operations.Delete, x => x.Contains(UserPermissionType.Owner) }, + + { Operations.Publish, x => x.Contains(UserPermissionType.Owner) }, + + { Operations.UnPublish, x => x.Contains(UserPermissionType.Owner) } + }; +``` + + +[**Next**][web-api] + + +[Tailspin]: tailspin.md + +[Application roles]: app-roles.md +[policies]: https://docs.asp.net/en/latest/security/authorization/policies.html +[rbac]: https://docs.asp.net/en/latest/security/authorization/resourcebased.html +[reference implementation]: tailspin.md +[SurveyCreatorRequirement.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Security/Policy/SurveyCreatorRequirement.cs +[Startup.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Startup.cs +[Configuring the authentication middleware]: authenticate.md#configure-the-auth-middleware +[SurveyAuthorizationHandler.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Security/Policy/SurveyAuthorizationHandler.cs +[sample application]: 
https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps +[web-api]: web-api.md diff --git a/docs/multitenant-identity/claims.md b/docs/multitenant-identity/claims.md new file mode 100644 index 00000000000..4ed6f40365c --- /dev/null +++ b/docs/multitenant-identity/claims.md @@ -0,0 +1,133 @@ +--- +title: Work with claim-based identities in multitenant applications +description: How to use claims for issuer validation and authorization +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 0cf1f970-216b-4175-80f3-e81d155cdf79 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 05/23/2016 +ms.author: mwasson +pnp.series.prev: authenticate +pnp.series.next: signup +--- +# Work with claims-based identities + +[![GitHub](../_images/github.png) Sample code][sample application] + +## Claims in Azure AD +When a user signs in, Azure AD sends an ID token that contains a set of claims about the user. A claim is simply a piece of information, expressed as a key/value pair. For example, `email`=`bob@contoso.com`. Claims have an issuer — in this case, Azure AD — which is the entity that authenticates the user and creates the claims. You trust the claims because you trust the issuer. (Conversely, if you don't trust the issuer, don't trust the claims!) + +At a high level: + +1. The user authenticates. +2. The IDP sends a set of claims. +3. The app normalizes or augments the claims (optional). +4. The app uses the claims to make authorization decisions. + +In OpenID Connect, the set of claims that you get is controlled by the [scope parameter] of the authentication request. However, Azure AD issues a limited set of claims through OpenID Connect; see [Supported Token and Claim Types]. If you want more information about the user, you'll need to use the Azure AD Graph API. 
+ +Here are some of the claims from AAD that an app might typically care about: + +| Claim type in ID token | Description | +| --- | --- | +| aud |Who the token was issued for. This will be the application's client ID. Generally, you shouldn't need to worry about this claim, because the middleware automatically validates it. Example: `"91464657-d17a-4327-91f3-2ed99386406f"` | +| groups |A list of AAD groups of which the user is a member. Example: `["93e8f556-8661-4955-87b6-890bc043c30f", "fc781505-18ef-4a31-a7d5-7d931d7b857e"]` | +| iss |The [issuer] of the OIDC token. Example: `https://sts.windows.net/b9bd2162-77ac-4fb2-8254-5c36e9c0a9c4/` | +| name |The user's display name. Example: `"Alice A."` | +| oid |The object identifier for the user in AAD. This value is the immutable and non-reusable identifier of the user. Use this value, not email, as a unique identifier for users; email addresses can change. If you use the Azure AD Graph API in your app, object ID is that value used to query profile information. Example: `"59f9d2dc-995a-4ddf-915e-b3bb314a7fa4"` | +| roles |A list of app roles for the user. Example: `["SurveyCreator"]` | +| tid |Tenant ID. This value is a unique identifier for the tenant in Azure AD. Example: `"b9bd2162-77ac-4fb2-8254-5c36e9c0a9c4"` | +| unique_name |A human readable display name of the user. Example: `"alice@contoso.com"` | +| upn |User principal name. Example: `"alice@contoso.com"` | + +This table lists the claim types as they appear in the ID token. 
In ASP.NET Core 1.0, the OpenID Connect middleware converts some of the claim types when it populates the Claims collection for the user principal: + +* oid > `http://schemas.microsoft.com/identity/claims/objectidentifier` +* tid > `http://schemas.microsoft.com/identity/claims/tenantid` +* unique_name > `http://schemas.xmlsoap.org/ws/2005/05/identity/claims/name` +* upn > `http://schemas.xmlsoap.org/ws/2005/05/identity/claims/upn` + +## Claims transformations +During the authentication flow, you might want to modify the claims that you get from the IDP. In ASP.NET Core 1.0, you can perform claims transformation inside of the **AuthenticationValidated** event from the OpenID Connect middleware. (See [Authentication events].) + +Any claims that you add during **AuthenticationValidated** are stored in the session authentication cookie. They don't get pushed back to Azure AD. + +Here are some examples of claims transformation: + +* **Claims normalization**, or making claims consistent across users. This is particularly relevant if you are getting claims from multiple IDPs, which might use different claim types for similar information. + For example, Azure AD sends a "upn" claim that contains the user's email. Other IDPs might send an "email" claim. The following code converts the "upn" claim into an "email" claim: + + ```csharp + var email = principal.FindFirst(ClaimTypes.Upn)?.Value; + if (!string.IsNullOrWhiteSpace(email)) + { + identity.AddClaim(new Claim(ClaimTypes.Email, email)); + } + ``` +* Add **default claim values** for claims that aren't present — for example, assigning a user to a default role. In some cases this can simplify authorization logic. +* Add **custom claim types** with application-specific information about the user. For example, you might store some information about the user in a database. You could add a custom claim with this information to the authentication ticket. 
The claim is stored in a cookie, so you only need to get it from the database once per login session. On the other hand, you also want to avoid creating excessively large cookies, so you need to consider the trade-off between cookie size versus database lookups. + +After the authentication flow is complete, the claims are available in `HttpContext.User`. At that point, you should treat them as a read-only collection — e.g., use them to make authorization decisions. + +## Issuer validation +In OpenID Connect, the issuer claim ("iss") identifies the IDP that issued the ID token. Part of the OIDC authentication flow is to verify that the issuer claim matches the actual issuer. The OIDC middleware handles this for you. + +In Azure AD, the issuer value is unique per AD tenant (`https://sts.windows.net/`). Therefore, an application should do an additional check, to make sure the issuer represents a tenant that is allowed to sign in to the app. + +For a single-tenant application, you can just check that the issuer is your own tenant. In fact, the OIDC middleware does this automatically by default. In a multi-tenant app, you need to allow for multiple issuers, corresponding to the different tenants. Here is a general approach to use: + +* In the OIDC middleware options, set **ValidateIssuer** to false. This turns off the automatic check. +* When a tenant signs up, store the tenant and the issuer in your user DB. +* Whenever a user signs in, look up the issuer in the database. If the issuer isn't found, it means that tenant hasn't signed up. You can redirect them to a sign up page. +* You could also blacklist certain tenants; for example, for customers that didn't pay their subscription. + +For a more detailed discussion, see [Sign-up and tenant onboarding in a multitenant application][signup]. + +## Using claims for authorization +With claims, a user's identity is no longer a monolithic entity. 
For example, a user might have an email address, phone number, birthday, gender, etc. Maybe the user's IDP stores all of this information. But when you authenticate the user, you'll typically get a subset of these as claims. In this model, the user's identity is simply a bundle of claims. When you make authorization decisions about a user, you will look for particular sets of claims. In other words, the question "Can user X perform action Y" ultimately becomes "Does user X have claim Z". + +Here are some basic patterns for checking claims. + +* To check that the user has a particular claim with a particular value: + + ```csharp + if (User.HasClaim(ClaimTypes.Role, "Admin")) { ... } + ``` + This code checks whether the user has a Role claim with the value "Admin". It correctly handles the case where the user has no Role claim or multiple Role claims. + + The **ClaimTypes** class defines constants for commonly-used claim types. However, you can use any string value for the claim type. +* To get a single value for a claim type, when you expect there to be at most one value: + + ```csharp + string email = User.FindFirst(ClaimTypes.Email)?.Value; + ``` +* To get all the values for a claim type: + + ```csharp + IEnumerable<Claim> groups = User.FindAll("groups"); + ``` + +For more information, see [Role-based and resource-based authorization in multitenant applications][authorization]. 
+ +[**Next**][signup] + + + + +[scope parameter]: http://nat.sakimura.org/2012/01/26/scopes-and-claims-in-openid-connect/ +[Supported Token and Claim Types]: /azure/active-directory/active-directory-token-and-claims/ +[issuer]: http://openid.net/specs/openid-connect-core-1_0.html#IDToken +[Authentication events]: authenticate.md#authentication-events +[signup]: signup.md +[Claims-Based Authorization]: https://docs.asp.net/en/latest/security/authorization/claims.html +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps +[authorization]: authorize.md diff --git a/docs/multitenant-identity/client-assertion.md b/docs/multitenant-identity/client-assertion.md new file mode 100644 index 00000000000..804f838e8ae --- /dev/null +++ b/docs/multitenant-identity/client-assertion.md @@ -0,0 +1,159 @@ +--- +title: Use client assertion to get access tokens from Azure AD +description: How to use client assertion to get access tokens from Azure AD. +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 63b83cca-20ff-4f0f-8eed-3ac9cf4f9935 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 05/23/2016 +ms.author: mwasson +pnp.series.prev: adfs +pnp.series.next: key-vault +--- +# Use client assertion to get access tokens from Azure AD + +[![GitHub](../_images/github.png) Sample code][sample application] + +## Background +When using authorization code flow or hybrid flow in OpenID Connect, the client exchanges an authorization code for an access token. During this step, the client has to authenticate itself to the server. + +![Client secret](./images/client-secret.png) + +One way to authenticate the client is by using a client secret. That's how the [Tailspin Surveys][Surveys] application is configured by default. 
+ +Here is an example request from the client to the IDP, requesting an access token. Note the `client_secret` parameter. + +``` +POST https://login.microsoftonline.com/b9bd2162xxx/oauth2/token HTTP/1.1 +Content-Type: application/x-www-form-urlencoded + +resource=https://tailspin.onmicrosoft.com/surveys.webapi + &client_id=87df91dc-63de-4765-8701-b59cc8bd9e11 + &client_secret=i3Bf12Dn... + &grant_type=authorization_code + &code=PG8wJG6Y... +``` + +The secret is just a string, so you have to make sure not to leak the value. The best practice is to keep the client secret out of source control. When you deploy to Azure, store the secret in an [app setting][configure-web-app]. + +However, anyone with access to the Azure subscription can view the app settings. Further, there is always a temptation to check secrets into source control (e.g., in deployment scripts), share them by email, and so on. + +For additional security, you can use [client assertion] instead of a client secret. With client assertion, the client uses an X.509 certificate to prove the token request came from the client. The client certificate is installed on the web server. Generally, it will be easier to restrict access to the certificate, than to ensure that nobody inadvertently reveals a client secret. For more information about configuring certificates in a web app, see [Using Certificates in Azure Websites Applications][using-certs-in-websites] + +Here is a token request using client assertion: + +``` +POST https://login.microsoftonline.com/b9bd2162xxx/oauth2/token HTTP/1.1 +Content-Type: application/x-www-form-urlencoded + +resource=https://tailspin.onmicrosoft.com/surveys.webapi + &client_id=87df91dc-63de-4765-8701-b59cc8bd9e11 + &client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer + &client_assertion=eyJhbGci... + &grant_type=authorization_code + &code= PG8wJG6Y... +``` + +Notice that the `client_secret` parameter is no longer used. 
Instead, the `client_assertion` parameter contains a JWT token that was signed using the client certificate. The `client_assertion_type` parameter specifies the type of assertion — in this case, JWT token. The server validates the JWT token. If the JWT token is invalid, the token request returns an error. + +> [!NOTE] +> X.509 certificates are not the only form of client assertion; we focus on it here because it is supported by Azure AD. +> +> + +## Using client assertion in the Surveys application +This section shows how to configure the Tailspin Surveys application to use client assertion. In these steps, you will generate a self-signed certificate that is suitable for development, but not for production use. + +1. Run the PowerShell script [/Scripts/Setup-KeyVault.ps1][Setup-KeyVault] as follows: + + ``` + .\Setup-KeyVault.ps1 -Subject [subject] + ``` + + For the `Subject` parameter, enter any name, such as "surveysapp". The script generates a self-signed certificate and stores it in the "Current User/Personal" certificate store. +2. The output from the script is a JSON fragment. Add this to the application manifest of the web app, as follows: + + 1. Log into the [Azure management portal][azure-management-portal] and navigate to your Azure AD directory. + 2. Click **Applications**. + 3. Select the Surveys application. + 4. Click **Manage Manifest** and select **Download Manifest**. + 5. Open the manifest JSON file in a text editor. Paste the output from the script into the `keyCredentials` property. It should look similar to the following: + + ``` + "keyCredentials": [ + { + "type": "AsymmetricX509Cert", + "usage": "Verify", + "keyId": "29d4f7db-0539-455e-b708-....", + "customKeyIdentifier": "ZEPpP/+KJe2fVDBNaPNOTDoJMac=", + "value": "MIIDAjCCAeqgAwIBAgIQFxeRiU59eL....." + } + ], + ``` + 6. Save your changes to the JSON file. + 7. Go back to the portal. Click **Manage Manifest** > **Upload Manifest** and upload the JSON file. +3. 
Run the following command to get the thumbprint of the certificate. + + ``` + certutil -store -user my [subject] + ``` + + where `[subject]` is the value that you specified for Subject in the PowerShell script. The thumbprint is listed under "Cert Hash(sha1)". Remove the spaces between the hexadecimal numbers. +4. Update your app secrets. In Solution Explorer, right-click the Tailspin.Surveys.Web project and select **Manage User Secrets**. Add an entry for "Asymmetric" under "AzureAd", as shown below: + + ``` + { + "AzureAd": { + "ClientId": "[Surveys application client ID]", + // "ClientSecret": "[client secret]", << Delete this entry + "PostLogoutRedirectUri": "https://localhost:44300/", + "WebApiResourceId": "[App ID URI of your Survey.WebAPI application]", + // new: + "Asymmetric": { + "CertificateThumbprint": "[certificate thumbprint]", // Example: "105b2ff3bc842c53582661716db1b7cdc6b43ec9" + "StoreName": "My", + "StoreLocation": "CurrentUser", + "ValidationRequired": "false" + } + }, + "Redis": { + "Configuration": "[Redis connection string]" + } + } + ``` + + You must set `ValidationRequired` to false, because the certificate was not signed by a root CA. In production, use a certificate that is signed by a CA and set `ValidationRequired` to true. + + Also delete the entry for `ClientSecret`, because it's not needed with client assertion. +5. In Startup.cs, locate the code that registers the `ICredentialService`. Uncomment the line that uses `CertificateCredentialService`, and comment out the line that uses `ClientCredentialService`: + + ```csharp + // Uncomment this: + services.AddSingleton<ICredentialService, CertificateCredentialService>(); + // Comment out this: + //services.AddSingleton<ICredentialService, ClientCredentialService>(); + ``` + +At run time, the web application reads the certificate from the certificate store. The certificate must be installed on the same machine as the web app. 
+ +[**Next**][key vault] + + +[configure-web-app]: /azure/app-service-web/web-sites-configure/ +[azure-management-portal]: https://manage.windowsazure.com +[client assertion]: https://tools.ietf.org/html/rfc7521 +[key vault]: key-vault.md +[Setup-KeyVault]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/scripts/Setup-KeyVault.ps1 +[Surveys]: tailspin.md +[using-certs-in-websites]: https://azure.microsoft.com/blog/using-certificates-in-azure-websites-applications/ + +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/images/access-token.png b/docs/multitenant-identity/images/access-token.png new file mode 100644 index 00000000000..d155fb9f0e9 Binary files /dev/null and b/docs/multitenant-identity/images/access-token.png differ diff --git a/docs/multitenant-identity/images/add-claims-rules.png b/docs/multitenant-identity/images/add-claims-rules.png new file mode 100644 index 00000000000..ca7554cbe37 Binary files /dev/null and b/docs/multitenant-identity/images/add-claims-rules.png differ diff --git a/docs/multitenant-identity/images/add-claims-rules2.png b/docs/multitenant-identity/images/add-claims-rules2.png new file mode 100644 index 00000000000..701b45750eb Binary files /dev/null and b/docs/multitenant-identity/images/add-claims-rules2.png differ diff --git a/docs/multitenant-identity/images/add-contributor.png b/docs/multitenant-identity/images/add-contributor.png new file mode 100644 index 00000000000..ef934f15f32 Binary files /dev/null and b/docs/multitenant-identity/images/add-contributor.png differ diff --git a/docs/multitenant-identity/images/add-rp-trust.png b/docs/multitenant-identity/images/add-rp-trust.png new file mode 100644 index 00000000000..9cf58a84d7d Binary files /dev/null and b/docs/multitenant-identity/images/add-rp-trust.png differ diff --git a/docs/multitenant-identity/images/add-rp-trust2.png 
b/docs/multitenant-identity/images/add-rp-trust2.png new file mode 100644 index 00000000000..5499c31a90f Binary files /dev/null and b/docs/multitenant-identity/images/add-rp-trust2.png differ diff --git a/docs/multitenant-identity/images/admin-consent.png b/docs/multitenant-identity/images/admin-consent.png new file mode 100644 index 00000000000..0cc2f4f0c30 Binary files /dev/null and b/docs/multitenant-identity/images/admin-consent.png differ diff --git a/docs/multitenant-identity/images/architecture.png b/docs/multitenant-identity/images/architecture.png new file mode 100644 index 00000000000..2fac0cfb92a Binary files /dev/null and b/docs/multitenant-identity/images/architecture.png differ diff --git a/docs/multitenant-identity/images/auth-flow.png b/docs/multitenant-identity/images/auth-flow.png new file mode 100644 index 00000000000..edf26dac7b0 Binary files /dev/null and b/docs/multitenant-identity/images/auth-flow.png differ diff --git a/docs/multitenant-identity/images/client-secret.png b/docs/multitenant-identity/images/client-secret.png new file mode 100644 index 00000000000..bf5e96153b5 Binary files /dev/null and b/docs/multitenant-identity/images/client-secret.png differ diff --git a/docs/multitenant-identity/images/co-admin.png b/docs/multitenant-identity/images/co-admin.png new file mode 100644 index 00000000000..53c40b2f92e Binary files /dev/null and b/docs/multitenant-identity/images/co-admin.png differ diff --git a/docs/multitenant-identity/images/consent-error.png b/docs/multitenant-identity/images/consent-error.png new file mode 100644 index 00000000000..38d575a1570 Binary files /dev/null and b/docs/multitenant-identity/images/consent-error.png differ diff --git a/docs/multitenant-identity/images/contributor.png b/docs/multitenant-identity/images/contributor.png new file mode 100644 index 00000000000..4e3cebab7f7 Binary files /dev/null and b/docs/multitenant-identity/images/contributor.png differ diff --git 
a/docs/multitenant-identity/images/delegated-permissions.png b/docs/multitenant-identity/images/delegated-permissions.png new file mode 100644 index 00000000000..fc5e62ff1b2 Binary files /dev/null and b/docs/multitenant-identity/images/delegated-permissions.png differ diff --git a/docs/multitenant-identity/images/edit-claims-rule.png b/docs/multitenant-identity/images/edit-claims-rule.png new file mode 100644 index 00000000000..ae349cb6106 Binary files /dev/null and b/docs/multitenant-identity/images/edit-claims-rule.png differ diff --git a/docs/multitenant-identity/images/edit-claims-rule2.png b/docs/multitenant-identity/images/edit-claims-rule2.png new file mode 100644 index 00000000000..504a33eece3 Binary files /dev/null and b/docs/multitenant-identity/images/edit-claims-rule2.png differ diff --git a/docs/multitenant-identity/images/edit-directory.png b/docs/multitenant-identity/images/edit-directory.png new file mode 100644 index 00000000000..c0dca8be6c3 Binary files /dev/null and b/docs/multitenant-identity/images/edit-directory.png differ diff --git a/docs/multitenant-identity/images/edit-directory2.png b/docs/multitenant-identity/images/edit-directory2.png new file mode 100644 index 00000000000..6dde08ff556 Binary files /dev/null and b/docs/multitenant-identity/images/edit-directory2.png differ diff --git a/docs/multitenant-identity/images/edit-survey.png b/docs/multitenant-identity/images/edit-survey.png new file mode 100644 index 00000000000..407f634392d Binary files /dev/null and b/docs/multitenant-identity/images/edit-survey.png differ diff --git a/docs/multitenant-identity/images/federation-trust.png b/docs/multitenant-identity/images/federation-trust.png new file mode 100644 index 00000000000..bd5dfe9c495 Binary files /dev/null and b/docs/multitenant-identity/images/federation-trust.png differ diff --git a/docs/multitenant-identity/images/get-secret.png b/docs/multitenant-identity/images/get-secret.png new file mode 100644 index 
00000000000..e5c7e1b1c47 Binary files /dev/null and b/docs/multitenant-identity/images/get-secret.png differ diff --git a/docs/multitenant-identity/images/load-balancing.png b/docs/multitenant-identity/images/load-balancing.png new file mode 100644 index 00000000000..f1b85dc90ff Binary files /dev/null and b/docs/multitenant-identity/images/load-balancing.png differ diff --git a/docs/multitenant-identity/images/multitenant.png b/docs/multitenant-identity/images/multitenant.png new file mode 100644 index 00000000000..618b29167f0 Binary files /dev/null and b/docs/multitenant-identity/images/multitenant.png differ diff --git a/docs/multitenant-identity/images/org-users.png b/docs/multitenant-identity/images/org-users.png new file mode 100644 index 00000000000..132573333c1 Binary files /dev/null and b/docs/multitenant-identity/images/org-users.png differ diff --git a/docs/multitenant-identity/images/role-assignments.png b/docs/multitenant-identity/images/role-assignments.png new file mode 100644 index 00000000000..c29ae47b041 Binary files /dev/null and b/docs/multitenant-identity/images/role-assignments.png differ diff --git a/docs/multitenant-identity/images/settings.png b/docs/multitenant-identity/images/settings.png new file mode 100644 index 00000000000..1f3d8455734 Binary files /dev/null and b/docs/multitenant-identity/images/settings.png differ diff --git a/docs/multitenant-identity/images/sign-in-flow.png b/docs/multitenant-identity/images/sign-in-flow.png new file mode 100644 index 00000000000..b76227908be Binary files /dev/null and b/docs/multitenant-identity/images/sign-in-flow.png differ diff --git a/docs/multitenant-identity/images/sign-up-page.png b/docs/multitenant-identity/images/sign-up-page.png new file mode 100644 index 00000000000..7a173e2eb88 Binary files /dev/null and b/docs/multitenant-identity/images/sign-up-page.png differ diff --git a/docs/multitenant-identity/images/single-tenant.png b/docs/multitenant-identity/images/single-tenant.png new file 
mode 100644 index 00000000000..ec644abe789 Binary files /dev/null and b/docs/multitenant-identity/images/single-tenant.png differ diff --git a/docs/multitenant-identity/images/state-parameter.png b/docs/multitenant-identity/images/state-parameter.png new file mode 100644 index 00000000000..c5ce69f8160 Binary files /dev/null and b/docs/multitenant-identity/images/state-parameter.png differ diff --git a/docs/multitenant-identity/images/surveys-screenshot.png b/docs/multitenant-identity/images/surveys-screenshot.png new file mode 100644 index 00000000000..917f8cbc1de Binary files /dev/null and b/docs/multitenant-identity/images/surveys-screenshot.png differ diff --git a/docs/multitenant-identity/images/tenant-surveys.png b/docs/multitenant-identity/images/tenant-surveys.png new file mode 100644 index 00000000000..2445bd76e32 Binary files /dev/null and b/docs/multitenant-identity/images/tenant-surveys.png differ diff --git a/docs/multitenant-identity/images/tenant-table.png b/docs/multitenant-identity/images/tenant-table.png new file mode 100644 index 00000000000..a3e528f240a Binary files /dev/null and b/docs/multitenant-identity/images/tenant-table.png differ diff --git a/docs/multitenant-identity/images/token-cache.png b/docs/multitenant-identity/images/token-cache.png new file mode 100644 index 00000000000..094baa986ae Binary files /dev/null and b/docs/multitenant-identity/images/token-cache.png differ diff --git a/docs/multitenant-identity/images/users.png b/docs/multitenant-identity/images/users.png new file mode 100644 index 00000000000..47cf98e5e70 Binary files /dev/null and b/docs/multitenant-identity/images/users.png differ diff --git a/docs/multitenant-identity/index.md b/docs/multitenant-identity/index.md new file mode 100644 index 00000000000..869d2d23713 --- /dev/null +++ b/docs/multitenant-identity/index.md @@ -0,0 +1,111 @@ +--- +title: Identity Management for Multitenant Applications +description: >- + Best practices for authentication, authorization, 
and identity management in + multitenant apps. +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 629a954b-8f05-4f6e-8109-36f77ef179b7 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 06/02/2016 +ms.author: mwasson +pnp.series.next: tailspin +--- +# Manage Identity in Multitenant Applications + +This series of articles describes best practices for multitenancy, when using Azure AD for authentication and identity management. + +[![GitHub](../_images/github.png) Sample code][sample application] + +When you're building a multitenant application, one of the first challenges is managing user identities, because now every user belongs to a tenant. For example: + +* Users sign in with their organizational credentials. +* Users should have access to their organization's data, but not data that belongs to other tenants. +* An organization can sign up for the application, and then assign application roles to its members. + +Azure Active Directory (Azure AD) has some great features that support all of these scenarios. + +To accompany this series of articles, we also created a complete, [end-to-end implementation][sample application] of a multitenant app. The articles reflect what we learned in the process of building the application. To get started with the application, see [Running the Surveys application][running-the-app]. + +## Introduction + +Let's say you're writing an enterprise SaaS application to be hosted in the cloud. Of course, the application will have users: + +![Users](./images/users.png) + +But those users belong to organizations: + +![Organizational users](./images/org-users.png) + +Example: Tailspin sells subscriptions to its SaaS application. Contoso and Fabrikam sign up for the app. When Alice (`alice@contoso`) signs in, the application should know that Alice is part of Contoso. 
+ +* Alice *should* have access to Contoso data. +* Alice *should not* have access to Fabrikam data. + +This guidance will show you how to manage user identities in a multitenant application, using [Azure Active Directory][AzureAD] (Azure AD) to handle sign-in and authentication. + +## What is multitenancy? +A *tenant* is a group of users. In a SaaS application, the tenant is a subscriber or customer of the application. *Multitenancy* is an architecture where multiple tenants share the same physical instance of the app. Although tenants share physical resources (such as VMs or storage), each tenant gets its own logical instance of the app. + +Typically, application data is shared among the users within a tenant, but not with other tenants. + +![Multitenant](./images/multitenant.png) + +Compare this architecture with a single-tenant architecture, where each tenant has a dedicated physical instance. In a single-tenant architecture, you add tenants by spinning up new instances of the app. + +![Single tenant](./images/single-tenant.png) + +### Multitenancy and horizontal scaling +To achieve scale in the cloud, it’s common to add more physical instances. This is known as *horizontal scaling* or *scaling out*. Consider a web app. To handle more traffic, you can add more server VMs and put them behind a load balancer. Each VM runs a separate physical instance of the web app. + +![Load balancing a web site](./images/load-balancing.png) + +Any request can be routed to any instance. Together, the system functions as a single logical instance. You can tear down a VM or spin up a new VM, without affecting users. In this architecture, each physical instance is multi-tenant, and you scale by adding more instances. If one instance goes down, it should not affect any tenant. + +## Identity in a multitenant app +In a multitenant app, you must consider users in the context of tenants. + +**Authentication** + +* Users sign into the app with their organization credentials. 
They don't have to create new user profiles for the app. +* Users within the same organization are part of the same tenant. +* When a user signs in, the application knows which tenant the user belongs to. + +**Authorization** + +* When authorizing a user's actions (say, viewing a resource), the app must take into account the user's tenant. +* Users might be assigned roles within the application, such as "Admin" or "Standard User". Role assignments should be managed by the customer, not by the SaaS provider. + +**Example.** Alice, an employee at Contoso, navigates to the application in her browser and clicks the “Log in” button. She is redirected to a login screen where she enters her corporate credentials (username and password). At this point, she is logged into the app as `alice@contoso.com`. The application also knows that Alice is an admin user for this application. Because she is an admin, she can see a list of all the resources that belong to Contoso. However, she cannot view Fabrikam's resources, because she is an admin only within her tenant. + +In this guidance, we'll look specifically at using Azure AD for identity management. + +* We assume the customer stores their user profiles in Azure AD (including Office365 and Dynamics CRM tenants) +* Customers with on-premise Active Directory (AD) can use [Azure AD Connect][ADConnect] to sync their on-premise AD with Azure AD. + +If a customer with on-premise AD cannot use Azure AD Connect (due to corporate IT policy or other reasons), the SaaS provider can federate with the customer's AD through Active Directory Federation Services (AD FS). This option is described in [Federating with a customer's AD FS]. + +This guidance does not consider other aspects of multitenancy such as data partitioning, per-tenant configuration, and so forth. 
+ +[**Next**][tailpin] + + + + +[ADConnect]: /azure/active-directory/active-directory-aadconnect/ +[AzureAD]: https://azure.microsoft.com/documentation/services/active-directory/ + +[Federating with a customer's AD FS]: adfs.md +[tailpin]: tailspin.md + +[running-the-app]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/key-vault.md b/docs/multitenant-identity/key-vault.md new file mode 100644 index 00000000000..06e987c38d8 --- /dev/null +++ b/docs/multitenant-identity/key-vault.md @@ -0,0 +1,411 @@ +--- +title: Use Key Vault to protect application secrets +description: How a use the Key Vault service to store application secrets +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 4d03980d-d9ff-4e69-bd44-f2a4af33b63d +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 02/16/2016 +ms.author: mwasson +pnp.series.prev: client-assertion +--- +# Use Azure Key Vault to protect application secrets + +[![GitHub](../_images/github.png) Sample code][sample application] + +It's common to have application settings that are sensitive and must be protected, such as: + +* Database connection strings +* Passwords +* Cryptographic keys + +As a security best practice, you should never store these secrets in source control. It's too easy for them to leak — even if your source code repository is private. And it's not just about keeping secrets from the general public. On larger projects, you might want to restrict which developers and operators can access the production secrets. (Settings for test or development environments are different.) 
+ +A more secure option is to store these secrets in [Azure Key Vault][KeyVault]. Key Vault is a cloud-hosted service for managing cryptographic keys and other secrets. This article shows how to use Key Vault to store configuration settings for you app. + +In the [Tailspin Surveys][Surveys] application, the following settings are secret: + +* The database connection string. +* The Redis connection string. +* The client secret for the web application. + +To store configuration secrets in Key Vault, the Surveys application implements a custom configuration provider, which hooks into the ASP.NET Core 1.0 [configuration system][configuration]. The custom provider reads configuration settings from Key Vault on startup. + +The Surveys application loads configuration settings from the following places: + +* The appsettings.json file +* The [user secrets store][user-secrets] (development environment only; for testing) +* The hosting environment (app settings in Azure web apps) +* Key Vault + +Each of these overrides the previous one, so any settings stored in Key Vault take precedence. + +> [!NOTE] +> By default, the Key Vault configuration provider is disabled. It's not needed for running the application locally. You would enable it in a production deployment. +> +> The Key Vault provider is currently not supported for .NET Core, because it requires the [Microsoft.Azure.KeyVault][Microsoft.Azure.KeyVault] package. +> +> + +At startup, the application reads settings from every registered configuration provider, and uses them to populate a strongly typed options object. (For more information, see [Using Options and configuration objects][options].) + +## Implementation +The [KeyVaultConfigurationProvider][KeyVaultConfigurationProvider] class is a configuration provider that plugs into the ASP.NET Core 1.0 [configuration system][configuration]. 
+ +To use the `KeyVaultConfigurationProvider`, call the `AddKeyVaultSecrets` extension method in the startup class: + +```csharp + var builder = new ConfigurationBuilder() + .SetBasePath(appEnv.ApplicationBasePath) + .AddJsonFile("appsettings.json"); + + if (env.IsDevelopment()) + { + builder.AddUserSecrets(); + } + builder.AddEnvironmentVariables(); + var config = builder.Build(); + + // Add key vault configuration: + builder.AddKeyVaultSecrets(config["AzureAd:ClientId"], + config["KeyVault:Name"], + config["AzureAd:Asymmetric:CertificateThumbprint"], + Convert.ToBoolean(config["AzureAd:Asymmetric:ValidationRequired"]), + loggerFactory); +``` + +Notice that `KeyVaultConfigurationProvider` requires some configuration settings, which need to be stored in one of the other configuration sources. + +When the application starts, `KeyVaultConfigurationProvider` enumerates all of the secrets in the key vault. For each secret, it looks for a tag named 'ConfigKey'. The value of the tag is the name of the configuration setting. + +> [!NOTE] +> [Tags][key-tags] are optional metadata stored with a key. Tags are used here because key names cannot contain colon (:) characters. +> +> + +```csharp +var kvClient = new KeyVaultClient(GetTokenAsync); +var secretsResponseList = await kvClient.GetSecretsAsync(_vault, MaxSecrets, token); +foreach (var secretItem in secretsResponseList.Value) +{ + //The actual config key is stored in a tag with the Key "ConfigKey" + // because ':' is not supported in a shared secret name by Key Vault. + if (secretItem.Tags != null && secretItem.Tags.ContainsKey(ConfigKey)) + { + var secret = await kvClient.GetSecretAsync(secretItem.Id, token); + Data.Add(secret.Tags[ConfigKey], secret.Value); + } +} +``` + +> [!NOTE] +> See [KeyVaultConfigurationProvider.cs]. +> +> + +## Setting up Key Vault in the Surveys app +Prerequisites: + +* Install the [Azure Resource Manager Cmdlets][azure-rm-cmdlets]. 
+* Configure the Surveys application as described in [Running the Surveys application][readme]. + +High-level steps: + +1. Set up an admin user in the tenant. +2. Set up a client certificate. +3. Create a key vault. +4. Add configuration settings to your key vault. +5. Uncomment the code that enables key vault. +6. Update the application's user secrets. + +### Set up an admin user +> [!NOTE] +> To create a key vault, you must use an account which can manage your Azure subscription. Also, any application that you authorize to read from the key vault must registered in the same tenant as that account. +> +> + +In this step, you will make sure that you can create a key vault while signed in as a user from the tenant where the Surveys app is registered. + +First, change the directory associated with your Azure subscription. + +1. Log into the [Azure management portal][azure-management-portal] +2. Click **Settings**. + + ![Settings](./images/settings.png) +3. Select your Azure subscription. +4. Click **Edit Directory** at the bottom of the portal. + + ![Settings](./images/edit-directory.png) +5. In "Change the associated directory", select the Azure AD tenant where the Surveys application is registered, + + ![Settings](./images/edit-directory2.png) +6. Click the arrow button and complete the dialog. + +Create an admin user within the Azure AD tenant where the Surveys application is registered. + +1. Log into the [Azure management portal][azure-management-portal]. +2. Select the Azure AD tenant where your application is registered. +3. Click **Users** > **Add User**. +4. In the **Add User** dialog, assign the user to the Global Admin role. + +Add the admin user as a co-administrator for your Azure subscription. + +1. Log into the [Azure management portal][azure-management-portal]. +2. Click **Settings** and select your Azure subscription. +3. Click **Administrators** +4. Click **Add** at the bottom of the portal. +5. 
Enter the email of the admin user that you created previously. +6. Check the checkbox for the subscription. +7. Click the checkmark button to complete the dialog. + +![Add a co-administrator](./images/co-admin.png) + +### Set up a client certificate +1. Run the PowerShell script [/Scripts/Setup-KeyVault.ps1][Setup-KeyVault] as follows: + + ``` + .\Setup-KeyVault.ps1 -Subject <> + ``` + For the `Subject` parameter, enter any name, such as "surveysapp". The script generates a self-signed certificate and stores it in the "Current User/Personal" certificate store. +2. The output from the script is a JSON fragment. Add this to the application manifest of the web app, as follows: + + 1. Log into the [Azure management portal][azure-management-portal] and navigate to your Azure AD directory. + 2. Click **Applications**. + 3. Select the Surveys application. + 4. Click **Manage Manifest** and select **Download Manifest**. + 5. Open the manifest JSON file in a text editor. Paste the output from the script into the `keyCredentials` property. It should look similar to the following: + + ``` + "keyCredentials": [ + { + "type": "AsymmetricX509Cert", + "usage": "Verify", + "keyId": "29d4f7db-0539-455e-b708-....", + "customKeyIdentifier": "ZEPpP/+KJe2fVDBNaPNOTDoJMac=", + "value": "MIIDAjCCAeqgAwIBAgIQFxeRiU59eL..... + } + ], + ``` + 6. Save your changes to the JSON file. + 7. Go back to the portal. Click **Manage Manifest** > **Upload Manifest** and upload the JSON file. +3. Add the same JSON fragment to the application manifest of the web API (Surveys.WebAPI). +4. Run the following command to get the thumbprint of the certificate. + + ``` + certutil -store -user my [subject] + ``` + where `[subject]` is the value that you specified for Subject in the PowerShell script. The thumbprint is listed under "Cert Hash(sha1)". Remove the spaces between the hexadecimal numbers. + +You will use the thumbprint later. + +### Create a key vault +1. 
Run the PowerShell script [/Scripts/Setup-KeyVault.ps1][Setup-KeyVault] as follows: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName <> -ResourceGroupName <> -Location <> + ``` + + When prompted for credentials, sign in as the Azure AD user that you created earlier. The script creates a new resource group, and a new key vault within that resource group. + + Note: For the -Location parameter, you can use the following PowerShell command to get a list of valid regions: + + ``` + Get-AzureRmResourceProvider -ProviderNamespace "microsoft.keyvault" | Where-Object { $_.ResourceTypes.ResourceTypeName -eq "vaults" } | Select-Object -ExpandProperty Locations + ``` +2. Run SetupKeyVault.ps again, with the following parameters: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName <> -ApplicationIds @("<>", "<>") + ``` + + where + + * key vault name = The name that you gave the key vault in the previous step. + * web app client ID = The client ID for the Surveys web application. + * web api client ID = The client ID for the Surveys.WebAPI application. + + Example: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName tailspinkv -ApplicationIds @("f84df9d1-91cc-4603-b662-302db51f1031", "8871a4c2-2a23-4650-8b46-0625ff3928a6") + ``` + + > [!NOTE] + > You can get the client IDs from the [Azure management portal][azure-management-portal]. Select the Azure AD tenant, select the application, and click **Configure**. + > + > + + This script authorizes the web app and web API to retrieve secrets from your key vault. See [Get started with Azure Key Vault](/azure/key-vault/key-vault-get-started/) for more information. + +### Add configuration settings to your key vault +1. Run SetupKeyVault.ps as follows:: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName < -KeyName RedisCache -KeyValue "<>.redis.cache.windows.net,password=<>,ssl=true" -ConfigName "Redis:Configuration" + ``` + where + + * key vault name = The name that you gave the key vault in the previous step. 
+ * Redis DNS name = The DNS name of your Redis cache instance. + * Redis access key = The access key for your Redis cache instance. + + This command adds a secret to your key vault. The secret is a name/value pair plus a tag: + * The key name isn't used by the application, but must be unique within the Key Vault. + * The value is the value of the configuration option, in this case the Redis connection string. + * the "ConfigKey" tag holds the name of the configuration key. +2. At this point, it's a good idea to test whether you successfully stored the secrets to key vault. Run the following PowerShell command: + + ``` + Get-AzureKeyVaultSecret <> RedisCache | Select-Object * + ``` + The output should show the secret value plus some metadata: + + ![PowerShell output](./images/get-secret.png) +3. Run SetupKeyVault.ps again to add the database connection string: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName < -KeyName ConnectionString -KeyValue <> -ConfigName "Data:SurveysConnectionString" + ``` + + where `<>` is the value of the database connection string. + + For testing with the local database, copy the connection string from the Tailspin.Surveys.Web/appsettings.json file. If you do that, make sure to change the double backslash ('\\\\') into a single backslash. The double backslash is an escape character in the JSON file. + + Example: + + ``` + .\Setup-KeyVault.ps1 -KeyVaultName mykeyvault -KeyName ConnectionString -KeyValue "Server=(localdb)\MSSQLLocalDB;Database=Tailspin.SurveysDB;Trusted_Connection=True;MultipleActiveResultSets=true" -ConfigName "Data:SurveysConnectionString" + ``` + +### Uncomment the code that enables Key Vault +1. Open the Tailspin.Surveys solution. +2. In [Tailspin.Surveys.Web/Startup.cs][web-startup], locate the following code block and uncomment it. 
+ + ```csharp + //#if DNX451 + // _configuration = builder.Build(); + // builder.AddKeyVaultSecrets(_configuration["AzureAd:ClientId"], + // _configuration["KeyVault:Name"], + // _configuration["AzureAd:Asymmetric:CertificateThumbprint"], + // Convert.ToBoolean(_configuration["AzureAd:Asymmetric:ValidationRequired"]), + // loggerFactory); + //#endif + ``` +3. In [Tailspin.Surveys.WebAPI/Startup.cs][web-api-startup], locate the following code block and uncomment it. + + ```csharp + //#if DNX451 + // var config = builder.Build(); + // builder.AddKeyVaultSecrets(config["AzureAd:ClientId"], + // config["KeyVault:Name"], + // config["AzureAd:Asymmetric:CertificateThumbprint"], + // Convert.ToBoolean(config["AzureAd:Asymmetric:ValidationRequired"]), + // loggerFactory); + //#endif + ``` +4. In [Tailspin.Surveys.Web/Startup.cs][web-startup], locate the code that registers the `ICredentialService`. Uncomment the line that uses `CertificateCredentialService`, and comment out the line that uses `ClientCredentialService`: + + ```csharp + // Uncomment this: + services.AddSingleton(); + // Comment out this: + //services.AddSingleton(); + ``` + + This change enables the web app to use [Client assertion][client-assertion] to get OAuth access tokens. With client assertion, you don't need an OAuth client secret. Alternatively, you could store the client secret in key vault. However, key vault and client assertion both use a client certificate, so if you enable key vault, it's a good practice to enable client assertion as well. + +### Update the user secrets +In Solution Explorer, right-click the Tailspin.Surveys.Web project and select **Manage User Secrets**. 
In the secrets.json file, delete the existing JSON and paste in the following: + + ``` + { + "AzureAd": { + "ClientId": "[Surveys web app client ID]", + "PostLogoutRedirectUri": "https://localhost:44300/", + "WebApiResourceId": "[App ID URI of your Surveys.WebAPI application]", + "Asymmetric": { + "CertificateThumbprint": "[certificate thumbprint. Example: 105b2ff3bc842c53582661716db1b7cdc6b43ec9]", + "StoreName": "My", + "StoreLocation": "CurrentUser", + "ValidationRequired": "false" + } + }, + "KeyVault": { + "Name": "[key vault name]" + } + } + ``` + +Replace the entries in [square brackets] with the correct values. + +* `AzureAd:ClientId`: The client ID of the Surveys app. +* `AzureAd:WebApiResourceId`: The App ID URI that you specified when you created the Surveys.WebAPI application in Azure AD. +* `Asymmetric:CertificateThumbprint`: The certificate thumbprint that you got previously, when you created the client certificate. +* `KeyVault:Name`: The name of your key vault. + +> [!NOTE] +> `Asymmetric:ValidationRequired` is false because the certificate that you created previously was not signed by a root certificate authority (CA). In production, use a certificate that is signed by a root CA and set `ValidationRequired` to true. +> +> + +Save the updated secrets.json file. + +Next, in Solution Explorer, right-click the Tailspin.Surveys.WebApi project and select **Manage User Secrets**. Delete the existing JSON and paste in the following: + +``` +{ + "AzureAd": { + "ClientId": "[Surveys.WebAPI client ID]", + "WebApiResourceId": "https://tailspin5.onmicrosoft.com/surveys.webapi", + "Asymmetric": { + "CertificateThumbprint": "[certificate thumbprint]", + "StoreName": "My", + "StoreLocation": "CurrentUser", + "ValidationRequired": "false" + } + }, + "KeyVault": { + "Name": "[key vault name]" + } +} +``` + +Replace the entries in [square brackets] and save the secrets.json file. 
+ +> [!NOTE] +> For the web API, make sure to use the client ID for the Surveys.WebAPI application, not the Surveys application. +> +> + +[**Next**][adfs] + + +[adfs]: ./adfs.md +[authorize-app]: /azure/key-vault/key-vault-get-started//#authorize +[azure-management-portal]: https://manage.windowsazure.com/ +[azure-rm-cmdlets]: https://msdn.microsoft.com/library/mt125356.aspx +[client-assertion]: client-assertion.md +[configuration]: https://docs.asp.net/en/latest/fundamentals/configuration.html +[KeyVault]: https://azure.microsoft.com/services/key-vault/ +[KeyVaultConfigurationProvider]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Configuration.KeyVault/KeyVaultConfigurationProvider.cs +[key-tags]: https://msdn.microsoft.com/library/azure/dn903623.aspx#BKMK_Keytags +[Microsoft.Azure.KeyVault]: https://www.nuget.org/packages/Microsoft.Azure.KeyVault/ +[options]: https://docs.asp.net/en/latest/fundamentals/configuration.html#using-options-and-configuration-objects +[readme]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md +[Setup-KeyVault]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/scripts/Setup-KeyVault.ps1 +[Surveys]: tailspin.md +[user-secrets]: http://go.microsoft.com/fwlink/?LinkID=532709 +[web-startup]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Startup.cs +[web-api-startup]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.WebAPI/Startup.cs + +[KeyVaultConfigurationProvider.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Configuration.KeyVault/KeyVaultConfigurationProvider.cs +[sample application]: 
https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/signup.md b/docs/multitenant-identity/signup.md new file mode 100644 index 00000000000..44c5f222284 --- /dev/null +++ b/docs/multitenant-identity/signup.md @@ -0,0 +1,307 @@ +--- +title: Sign-up and tenant onboarding in multitenant applications +description: How to onboard tenants in a multitenant application +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 57d5538a-8d59-4519-a8b1-2f8b16db02eb +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 05/23/2016 +ms.author: mwasson +pnp.series.prev: claims +pnp.series.next: app-roles +--- +# Tenant sign-up and onboarding + +[![GitHub](../_images/github.png) Sample code][sample application] + +This article describes how to implement a *sign-up* process in a multi-tenant application, which allows a customer to sign up their organization for your application. +There are several reasons to implement a sign-up process: + +* Allow an AD admin to consent for the customer's entire organization to use the application. +* Collect credit card payment or other customer information. +* Perform any one-time per-tenant setup needed by your application. + +## Admin consent and Azure AD permissions +In order to authenticate with Azure AD, an application needs access to the user's directory. At a minimum, the application needs permission to read the user's profile. The first time that a user signs in, Azure AD shows a consent page that lists the permissions being requested. By clicking **Accept**, the user grants permission to the application. + +By default, consent is granted on a per-user basis. Every user who signs in sees the consent page. 
However, Azure AD also supports *admin consent*, which allows an AD administrator to consent for an entire organization. + +When the admin consent flow is used, the consent page states that the AD admin is granting permission on behalf of the entire tenant: + +![Admin consent prompt](./images/admin-consent.png) + +After the admin clicks **Accept**, other users within the same tenant can sign in, and Azure AD will skip the consent screen. + +Only an AD administrator can give admin consent, because it grants permission on behalf of the entire organization. If a non-administrator tries to authenticate with the admin consent flow, Azure AD displays an error: + +![Consent error](./images/consent-error.png) + +If the application requires additional permissions at a later point, the customer will need to sign up again and consent to the updated permissions. + +## Implementing tenant sign-up +For the [Tailspin Surveys][Tailspin] application, we defined several requirements for the sign-up process: + +* A tenant must sign up before users can sign in. +* Sign-up uses the admin consent flow. +* Sign-up adds the user's tenant to the application database. +* After a tenant signs up, the application shows an onboarding page. + +In this section, we'll walk through our implementation of the sign-up process. +It's important to understand that "sign up" versus "sign in" is an application concept. During the authentication flow, Azure AD does not inherently know whether the user is in process of signing up. It's up to the application to keep track of the context. + +When an anonymous user visits the Surveys application, the user is shown two buttons, one to sign in, and one to "enroll your company" (sign up). + +![Application sign-up page](./images/sign-up-page.png) + +These buttons invoke actions in the [AccountController] class. + +The `SignIn` action returns a **ChallegeResult**, which causes the OpenID Connect middleware to redirect to the authentication endpoint. 
This is the default way to trigger authentication in ASP.NET Core 1.0. + +```csharp +[AllowAnonymous] +public IActionResult SignIn() +{ + return new ChallengeResult( + OpenIdConnectDefaults.AuthenticationScheme, + new AuthenticationProperties + { + IsPersistent = true, + RedirectUri = Url.Action("SignInCallback", "Account") + }); +} +``` + +Now compare the `SignUp` action: + +```csharp +[AllowAnonymous] +public IActionResult SignUp() +{ + // Workaround for https://github.com/aspnet/Security/issues/546 + HttpContext.Items.Add("signup", "true"); + + var state = new Dictionary { { "signup", "true" }}; + return new ChallengeResult( + OpenIdConnectDefaults.AuthenticationScheme, + new AuthenticationProperties(state) + { + RedirectUri = Url.Action(nameof(SignUpCallback), "Account") + }); +} +``` + +Like `SignIn`, the `SignUp` action also returns a `ChallengeResult`. But this time, we add a piece of state information to the `AuthenticationProperties` in the `ChallengeResult`: + +* signup: A Boolean flag, indicating that the user has started the sign-up process. + +The state information in `AuthenticationProperties` gets added to the OpenID Connect [state] parameter, which round trips during the authentication flow. + +![State parameter](./images/state-parameter.png) + +After the user authenticates in Azure AD and gets redirected back to the application, the authentication ticket contains the state. We are using this fact to make sure the "signup" value persists across the entire authentication flow. + +## Adding the admin consent prompt +In Azure AD, the admin consent flow is triggered by adding a "prompt" parameter to the query string in the authentication request: + +``` +/authorize?prompt=admin_consent&... +``` + +The Surveys application adds the prompt during the `RedirectToAuthenticationEndpoint` event. This event is called right before the middleware redirects to the authentication endpoint. 
+ +```csharp +public override Task RedirectToAuthenticationEndpoint(RedirectContext context) +{ + if (context.IsSigningUp()) + { + context.ProtocolMessage.Prompt = "admin_consent"; + } + + _logger.RedirectToIdentityProvider(); + return Task.FromResult(0); +} +``` + +> [!NOTE] +> See [SurveyAuthenticationEvents.cs]. +> +> + +Setting `ProtocolMessage.Prompt` tells the middleware to add the "prompt" parameter to the authentication request. + +Note that the prompt is only needed during sign-up. Regular sign-in should not include it. To distinguish between them, we check for the `signup` value in the authentication state. The following extension method checks for this condition: + +```csharp +internal static bool IsSigningUp(this BaseControlContext context) +{ + Guard.ArgumentNotNull(context, nameof(context)); + + string signupValue; + object obj; + // Check the HTTP context and convert to string + if (context.HttpContext.Items.TryGetValue("signup", out obj)) + { + signupValue = (string)obj; + } + else + { + // It's not in the HTTP context, so check the authentication ticket. If it's not there, we aren't signing up. + if ((context.AuthenticationTicket == null) || + (!context.AuthenticationTicket.Properties.Items.TryGetValue("signup", out signupValue))) + { + return false; + } + } + + // We have found the value, so see if it's valid + bool isSigningUp; + if (!bool.TryParse(signupValue, out isSigningUp)) + { + // The value for signup is not a valid boolean, throw + throw new InvalidOperationException($"'{signupValue}' is an invalid boolean value"); + } + + return isSigningUp; +} +``` + +> [!NOTE] +> See [BaseControlContextExtensions.cs]. +> +> [!NOTE] +> This code includes a workaround for a known bug in ASP.NET Core 1.0 RC1. In the `RedirectToAuthenticationEndpoint` event, there is no way to get the authentication properties that contain the "signup" state. As a workaround, the `AccountController.SignUp` method also puts the "signup" state into the `HttpContext`. 
This works because `RedirectToAuthenticationEndpoint` happens before the redirect, so we still have the same `HttpContext`. +> +> + +## Registering a Tenant +The Surveys application stores some information about each tenant and user in the application database. + +![Tenant table](./images/tenant-table.png) + +In the Tenant table, IssuerValue is the value of the issuer claim for the tenant. For Azure AD, this is `https://sts.windows.net/<tenantID>` and gives a unique value per tenant. + +When a new tenant signs up, the Surveys application writes a tenant record to the database. This happens inside the `AuthenticationValidated` event. (Don't do it before this event, because the ID token won't be validated yet, so you can't trust the claim values. See [Authentication].) + +Here is the relevant code from the Surveys application: + +```csharp +public override async Task AuthenticationValidated(AuthenticationValidatedContext context) +{ + var principal = context.AuthenticationTicket.Principal; + var userId = principal.GetObjectIdentifierValue(); + var tenantManager = context.HttpContext.RequestServices.GetService(); + var userManager = context.HttpContext.RequestServices.GetService(); + var issuerValue = principal.GetIssuerValue(); + _logger.AuthenticationValidated(userId, issuerValue); + + // Normalize the claims first. + NormalizeClaims(principal); + var tenant = await tenantManager.FindByIssuerValueAsync(issuerValue) + .ConfigureAwait(false); + + if (context.IsSigningUp()) + { + // Originally, we were checking to see if the tenant was non-null, however, this would not allow + // permission changes to the application in AAD since a re-consent may be required. Now we just don't + // try to recreate the tenant. + if (tenant == null) + { + tenant = await SignUpTenantAsync(context, tenantManager) + .ConfigureAwait(false); + } + + // In this case, we need to go ahead and set up the user signing us up. 
+ await CreateOrUpdateUserAsync(context.AuthenticationTicket, userManager, tenant) + .ConfigureAwait(false); + } + else + { + if (tenant == null) + { + _logger.UnregisteredUserSignInAttempted(userId, issuerValue); + throw new SecurityTokenValidationException($"Tenant {issuerValue} is not registered"); + } + + await CreateOrUpdateUserAsync(context.AuthenticationTicket, userManager, tenant) + .ConfigureAwait(false); + } +} +``` + +> [!NOTE] +> See [SurveyAuthenticationEvents.cs]. +> +> + +This code does the following: + +1. Check if the tenant's issuer value is already in the database. If the tenant has not signed up, `FindByIssuerValueAsync` returns null. +2. If the user is signing up: + 1. Add the tenant to the database (`SignUpTenantAsync`). + 2. Add the authenticated user to the database (`CreateOrUpdateUserAsync`). +3. Otherwise complete the normal sign-in flow: + 1. If the tenant's issuer was not found in the database, it means the tenant is not registered, and the customer needs to sign up. In that case, throw an exception to cause the authentication to fail. + 2. Otherwise, create a database record for this user, if there isn't one already (`CreateOrUpdateUserAsync`). + +Here is the [SignUpTenantAsync] method that adds the tenant to the database. 
+ +```csharp +private async Task SignUpTenantAsync(BaseControlContext context, TenantManager tenantManager) +{ + Guard.ArgumentNotNull(context, nameof(context)); + Guard.ArgumentNotNull(tenantManager, nameof(tenantManager)); + + var principal = context.AuthenticationTicket.Principal; + var issuerValue = principal.GetIssuerValue(); + var tenant = new Tenant + { + IssuerValue = issuerValue, + Created = DateTimeOffset.UtcNow + }; + + try + { + await tenantManager.CreateAsync(tenant) + .ConfigureAwait(false); + } + catch(Exception ex) + { + _logger.SignUpTenantFailed(principal.GetObjectIdentifierValue(), issuerValue, ex); + throw; + } + + return tenant; +} +``` + +Here is a summary of the entire sign-up flow in the Surveys application: + +1. The user clicks the **Sign Up** button. +2. The `AccountController.SignUp` action returns a challenge result. The authentication state includes the "signup" value. +3. In the `RedirectToAuthenticationEndpoint` event, add the `admin_consent` prompt. +4. The OpenID Connect middleware redirects to Azure AD and the user authenticates. +5. In the `AuthenticationValidated` event, look for the "signup" state. +6. Add the tenant to the database. 
+ +[**Next**][app roles] + + +[app roles]: app-roles.md +[Tailspin]: tailspin.md + +[AccountController]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Controllers/AccountController.cs +[state]: http://openid.net/specs/openid-connect-core-1_0.html#AuthRequest +[SurveyAuthenticationEvents.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Security/SurveyAuthenticationEvents.cs +[BaseControlContextExtensions.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Security/BaseControlContextExtensions.cs +[Authentication]: authenticate.md +[SignUpTenantAsync]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Web/Security/SurveyAuthenticationEvents.cs +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/tailspin.md b/docs/multitenant-identity/tailspin.md new file mode 100644 index 00000000000..babe096b928 --- /dev/null +++ b/docs/multitenant-identity/tailspin.md @@ -0,0 +1,78 @@ +--- +title: About the Tailspin Surveys application +description: Tailspin Surveys application overview +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 5f8280c1-1670-46e9-a1ba-a2de2560970e +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 05/23/2016 +ms.author: mwasson +pnp.series.prev: index +pnp.series.next: authenticate +--- +# The Tailspin scenario + +[![GitHub](../_images/github.png) Sample code][sample application] + +Tailspin is a fictitious company that is developing a SaaS application named Surveys. 
This application enables organizations to create and publish online surveys. + +* An organization can sign up for the application. +* After the organization is signed up, users can sign into the application with their organizational credentials. +* Users can create, edit, and publish surveys. + +> [!NOTE] +> To get started with the application, see [Running the Surveys application]. +> +> + +## Users can create, edit, and view surveys +An authenticated user can view all the surveys that he or she has created or has contributor rights to, and create new surveys. Notice that the user is signed in with his organizational identity, `bob@contoso.com`. + +![Surveys app](./images/surveys-screenshot.png) + +This screenshot shows the Edit Survey page: + +![Edit survey](./images/edit-survey.png) + +Users can also view any surveys created by other users within the same tenant. + +![Tenant surveys](./images/tenant-surveys.png) + +## Survey owners can invite contributors +When a user creates a survey, he or she can invite other people to be contributors on the survey. Contributors can edit the survey, but cannot delete or publish it. + +![Add contributor](./images/add-contributor.png) + +A user can add contributors from other tenants, which enables cross-tenant sharing of resources. In this screenshot, Bob (`bob@contoso.com`) is adding Alice (`alice@fabrikam.com`) as a contributor to a survey that Bob created. + +When Alice logs in, she sees the survey listed under "Surveys I can contribute to". + +![Survey contributor](./images/contributor.png) + +Note that Alice signs into her own tenant, not as a guest of the Contoso tenant. Alice has contributor permissions only for that survey — she cannot view other surveys from the Contoso tenant. + +## Architecture +The Surveys application consists of a web front end and a web API backend. Both are implemented using [ASP.NET Core 1.0]. + +The web application uses Azure Active Directory (Azure AD) to authenticate users. 
The web application also calls Azure AD to get OAuth 2 access tokens for the Web API. Access tokens are cached in Azure Redis Cache. The cache enables multiple instances to share the same token cache (e.g., in a server farm). + +![Architecture](./images/architecture.png) + +[**Next**][authentication] + + + +[authentication]: authenticate.md + +[Running the Surveys application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md +[ASP.NET Core 1.0]: https://docs.asp.net/en/latest/ +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/toc.md b/docs/multitenant-identity/toc.md new file mode 100644 index 00000000000..c220ae20235 --- /dev/null +++ b/docs/multitenant-identity/toc.md @@ -0,0 +1,14 @@ +# Manage Identity in Multitenant Applications +## [Introduction](./index.md) +## [The Tailspin scenario](./tailspin.md) +## [Authentication](./authenticate.md) +## [Claims-based identity](./claims.md) +## [Tenant sign-up](./signup.md) +## [Application roles](./app-roles.md) +## [Authorization](./authorize.md) +## [Secure a web API](./web-api.md) +## [Cache access tokens](./token-cache.md) +## [Client assertion](./client-assertion.md) +## [Protect application secrets](./key-vault.md) +## [Federate with a customer's AD FS](./adfs.md) + \ No newline at end of file diff --git a/docs/multitenant-identity/token-cache.md b/docs/multitenant-identity/token-cache.md new file mode 100644 index 00000000000..9a75ec604fe --- /dev/null +++ b/docs/multitenant-identity/token-cache.md @@ -0,0 +1,163 @@ +--- +title: Cache access tokens in a multitenant application +description: Caching access tokens used for invoking a backend Web API +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 
6ab9a772-d312-4485-bcc8-ae3bd33fc334 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 02/16/2016 +ms.author: mwasson +pnp.series.prev: web-api +pnp.series.next: adfs +--- +# Cache access tokens + +[![GitHub](../_images/github.png) Sample code][sample application] + +It's relatively expensive to get an OAuth access token, because it requires an HTTP request to the token endpoint. Therefore, it's good to cache tokens whenever possible. The [Azure AD Authentication Library][ADAL] (ADAL) automatically caches tokens obtained from Azure AD, including refresh tokens. + +ADAL provides a default token cache implementation. However, this token cache is intended for native client apps, and is *not* suitable for web apps: + +* It is a static instance, and not thread safe. +* It doesn't scale to large numbers of users, because tokens from all users go into the same dictionary. +* It can't be shared across web servers in a farm. + +Instead, you should implement a custom token cache that derives from the ADAL `TokenCache` class but is suitable for a server environment and provides the desirable level of isolation between tokens for different users. + +The `TokenCache` class stores a dictionary of tokens, indexed by issuer, resource, client ID, and user. A custom token cache should write this dictionary to a backing store, such as a Redis cache. + +In the Tailspin Surveys application, the `DistributedTokenCache` class implements the token cache. This implementation uses the [IDistributedCache][distributed-cache] abstraction from ASP.NET Core 1.0. That way, any `IDistributedCache` implementation can be used as a backing store. + +* By default, the Surveys app uses a Redis cache. +* For a single-instance web server, you could use the ASP.NET Core 1.0 [in-memory cache][in-memory-cache]. (This is also a good option for running the app locally during development.) 
+ +> [!NOTE] +> Currently the Redis cache is not supported for .NET Core. +> +> + +`DistributedTokenCache` stores the cache data as key/value pairs in the backing store. The key is the user ID plus client ID, so the backing store holds separate cache data for each unique combination of user/client. + +![Token cache](./images/token-cache.png) + +The backing store is partitioned by user. For each HTTP request, the tokens for that user are read from the backing store and loaded into the `TokenCache` dictionary. If Redis is used as the backing store, every server instance in a server farm reads/writes to the same cache, and this approach scales to many users. + +## Encrypting cached tokens +Tokens are sensitive data, because they grant access to a user's resources. (Moreover, unlike a user's password, you can't just store a hash of the token.) Therefore, it's critical to protect tokens from being compromised. The Redis-backed cache is protected by a password, but if someone obtains the password, they could get all of the cached access tokens. For that reason, the `DistributedTokenCache` encrypts everything that it writes to the backing store. Encryption is done using the ASP.NET Core 1.0 [data protection][data-protection] APIs. + +> [!NOTE] +> If you deploy to Azure Web Sites, the encryption keys are backed up to network storage and synchronized across all machines (see [Key Management][key-management]). By default, keys are not encrypted when running in Azure Web Sites, but you can [enable encryption using an X.509 certificate][x509-cert-encryption]. +> +> + +## DistributedTokenCache implementation +The [DistributedTokenCache][DistributedTokenCache] class derives from the ADAL [TokenCache][tokencache-class] class. 
+ +In the constructor, the `DistributedTokenCache` class creates a key for the current user and loads the cache from the backing store: + +```csharp +public DistributedTokenCache( + ClaimsPrincipal claimsPrincipal, + IDistributedCache distributedCache, + ILoggerFactory loggerFactory, + IDataProtectionProvider dataProtectionProvider) + : base() +{ + _claimsPrincipal = claimsPrincipal; + _cacheKey = BuildCacheKey(_claimsPrincipal); + _distributedCache = distributedCache; + _logger = loggerFactory.CreateLogger(); + _protector = dataProtectionProvider.CreateProtector(typeof(DistributedTokenCache).FullName); + AfterAccess = AfterAccessNotification; + LoadFromCache(); +} +``` + +The key is created by concatenating the user ID and client ID. Both of these are taken from claims found in the user's `ClaimsPrincipal`: + +```csharp +private static string BuildCacheKey(ClaimsPrincipal claimsPrincipal) +{ + string clientId = claimsPrincipal.FindFirstValue("aud", true); + return string.Format( + "UserId:{0}::ClientId:{1}", + claimsPrincipal.GetObjectIdentifierValue(), + clientId); +} +``` + +To load the cache data, read the serialized blob from the backing store, and call `TokenCache.Deserialize` to convert the blob into cache data. + +```csharp +private void LoadFromCache() +{ + byte[] cacheData = _distributedCache.Get(_cacheKey); + if (cacheData != null) + { + this.Deserialize(_protector.Unprotect(cacheData)); + } +} +``` + +Whenever ADAL accesses the cache, it fires an `AfterAccess` event. If the cache data has changed, the `HasStateChanged` property is true. In that case, update the backing store to reflect the change, and then set `HasStateChanged` to false. 
+ +```csharp +public void AfterAccessNotification(TokenCacheNotificationArgs args) +{ + if (this.HasStateChanged) + { + try + { + if (this.Count > 0) + { + _distributedCache.Set(_cacheKey, _protector.Protect(this.Serialize())); + } + else + { + // There are no tokens for this user/client, so remove the item from the cache. + _distributedCache.Remove(_cacheKey); + } + this.HasStateChanged = false; + } + catch (Exception exp) + { + _logger.WriteToCacheFailed(exp); + throw; + } + } +} +``` + +TokenCache sends two other events: + +* `BeforeWrite`. Called immediately before ADAL writes to the cache. You can use this to implement a concurrency strategy +* `BeforeAccess`. Called immediately before ADAL reads from the cache. Here you can reload the cache to get the latest version. + +In our case, we decided not to handle these two events. + +* For concurrency, last write wins. That's OK, because tokens are stored independently for each user + client, so a conflict would only happen if the same user had two concurrent login sessions. +* For reading, we load the cache on every request. Requests are short lived. If the cache gets modified in that time, the next request will pick up the new value. 
+ +[**Next**][client-assertion] + + +[ADAL]: https://msdn.microsoft.com/library/azure/jj573266.aspx +[client-assertion]: ./client-assertion.md +[data-protection]: https://docs.asp.net/en/latest/security/data-protection/index.html +[distributed-cache]: https://docs.microsoft.com/aspnet/core/performance/caching/distributed +[DistributedTokenCache]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.TokenStorage/DistributedTokenCache.cs +[key-management]: https://docs.asp.net/en/latest/security/data-protection/configuration/default-settings.html +[in-memory-cache]: https://docs.microsoft.com/aspnet/core/performance/caching/memory +[tokencache-class]: https://msdn.microsoft.com/library/azure/microsoft.identitymodel.clients.activedirectory.tokencache.aspx +[x509-cert-encryption]: https://docs.asp.net/en/latest/security/data-protection/implementation/key-encryption-at-rest.html#x-509-certificate + +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps diff --git a/docs/multitenant-identity/web-api.md b/docs/multitenant-identity/web-api.md new file mode 100644 index 00000000000..12eae262225 --- /dev/null +++ b/docs/multitenant-identity/web-api.md @@ -0,0 +1,279 @@ +--- +title: Secure a backend web API in a multitenant application +description: How to secure a backend web API +services: '' +documentationcenter: na +author: MikeWasson +manager: roshar +editor: '' +tags: '' +pnp.series.title: Manage Identity in Multitenant Applications +ms.assetid: 9438e174-a57f-459c-9d83-98f6d34f6f75 +ms.service: guidance +ms.devlang: dotnet +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 06/02/2016 +ms.author: mwasson +pnp.series.prev: authorize +pnp.series.next: token-cache +--- +# Secure a backend web API + +[![GitHub](../_images/github.png) Sample code][sample application] + +The [Tailspin Surveys] application uses a backend web API to manage CRUD 
operations on surveys. For example, when a user clicks "My Surveys", the web application sends an HTTP request to the web API: + +``` +GET /users/{userId}/surveys +``` + +The web API returns a JSON object: + +``` +{ + "Published":[], + "Own":[ + {"Id":1,"Title":"Survey 1"}, + {"Id":3,"Title":"Survey 3"} + ], + "Contribute": [{"Id":8,"Title":"My survey"}] +} +``` + +The web API does not allow anonymous requests, so the web app must authenticate itself using OAuth 2 bearer tokens. + +> [!NOTE] +> This is a server-to-server scenario. The application does not make any AJAX calls to the API from the browser client. +> +> + +There are two main approaches you can take: + +* Delegated user identity. The web application authenticates with the user's identity. +* Application identity. The web application authenticates with its client ID, using OAuth2 client credential flow. + +The Tailspin application implements delegated user identity. Here are the main differences: + +**Delegated user identity** + +* The bearer token sent to the web API contains the user identity. +* The web API makes authorization decisions based on the user identity. +* The web application needs to handle 403 (Forbidden) errors from the web API, if the user is not authorized to perform an action. +* Typically, the web application still makes some authorization decisions that affect UI, such as showing or hiding UI elements. +* The web API can potentially be used by untrusted clients, such as a JavaScript application or a native client application. + +**Application identity** + +* The web API does not get information about the user. +* The web API cannot perform any authorization based on the user identity. All authorization decisions are made by the web application. +* The web API cannot be used by an untrusted client (JavaScript or native client application). +* This approach may be somewhat simpler to implement, because there is no authorization logic in the Web API. 
+ +In either approach, the web application must get an access token, which is the credential needed to call the web API. + +* For delegated user identity, the token has to come from the IDP, which can issue a token on behalf of the user. +* For client credentials, an application might get the token from the IDP or host its own token server. (But don't write a token server from scratch; use a well-tested framework like [IdentityServer3].) If you authenticate with Azure AD, it's strongly recommended to get the access token from Azure AD, even with client credential flow. + +The rest of this article assumes the application is authenticating with Azure AD. + +![Getting the access token](./images/access-token.png) + +## Register the web API in Azure AD +In order for Azure AD to issue a bearer token for the web API, you need to configure some things in Azure AD. + +1. [Register the web API in Azure AD]. +2. Add the client ID of the web app to the web API application manifest, in the `knownClientApplications` property. See [Update the application manifests]. +3. [Give the web application permission to call the web API]. + + In the Azure Management Portal, you can set two types of permissions: "Application Permissions" for application identity (client credential flow), or "Delegated Permissions" for delegated user identity. + + ![Delegated permissions](./images/delegated-permissions.png) + +## Getting an access token +Before calling the web API, the web application gets an access token from Azure AD. In a .NET application, use the [Azure AD Authentication Library (ADAL) for .NET][ADAL]. + +In the OAuth 2 authorization code flow, the application exchanges an authorization code for an access token. The following code uses ADAL to get the access token. This code is called during the `AuthorizationCodeReceived` event. + +```csharp +// The OpenID Connect middleware sends this event when it gets the authorization code. 
+public override async Task AuthorizationCodeReceived(AuthorizationCodeReceivedContext context) +{ + string authorizationCode = context.ProtocolMessage.Code; + string authority = "https://login.microsoftonline.com/" + tenantID; + string resourceID = "https://tailspin.onmicrosoft.com/surveys.webapi"; // App ID URI + ClientCredential credential = new ClientCredential(clientId, clientSecret); + + AuthenticationContext authContext = new AuthenticationContext(authority, tokenCache); + AuthenticationResult authResult = await authContext.AcquireTokenByAuthorizationCodeAsync( + authorizationCode, new Uri(redirectUri), credential, resourceID); + + // If successful, the token is in authResult.AccessToken +} +``` + +Here are the various parameters that are needed: + +* `authority`. Derived from the tenant ID of the signed in user. (Not the tenant ID of the SaaS provider.) +* `authorizationCode`. The auth code that you got back from the IDP. +* `clientId`. The web application's client ID. +* `clientSecret`. The web application's client secret. +* `redirectUri`. The redirect URI that you set for OpenID Connect. This is where the IDP calls back with the token. +* `resourceID`. The App ID URI of the web API, which you created when you registered the web API in Azure AD. +* `tokenCache`. An object that caches the access tokens. See [Token caching]. + +If `AcquireTokenByAuthorizationCodeAsync` succeeds, ADAL caches the token. Later, you can get the token from the cache by calling `AcquireTokenSilentAsync`: + +```csharp +AuthenticationContext authContext = new AuthenticationContext(authority, tokenCache); +var result = await authContext.AcquireTokenSilentAsync(resourceID, credential, new UserIdentifier(userId, UserIdentifierType.UniqueId)); +``` + +where `userId` is the user's object ID, which is found in the `http://schemas.microsoft.com/identity/claims/objectidentifier` claim. 
+ +## Using the access token to call the web API +Once you have the token, send it in the Authorization header of the HTTP requests to the web API. + +``` +Authorization: Bearer xxxxxxxxxx +``` + +The following extension method from the Surveys application sets the Authorization header on an HTTP request, using the **HttpClient** class. + +```csharp +public static async Task SendRequestWithBearerTokenAsync(this HttpClient httpClient, HttpMethod method, string path, object requestBody, string accessToken, CancellationToken ct) +{ + var request = new HttpRequestMessage(method, path); + if (requestBody != null) + { + var json = JsonConvert.SerializeObject(requestBody, Formatting.None); + var content = new StringContent(json, Encoding.UTF8, "application/json"); + request.Content = content; + } + + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", accessToken); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + + var response = await httpClient.SendAsync(request, ct); + return response; +} +``` + +> [!NOTE] +> See [HttpClientExtensions.cs]. +> +> + +## Authenticating in the web API +The web API has to authenticate the bearer token. In ASP.NET Core 1.0, you can use the [Microsoft.AspNet.Authentication.JwtBearer][JwtBearer] package. This package provides middleware that enables the application to receive OpenID Connect bearer tokens. + +Register the middleware in your web API `Startup` class. + +```csharp +app.UseJwtBearerAuthentication(options => +{ + options.Audience = "[app ID URI]"; + options.Authority = "https://login.microsoftonline.com/common/"; + options.TokenValidationParameters = new TokenValidationParameters + { + //Instead of validating against a fixed set of known issuers, we perform custom multi-tenant validation logic + ValidateIssuer = false, + }; + options.Events = new SurveysJwtBearerEvents(); +}); +``` + +> [!NOTE] +> See [Startup.cs]. +> +> + +* **Audience**. 
Set this to the App ID URI for the web API, which you created when you registered the web API with Azure AD. +* **Authority**. For a multitenant application, set this to `https://login.microsoftonline.com/common/`. +* **TokenValidationParameters**. For a multitenant application, set **ValidateIssuer** to false. That means the middleware skips issuer validation, and the application is responsible for validating the issuer itself. +* **Events** is a class that derives from **JwtBearerEvents**. + +### Issuer validation +Validate the token issuer in the **JwtBearerEvents.ValidatedToken** event. The issuer is sent in the "iss" claim. + +In the Surveys application, the web API doesn't handle [tenant sign-up]. Therefore, it just checks if the issuer is already in the application database. If not, it throws an exception, which causes authentication to fail. + +```csharp +public override async Task ValidatedToken(ValidatedTokenContext context) +{ + var principal = context.AuthenticationTicket.Principal; + var tenantManager = context.HttpContext.RequestServices.GetService(); + var userManager = context.HttpContext.RequestServices.GetService(); + var issuerValue = principal.GetIssuerValue(); + var tenant = await tenantManager.FindByIssuerValueAsync(issuerValue); + + if (tenant == null) + { + // the caller was not from a trusted issuer - throw to block the authentication flow + throw new SecurityTokenValidationException(); + } +} +``` + +> [!NOTE] +> See [SurveysJwtBearerEvents.cs]. +> +> + +You can also use the **ValidatedToken** event to do [claims transformation]. Remember that the claims come directly from Azure AD, so if the web application did any claims transformations, those are not reflected in the bearer token that the web API receives. + +## Authorization +For a general discussion of authorization, see [Role-based and resource-based authorization][Authorization]. + +The JwtBearer middleware handles the authorization responses. 
For example, to restrict a controller action to authenticated users, use the **[Authorize]** attribute and specify **JwtBearerDefaults.AuthenticationScheme** as the authentication scheme: + +```csharp +[Authorize(ActiveAuthenticationSchemes = JwtBearerDefaults.AuthenticationScheme)] +``` + +This returns a 401 status code if the user is not authenticated. + +To restrict a controller action by authorization policy, specify the policy name in the **[Authorize]** attribute: + +```csharp +[Authorize(Policy = PolicyNames.RequireSurveyCreator)] +``` + +This returns a 401 status code if the user is not authenticated, and 403 if the user is authenticated but not authorized. Register the policy on startup: + +```csharp +public void ConfigureServices(IServiceCollection services) +{ + services.AddAuthorization(options => + { + options.AddPolicy(PolicyNames.RequireSurveyCreator, + policy => + { + policy.AddRequirements(new SurveyCreatorRequirement()); + policy.AddAuthenticationSchemes(JwtBearerDefaults.AuthenticationScheme); + }); + }); +} +``` + +[**Next**][token cache] + + +[ADAL]: https://msdn.microsoft.com/library/azure/jj573266.aspx +[JwtBearer]: https://www.nuget.org/packages/Microsoft.AspNet.Authentication.JwtBearer + +[Tailspin Surveys]: tailspin.md +[IdentityServer3]: https://github.com/IdentityServer/IdentityServer3 +[Register the web API in Azure AD]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md#register-the-surveys-web-api +[Update the application manifests]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md#update-the-application-manifests +[Give the web application permission to call the web API]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/docs/running-the-app.md#give-the-web-app-permissions-to-call-the-web-api +[Token caching]: token-cache.md +[HttpClientExtensions.cs]: 
https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.Common/HttpClientExtensions.cs +[Startup.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.WebAPI/Startup.cs +[tenant sign-up]: signup.md +[SurveysJwtBearerEvents.cs]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps/blob/master/src/Tailspin.Surveys.WebAPI/SurveyJwtBearerEvents.cs +[claims transformation]: claims.md#claims-transformations +[Authorization]: authorize.md +[sample application]: https://github.com/Azure-Samples/guidance-identity-management-for-multitenant-apps +[token cache]: token-cache.md diff --git a/docs/patterns/_categories.yml b/docs/patterns/_categories.yml new file mode 100644 index 00000000000..0c92b0e0482 --- /dev/null +++ b/docs/patterns/_categories.yml @@ -0,0 +1,35 @@ +--- +categories: + +- title: Availability + url: availability + description: Availability defines the proportion of time that the system is functional and working. It will be affected by system errors, infrastructure problems, malicious attacks, and system load. It is usually measured as a percentage of uptime. Cloud applications typically provide users with a service level agreement (SLA), which means that applications must be designed and implemented in a way that maximizes availability. + +- title: Data Management + url: data-management + description: Data management is the key element of cloud applications, and influences most of the quality attributes. Data is typically hosted in different locations and across multiple servers for reasons such as performance, scalability or availability, and this can present a range of challenges. For example, data consistency must be maintained, and data will typically need to be synchronized across different locations. 
+ +- title: Design and Implementation + url: design-implementation + description: Good design encompasses factors such as consistency and coherence in component design and deployment, maintainability to simplify administration and development, and reusability to allow components and subsystems to be used in other applications and in other scenarios. Decisions made during the design and implementation phase have a huge impact on the quality and the total cost of ownership of cloud hosted applications and services. + +- title: Messaging + url: messaging + description: The distributed nature of cloud applications requires a messaging infrastructure that connects the components and services, ideally in a loosely coupled manner in order to maximize scalability. Asynchronous messaging is widely used, and provides many benefits, but also brings challenges such as the ordering of messages, poison message management, idempotency, and more. + +- title: Management and Monitoring + url: management-monitoring + description: Cloud applications run in a remote datacenter where you do not have full control of the infrastructure or, in some cases, the operating system. This can make management and monitoring more difficult than an on-premises deployment. Applications must expose runtime information that administrators and operators can use to manage and monitor the system, as well as supporting changing business requirements and customization without requiring the application to be stopped or redeployed. + +- title: Performance and Scalability + url: performance-scalability + description: Performance is an indication of the responsiveness of a system to execute any action within a given time interval, while scalability is the ability of a system either to handle increases in load without impact on performance or for the available resources to be readily increased. Cloud applications typically encounter variable workloads and peaks in activity. 
Predicting these, especially in a multi-tenant scenario, is almost impossible. Instead, applications should be able to scale out within limits to meet peaks in demand, and scale in when demand decreases. Scalability concerns not just compute instances, but other elements such as data storage, messaging infrastructure, and more. + +- title: Resiliency + url: resiliency + description: Resiliency is the ability of a system to gracefully handle and recover from failures. The nature of cloud hosting, where applications are often multi-tenant, use shared platform services, compete for resources and bandwidth, communicate over the Internet, and run on commodity hardware means there is an increased likelihood that both transient and more permanent faults will arise. Detecting failures, and recovering quickly and efficiently, is necessary to maintain resiliency. + +- title: Security + url: security + description: Security is the capability of a system to prevent malicious or accidental actions outside of the designed usage, and to prevent disclosure or loss of information. Cloud applications are exposed on the Internet outside trusted on-premises boundaries, are often open to the public, and may serve untrusted users. Applications must be designed and deployed in a way that protects them from malicious attacks, restricts access to only approved users, and protects sensitive data. 
+--- \ No newline at end of file diff --git a/docs/patterns/_images/cache-aside-diagram.png b/docs/patterns/_images/cache-aside-diagram.png new file mode 100644 index 00000000000..122d71196b6 Binary files /dev/null and b/docs/patterns/_images/cache-aside-diagram.png differ diff --git a/docs/patterns/_images/category-availability.png b/docs/patterns/_images/category-availability.png new file mode 100644 index 00000000000..7b95d926747 Binary files /dev/null and b/docs/patterns/_images/category-availability.png differ diff --git a/docs/patterns/_images/category-data-management.png b/docs/patterns/_images/category-data-management.png new file mode 100644 index 00000000000..101e79ea272 Binary files /dev/null and b/docs/patterns/_images/category-data-management.png differ diff --git a/docs/patterns/_images/category-design-implementation.png b/docs/patterns/_images/category-design-implementation.png new file mode 100644 index 00000000000..7f56ac98aef Binary files /dev/null and b/docs/patterns/_images/category-design-implementation.png differ diff --git a/docs/patterns/_images/category-management-monitoring.png b/docs/patterns/_images/category-management-monitoring.png new file mode 100644 index 00000000000..e12804fa2fb Binary files /dev/null and b/docs/patterns/_images/category-management-monitoring.png differ diff --git a/docs/patterns/_images/category-messaging.png b/docs/patterns/_images/category-messaging.png new file mode 100644 index 00000000000..10fb9164e84 Binary files /dev/null and b/docs/patterns/_images/category-messaging.png differ diff --git a/docs/patterns/_images/category-performance-scalability.png b/docs/patterns/_images/category-performance-scalability.png new file mode 100644 index 00000000000..1b88b144f74 Binary files /dev/null and b/docs/patterns/_images/category-performance-scalability.png differ diff --git a/docs/patterns/_images/category-resiliency.png b/docs/patterns/_images/category-resiliency.png new file mode 100644 index 
00000000000..e7d8f73f770 Binary files /dev/null and b/docs/patterns/_images/category-resiliency.png differ diff --git a/docs/patterns/_images/category-security.png b/docs/patterns/_images/category-security.png new file mode 100644 index 00000000000..e862cd035b2 Binary files /dev/null and b/docs/patterns/_images/category-security.png differ diff --git a/docs/patterns/_images/category/availability.svg b/docs/patterns/_images/category/availability.svg new file mode 100644 index 00000000000..25624b707f4 --- /dev/null +++ b/docs/patterns/_images/category/availability.svg @@ -0,0 +1,85 @@ + + + + + Availability + + + + + + image/svg+xml + + Availability + + + patterns & practices + + + + + + + + + + + + + diff --git a/docs/patterns/_images/category/data-management.svg b/docs/patterns/_images/category/data-management.svg new file mode 100644 index 00000000000..a3d93521e4a --- /dev/null +++ b/docs/patterns/_images/category/data-management.svg @@ -0,0 +1,203 @@ + + + + + Data management + + + + + + image/svg+xml + + Data management + + + patterns & practices + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/patterns/_images/category/design-implementation.svg b/docs/patterns/_images/category/design-implementation.svg new file mode 100644 index 00000000000..1c666112805 --- /dev/null +++ b/docs/patterns/_images/category/design-implementation.svg @@ -0,0 +1,112 @@ + + + + + Design and implementation + + + + + + image/svg+xml + + Design and implementation + + + patterns & practices + + + + + + + + + + + + + + + + + + + + diff --git a/docs/patterns/_images/category/management-monitoring.svg b/docs/patterns/_images/category/management-monitoring.svg new file mode 100644 index 00000000000..bbead71bfec --- /dev/null +++ b/docs/patterns/_images/category/management-monitoring.svg @@ -0,0 +1,126 @@ + + + + + Management and monitoring + + + + + + image/svg+xml + + Management and monitoring + + + patterns & practices + + + + + + + + + + + + + + + + + + + + 
+ + + + diff --git a/docs/patterns/_images/category/messaging.svg b/docs/patterns/_images/category/messaging.svg new file mode 100644 index 00000000000..02a7dc7148b --- /dev/null +++ b/docs/patterns/_images/category/messaging.svg @@ -0,0 +1,108 @@ + + + + + Messaging + + + + + + image/svg+xml + + Messaging + + + patterns & practices + + + + + + + + + + + + + + + + + + + + diff --git a/docs/patterns/_images/category/performance-scalability.svg b/docs/patterns/_images/category/performance-scalability.svg new file mode 100644 index 00000000000..9880d707292 --- /dev/null +++ b/docs/patterns/_images/category/performance-scalability.svg @@ -0,0 +1,79 @@ + + + + + Performance and scalabilities + + + + + + image/svg+xml + + Performance and scalabilities + + + patterns & practices + + + + + + + + + + diff --git a/docs/patterns/_images/category/resiliency.svg b/docs/patterns/_images/category/resiliency.svg new file mode 100644 index 00000000000..73335fa0e0c --- /dev/null +++ b/docs/patterns/_images/category/resiliency.svg @@ -0,0 +1,70 @@ + + + + + Resiliency + + + + + + image/svg+xml + + Resiliency + + + patterns & practices + + + + + + + + + diff --git a/docs/patterns/_images/category/security.svg b/docs/patterns/_images/category/security.svg new file mode 100644 index 00000000000..80b13ec288b --- /dev/null +++ b/docs/patterns/_images/category/security.svg @@ -0,0 +1,69 @@ + + + + + Security + + + + + + image/svg+xml + + Security + + + patterns & practices + + + + + + + + + diff --git a/docs/patterns/_images/circuit-breaker-diagram.png b/docs/patterns/_images/circuit-breaker-diagram.png new file mode 100644 index 00000000000..86057b5905f Binary files /dev/null and b/docs/patterns/_images/circuit-breaker-diagram.png differ diff --git a/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-basic.png b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-basic.png new file mode 100644 index 00000000000..1786ef3dd63 Binary files /dev/null 
and b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-basic.png differ diff --git a/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-separate-stores.png b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-separate-stores.png new file mode 100644 index 00000000000..235fdd70007 Binary files /dev/null and b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-separate-stores.png differ diff --git a/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-tradition-crud.png b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-tradition-crud.png new file mode 100644 index 00000000000..7e7bba6843c Binary files /dev/null and b/docs/patterns/_images/command-and-query-responsibility-segregation-cqrs-tradition-crud.png differ diff --git a/docs/patterns/_images/compensating-transaction-diagram.png b/docs/patterns/_images/compensating-transaction-diagram.png new file mode 100644 index 00000000000..d08faa2c493 Binary files /dev/null and b/docs/patterns/_images/compensating-transaction-diagram.png differ diff --git a/docs/patterns/_images/competing-consumers-diagram.png b/docs/patterns/_images/competing-consumers-diagram.png new file mode 100644 index 00000000000..e6fbb3904f9 Binary files /dev/null and b/docs/patterns/_images/competing-consumers-diagram.png differ diff --git a/docs/patterns/_images/compute-resource-consolidation-diagram.png b/docs/patterns/_images/compute-resource-consolidation-diagram.png new file mode 100644 index 00000000000..efe97d49b08 Binary files /dev/null and b/docs/patterns/_images/compute-resource-consolidation-diagram.png differ diff --git a/docs/patterns/_images/compute-resource-consolidation-lifecycle.png b/docs/patterns/_images/compute-resource-consolidation-lifecycle.png new file mode 100644 index 00000000000..5e8ff1432e2 Binary files /dev/null and b/docs/patterns/_images/compute-resource-consolidation-lifecycle.png differ 
diff --git a/docs/patterns/_images/event-sourcing-bounded-context.png b/docs/patterns/_images/event-sourcing-bounded-context.png new file mode 100644 index 00000000000..c8aa42d4a16 Binary files /dev/null and b/docs/patterns/_images/event-sourcing-bounded-context.png differ diff --git a/docs/patterns/_images/event-sourcing-overview.png b/docs/patterns/_images/event-sourcing-overview.png new file mode 100644 index 00000000000..c4fbb8e6250 Binary files /dev/null and b/docs/patterns/_images/event-sourcing-overview.png differ diff --git a/docs/patterns/_images/external-configuration-store-overview.png b/docs/patterns/_images/external-configuration-store-overview.png new file mode 100644 index 00000000000..9758e5b9701 Binary files /dev/null and b/docs/patterns/_images/external-configuration-store-overview.png differ diff --git a/docs/patterns/_images/federated-identity-multitenat.png b/docs/patterns/_images/federated-identity-multitenat.png new file mode 100644 index 00000000000..51340a2f98e Binary files /dev/null and b/docs/patterns/_images/federated-identity-multitenat.png differ diff --git a/docs/patterns/_images/federated-identity-overview.png b/docs/patterns/_images/federated-identity-overview.png new file mode 100644 index 00000000000..9835bd9584e Binary files /dev/null and b/docs/patterns/_images/federated-identity-overview.png differ diff --git a/docs/patterns/_images/gatekeeper-diagram.png b/docs/patterns/_images/gatekeeper-diagram.png new file mode 100644 index 00000000000..97f34e44afd Binary files /dev/null and b/docs/patterns/_images/gatekeeper-diagram.png differ diff --git a/docs/patterns/_images/gatekeeper-endpoint.png b/docs/patterns/_images/gatekeeper-endpoint.png new file mode 100644 index 00000000000..7e8a9699c8d Binary files /dev/null and b/docs/patterns/_images/gatekeeper-endpoint.png differ diff --git a/docs/patterns/_images/health-endpoint-monitoring-pattern.png b/docs/patterns/_images/health-endpoint-monitoring-pattern.png new file mode 100644 
index 00000000000..905baf9b8a7 Binary files /dev/null and b/docs/patterns/_images/health-endpoint-monitoring-pattern.png differ diff --git a/docs/patterns/_images/index-table-figure-1.png b/docs/patterns/_images/index-table-figure-1.png new file mode 100644 index 00000000000..7c4f7d350d7 Binary files /dev/null and b/docs/patterns/_images/index-table-figure-1.png differ diff --git a/docs/patterns/_images/index-table-figure-2.png b/docs/patterns/_images/index-table-figure-2.png new file mode 100644 index 00000000000..c308ea16a6b Binary files /dev/null and b/docs/patterns/_images/index-table-figure-2.png differ diff --git a/docs/patterns/_images/index-table-figure-3.png b/docs/patterns/_images/index-table-figure-3.png new file mode 100644 index 00000000000..4c3bc706e8d Binary files /dev/null and b/docs/patterns/_images/index-table-figure-3.png differ diff --git a/docs/patterns/_images/index-table-figure-4.png b/docs/patterns/_images/index-table-figure-4.png new file mode 100644 index 00000000000..7d080133bb0 Binary files /dev/null and b/docs/patterns/_images/index-table-figure-4.png differ diff --git a/docs/patterns/_images/index-table-figure-5.png b/docs/patterns/_images/index-table-figure-5.png new file mode 100644 index 00000000000..d931c2fdc27 Binary files /dev/null and b/docs/patterns/_images/index-table-figure-5.png differ diff --git a/docs/patterns/_images/index-table-figure-6.png b/docs/patterns/_images/index-table-figure-6.png new file mode 100644 index 00000000000..fbcd368d0ae Binary files /dev/null and b/docs/patterns/_images/index-table-figure-6.png differ diff --git a/docs/patterns/_images/index-table-figure-7.png b/docs/patterns/_images/index-table-figure-7.png new file mode 100644 index 00000000000..7436a798737 Binary files /dev/null and b/docs/patterns/_images/index-table-figure-7.png differ diff --git a/docs/patterns/_images/index-table-figure-8.png b/docs/patterns/_images/index-table-figure-8.png new file mode 100644 index 00000000000..15ff6e6f853 
Binary files /dev/null and b/docs/patterns/_images/index-table-figure-8.png differ diff --git a/docs/patterns/_images/leader-election-diagram.png b/docs/patterns/_images/leader-election-diagram.png new file mode 100644 index 00000000000..6f17e504970 Binary files /dev/null and b/docs/patterns/_images/leader-election-diagram.png differ diff --git a/docs/patterns/_images/materialized-view-pattern-diagram.png b/docs/patterns/_images/materialized-view-pattern-diagram.png new file mode 100644 index 00000000000..77a04464af0 Binary files /dev/null and b/docs/patterns/_images/materialized-view-pattern-diagram.png differ diff --git a/docs/patterns/_images/materialized-view-summary-diagram.png b/docs/patterns/_images/materialized-view-summary-diagram.png new file mode 100644 index 00000000000..7c3f43a7e41 Binary files /dev/null and b/docs/patterns/_images/materialized-view-summary-diagram.png differ diff --git a/docs/patterns/_images/pipes-and-filters-load-balancing.png b/docs/patterns/_images/pipes-and-filters-load-balancing.png new file mode 100644 index 00000000000..29d06fcd9fe Binary files /dev/null and b/docs/patterns/_images/pipes-and-filters-load-balancing.png differ diff --git a/docs/patterns/_images/pipes-and-filters-message-queues.png b/docs/patterns/_images/pipes-and-filters-message-queues.png new file mode 100644 index 00000000000..e05ced5058b Binary files /dev/null and b/docs/patterns/_images/pipes-and-filters-message-queues.png differ diff --git a/docs/patterns/_images/pipes-and-filters-modules.png b/docs/patterns/_images/pipes-and-filters-modules.png new file mode 100644 index 00000000000..7751ce0efa0 Binary files /dev/null and b/docs/patterns/_images/pipes-and-filters-modules.png differ diff --git a/docs/patterns/_images/pipes-and-filters-solution.png b/docs/patterns/_images/pipes-and-filters-solution.png new file mode 100644 index 00000000000..f719acdf897 Binary files /dev/null and b/docs/patterns/_images/pipes-and-filters-solution.png differ diff --git 
a/docs/patterns/_images/pnp-logo.svg b/docs/patterns/_images/pnp-logo.svg new file mode 100644 index 00000000000..900524937c3 --- /dev/null +++ b/docs/patterns/_images/pnp-logo.svg @@ -0,0 +1,346 @@ + + + + + patterns & practices + + + + + + image/svg+xml + + patterns & practices + http://aka.ms/practices + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/patterns/_images/priority-queue-pattern.png b/docs/patterns/_images/priority-queue-pattern.png new file mode 100644 index 00000000000..99abcd57c94 Binary files /dev/null and b/docs/patterns/_images/priority-queue-pattern.png differ diff --git a/docs/patterns/_images/priority-queue-separate.png b/docs/patterns/_images/priority-queue-separate.png new file mode 100644 index 00000000000..b2d18214305 Binary files /dev/null and b/docs/patterns/_images/priority-queue-separate.png differ diff --git a/docs/patterns/_images/priority-queue-service-bus.png b/docs/patterns/_images/priority-queue-service-bus.png new file mode 100644 index 00000000000..d4a1721b9ea Binary files /dev/null and b/docs/patterns/_images/priority-queue-service-bus.png differ diff --git a/docs/patterns/_images/queue-based-load-leveling-overwhelmed.png b/docs/patterns/_images/queue-based-load-leveling-overwhelmed.png new file mode 100644 index 00000000000..ec7fcc5bb40 Binary files /dev/null and b/docs/patterns/_images/queue-based-load-leveling-overwhelmed.png differ diff --git a/docs/patterns/_images/queue-based-load-leveling-pattern.png b/docs/patterns/_images/queue-based-load-leveling-pattern.png new file mode 100644 index 00000000000..b161a0924ee Binary files /dev/null and b/docs/patterns/_images/queue-based-load-leveling-pattern.png differ diff --git a/docs/patterns/_images/queue-based-load-leveling-worker-role.png b/docs/patterns/_images/queue-based-load-leveling-worker-role.png new file mode 100644 index 00000000000..6adc792a27f Binary files /dev/null 
and b/docs/patterns/_images/queue-based-load-leveling-worker-role.png differ diff --git a/docs/patterns/_images/retry-pattern.png b/docs/patterns/_images/retry-pattern.png new file mode 100644 index 00000000000..0f177ee2442 Binary files /dev/null and b/docs/patterns/_images/retry-pattern.png differ diff --git a/docs/patterns/_images/runtime-reconfiguration-pattern.png b/docs/patterns/_images/runtime-reconfiguration-pattern.png new file mode 100644 index 00000000000..ee1ec8e4b36 Binary files /dev/null and b/docs/patterns/_images/runtime-reconfiguration-pattern.png differ diff --git a/docs/patterns/_images/scheduler-agent-supervisor-pattern.png b/docs/patterns/_images/scheduler-agent-supervisor-pattern.png new file mode 100644 index 00000000000..405283ae44d Binary files /dev/null and b/docs/patterns/_images/scheduler-agent-supervisor-pattern.png differ diff --git a/docs/patterns/_images/scheduler-agent-supervisor-solution.png b/docs/patterns/_images/scheduler-agent-supervisor-solution.png new file mode 100644 index 00000000000..d9751aa5275 Binary files /dev/null and b/docs/patterns/_images/scheduler-agent-supervisor-solution.png differ diff --git a/docs/patterns/_images/sharding-data-hash.png b/docs/patterns/_images/sharding-data-hash.png new file mode 100644 index 00000000000..64ed295916a Binary files /dev/null and b/docs/patterns/_images/sharding-data-hash.png differ diff --git a/docs/patterns/_images/sharding-sequential-sets.png b/docs/patterns/_images/sharding-sequential-sets.png new file mode 100644 index 00000000000..5b67c0fed96 Binary files /dev/null and b/docs/patterns/_images/sharding-sequential-sets.png differ diff --git a/docs/patterns/_images/sharding-tenant.png b/docs/patterns/_images/sharding-tenant.png new file mode 100644 index 00000000000..f8665df0b12 Binary files /dev/null and b/docs/patterns/_images/sharding-tenant.png differ diff --git a/docs/patterns/_images/static-content-hosting-pattern.png 
b/docs/patterns/_images/static-content-hosting-pattern.png new file mode 100644 index 00000000000..e11955bd664 Binary files /dev/null and b/docs/patterns/_images/static-content-hosting-pattern.png differ diff --git a/docs/patterns/_images/throttling-autoscaling.png b/docs/patterns/_images/throttling-autoscaling.png new file mode 100644 index 00000000000..7d2da6d6c57 Binary files /dev/null and b/docs/patterns/_images/throttling-autoscaling.png differ diff --git a/docs/patterns/_images/throttling-multi-tenant.png b/docs/patterns/_images/throttling-multi-tenant.png new file mode 100644 index 00000000000..e0ee2b1372e Binary files /dev/null and b/docs/patterns/_images/throttling-multi-tenant.png differ diff --git a/docs/patterns/_images/throttling-resource-utilization.png b/docs/patterns/_images/throttling-resource-utilization.png new file mode 100644 index 00000000000..9184f51bad8 Binary files /dev/null and b/docs/patterns/_images/throttling-resource-utilization.png differ diff --git a/docs/patterns/_images/valet-key-pattern.png b/docs/patterns/_images/valet-key-pattern.png new file mode 100644 index 00000000000..e4d077b389d Binary files /dev/null and b/docs/patterns/_images/valet-key-pattern.png differ diff --git a/docs/patterns/cache-aside.md b/docs/patterns/cache-aside.md new file mode 100644 index 00000000000..2dc2cf57c9a --- /dev/null +++ b/docs/patterns/cache-aside.md @@ -0,0 +1,160 @@ +--- +title: Cache-Aside +description: Load data on demand into a cache from a data store +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Cache-Aside + +[!INCLUDE [header](../_includes/header.md)] + +Load data on demand into a cache from a data store. 
This can improve performance and also helps to maintain consistency between data held in the cache and data in the underlying data store. + +## Context and problem + +Applications use a cache to improve repeated access to information held in a data store. However, it's impractical to expect that cached data will always be completely consistent with the data in the data store. Applications should implement a strategy that helps to ensure that the data in the cache is as up-to-date as possible, but can also detect and handle situations that arise when the data in the cache has become stale. + +## Solution + +Many commercial caching systems provide read-through and write-through/write-behind operations. In these systems, an application retrieves data by referencing the cache. If the data isn't in the cache, it's retrieved from the data store and added to the cache. Any modifications to data held in the cache are automatically written back to the data store as well. + +For caches that don't provide this functionality, it's the responsibility of the applications that use the cache to maintain the data. + +An application can emulate the functionality of read-through caching by implementing the cache-aside strategy. This strategy loads data into the cache on demand. The figure illustrates using the Cache-Aside pattern to store data in the cache. + +![Using the Cache-Aside pattern to store data in the cache](./_images/cache-aside-diagram.png) + + +If an application updates information, it can follow the write-through strategy by making the modification to the data store, and by invalidating the corresponding item in the cache. + +When the item is next required, using the cache-aside strategy will cause the updated data to be retrieved from the data store and added back into the cache. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +**Lifetime of cached data**. 
Many caches implement an expiration policy that invalidates data and removes it from the cache if it's not accessed for a specified period. For cache-aside to be effective, ensure that the expiration policy matches the pattern of access for applications that use the data. Don't make the expiration period too short because this can cause applications to continually retrieve data from the data store and add it to the cache. Similarly, don't make the expiration period so long that the cached data is likely to become stale. Remember that caching is most effective for relatively static data, or data that is read frequently. + +**Evicting data**. Most caches have a limited size compared to the data store where the data originates, and they'll evict data if necessary. Most caches adopt a least-recently-used policy for selecting items to evict, but this might be customizable. Configure the global expiration property and other properties of the cache, and the expiration property of each cached item, to ensure that the cache is cost effective. It isn't always appropriate to apply a global eviction policy to every item in the cache. For example, if a cached item is very expensive to retrieve from the data store, it can be beneficial to keep this item in the cache at the expense of more frequently accessed but less costly items. + +**Priming the cache**. Many solutions prepopulate the cache with the data that an application is likely to need as part of the startup processing. The Cache-Aside pattern can still be useful if some of this data expires or is evicted. + +**Consistency**. Implementing the Cache-Aside pattern doesn't guarantee consistency between the data store and the cache. An item in the data store can be changed at any time by an external process, and this change might not be reflected in the cache until the next time the item is loaded. In a system that replicates data across data stores, this problem can become serious if synchronization occurs frequently. 
+ +**Local (in-memory) caching**. A cache could be local to an application instance and stored in-memory. Cache-aside can be useful in this environment if an application repeatedly accesses the same data. However, a local cache is private and so different application instances could each have a copy of the same cached data. This data could quickly become inconsistent between caches, so it might be necessary to expire data held in a private cache and refresh it more frequently. In these scenarios, consider investigating the use of a shared or a distributed caching mechanism. + +## When to use this pattern + +Use this pattern when: + +- A cache doesn't provide native read-through and write-through operations. +- Resource demand is unpredictable. This pattern enables applications to load data on demand. It makes no assumptions about which data an application will require in advance. + +This pattern might not be suitable: + +- When the cached data set is static. If the data will fit into the available cache space, prime the cache with the data on startup and apply a policy that prevents the data from expiring. +- For caching session state information in a web application hosted in a web farm. In this environment, you should avoid introducing dependencies based on client-server affinity. + +## Example + +In Microsoft Azure you can use Azure Cache to create a distributed cache that can be shared by multiple instances of an application. The `GetMyEntityAsync` method in the following code example shows an implementation of the Cache-Aside pattern based on Azure Cache. This method retrieves an object from the cache using the read-through approach. + +An object is identified by using an integer ID as the key. The `GetMyEntityAsync` method generates a string value based on this key (the Azure Cache API uses strings for key values) and tries to retrieve an item with this key from the cache. If a matching item is found, it's returned. 
If there's no match in the cache, the `GetMyEntityAsync` method retrieves the object from a data store, adds it to the cache, and then returns it. The code that actually retrieves the data from the data store has been omitted because it is data store dependent. Note that the cached item is configured to expire in order to prevent it from becoming stale if it's updated elsewhere. + +```csharp +private DataCache cache; + +public async Task<MyEntity> GetMyEntityAsync(int id) +{ + // Define a unique key for this method and its parameters. + var key = string.Format("StoreWithCache_GetAsync_{0}", id); + var expiration = TimeSpan.FromMinutes(3); + bool cacheException = false; + + try + { + // Try to get the entity from the cache. + var cacheItem = cache.GetCacheItem(key); + if (cacheItem != null) + { + return cacheItem.Value as MyEntity; + } + } + catch (DataCacheException) + { + // If there's a cache related issue, record the failure + // and avoid using the cache for the rest of the call. + cacheException = true; + } + + // If there's a cache miss, get the entity from the original store and cache it. + // Code has been omitted because it's data store dependent. + var entity = ...; + + if (!cacheException) + { + try + { + // Avoid caching a null value. + if (entity != null) + { + // Put the item in the cache with a custom expiration time that + // depends on how critical it is to avoid stale data. + cache.Put(key, entity, timeout: expiration); + } + } + catch (DataCacheException) + { + // If there's a cache related issue, ignore it + // and just return the entity. + } + } + + return entity; +} +``` + +> The examples use the Azure Cache API to access the store and retrieve information from the cache. For more information, see [Using Microsoft Azure Cache](https://msdn.microsoft.com/library/azure/hh914165.aspx). + +The `UpdateEntityAsync` method shown below demonstrates how to invalidate an object in the cache when the value is changed by the application. 
This is an example of a write-through approach. The code updates the original data store and then removes the cached item from the cache by calling the `Remove` method, specifying the key (the code has been omitted because it is data store dependent). + +> The order of the steps in this sequence is important. If the item is removed from the cache before the data store is updated, the client application has a short period of time to fetch the data (because it isn't found in the cache) before the item in the data store has been changed, resulting in the cache containing stale data. + +```csharp +public async Task UpdateEntityAsync(MyEntity entity) +{ + // Update the object in the original data store + await this.store.UpdateEntityAsync(entity).ConfigureAwait(false); + + // Get the correct key for the cached object. + var key = this.GetAsyncCacheKey(entity.Id); + + // Then, invalidate the current cache object + this.cache.Remove(key); +} + +private string GetAsyncCacheKey(int objectId) +{ + return string.Format("StoreWithCache_GetAsync_{0}", objectId); +} +``` + +## Related guidance + +The following information may be relevant when implementing this pattern: + +- [Caching Guidance](https://msdn.microsoft.com/library/dn589802.aspx). Provides additional information on how you can cache data in a cloud solution, and the issues that you should consider when you implement a cache. + +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). Cloud applications typically use data that's spread across data stores. Managing and maintaining data consistency in this environment is a critical aspect of the system, particularly the concurrency and availability issues that can arise. This primer describes issues about consistency across distributed data, and summarizes how an application can implement eventual consistency to maintain the availability of data.
diff --git a/docs/patterns/category/availability.md b/docs/patterns/category/availability.md new file mode 100644 index 00000000000..f8506f6351d --- /dev/null +++ b/docs/patterns/category/availability.md @@ -0,0 +1,26 @@ +--- +title: Availability patterns +description: Availability defines the proportion of time that the system is functional and working. It will be affected by system errors, infrastructure problems, malicious attacks, and system load. It is usually measured as a percentage of uptime. Cloud applications typically provide users with a service level agreement (SLA), which means that applications must be designed and implemented in a way that maximizes availability. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Availability patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Availability defines the proportion of time that the system is functional and working. It will be affected by system errors, infrastructure problems, malicious attacks, and system load. It is usually measured as a percentage of uptime. Cloud applications typically provide users with a service level agreement (SLA), which means that applications must be designed and implemented in a way that maximizes availability. + +| Pattern | Summary | +| ------- | ------- | +| [Health Endpoint Monitoring](../health-endpoint-monitoring.md) | Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. | +| [Queue-Based Load Leveling](../queue-based-load-leveling.md) | Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. | +| [Throttling](../throttling.md) | Control the consumption of resources used by an instance of an application, an individual tenant, or an entire service. 
| \ No newline at end of file diff --git a/docs/patterns/category/data-management.md b/docs/patterns/category/data-management.md new file mode 100644 index 00000000000..2005b6e8795 --- /dev/null +++ b/docs/patterns/category/data-management.md @@ -0,0 +1,31 @@ +--- +title: Data Management patterns +description: Data management is the key element of cloud applications, and influences most of the quality attributes. Data is typically hosted in different locations and across multiple servers for reasons such as performance, scalability or availability, and this can present a range of challenges. For example, data consistency must be maintained, and data will typically need to be synchronized across different locations. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Data Management patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Data management is the key element of cloud applications, and influences most of the quality attributes. Data is typically hosted in different locations and across multiple servers for reasons such as performance, scalability or availability, and this can present a range of challenges. For example, data consistency must be maintained, and data will typically need to be synchronized across different locations. + +| Pattern | Summary | +| ------- | ------- | +| [Cache-Aside](../cache-aside.md) | Load data on demand into a cache from a data store | +| [CQRS](../cqrs.md) | Segregate operations that read data from operations that update data by using separate interfaces. | +| [Event Sourcing](../event-sourcing.md) | Use an append-only store to record the full series of events that describe actions taken on data in a domain. | +| [Index Table](../index-table.md) | Create indexes over the fields in data stores that are frequently referenced by queries. 
| +| [Materialized View](../materialized-view.md) | Generate prepopulated views over the data in one or more data stores when the data isn't ideally formatted for required query operations. | +| [Sharding](../sharding.md) | Divide a data store into a set of horizontal partitions or shards. | +| [Static Content Hosting](../static-content-hosting.md) | Deploy static content to a cloud-based storage service that can deliver them directly to the client. | +| [Valet Key](../valet-key.md) | Use a token or key that provides clients with restricted direct access to a specific resource or service. | \ No newline at end of file diff --git a/docs/patterns/category/design-implementation.md b/docs/patterns/category/design-implementation.md new file mode 100644 index 00000000000..dd5bb1ce2ad --- /dev/null +++ b/docs/patterns/category/design-implementation.md @@ -0,0 +1,30 @@ +--- +title: Design and Implementation patterns +description: Good design encompasses factors such as consistency and coherence in component design and deployment, maintainability to simplify administration and development, and reusability to allow components and subsystems to be used in other applications and in other scenarios. Decisions made during the design and implementation phase have a huge impact on the quality and the total cost of ownership of cloud hosted applications and services. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Design and Implementation patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Good design encompasses factors such as consistency and coherence in component design and deployment, maintainability to simplify administration and development, and reusability to allow components and subsystems to be used in other applications and in other scenarios. 
Decisions made during the design and implementation phase have a huge impact on the quality and the total cost of ownership of cloud hosted applications and services. + +| Pattern | Summary | +| ------- | ------- | +| [CQRS](../cqrs.md) | Segregate operations that read data from operations that update data by using separate interfaces. | +| [Compute Resource Consolidation](../compute-resource-consolidation.md) | Consolidate multiple tasks or operations into a single computational unit | +| [External Configuration Store](../external-configuration-store.md) | Move configuration information out of the application deployment package to a centralized location. | +| [Leader Election](../leader-election.md) | Coordinate the actions performed by a collection of collaborating task instances in a distributed application by electing one instance as the leader that assumes responsibility for managing the other instances. | +| [Pipes and Filters](../pipes-and-filters.md) | Break down a task that performs complex processing into a series of separate elements that can be reused. | +| [Runtime Reconfiguration](../runtime-reconfiguration.md) | Design an application so that it can be reconfigured without requiring redeployment or restarting the application. | +| [Static Content Hosting](../static-content-hosting.md) | Deploy static content to a cloud-based storage service that can deliver them directly to the client. 
| \ No newline at end of file diff --git a/docs/patterns/category/index.md b/docs/patterns/category/index.md new file mode 100644 index 00000000000..42b519f933c --- /dev/null +++ b/docs/patterns/category/index.md @@ -0,0 +1,3 @@ +--- +redirect_url: /azure/patterns/category/availability +--- diff --git a/docs/patterns/category/management-monitoring.md b/docs/patterns/category/management-monitoring.md new file mode 100644 index 00000000000..fd3b771056b --- /dev/null +++ b/docs/patterns/category/management-monitoring.md @@ -0,0 +1,25 @@ +--- +title: Management and Monitoring patterns +description: Cloud applications run in a remote datacenter where you do not have full control of the infrastructure or, in some cases, the operating system. This can make management and monitoring more difficult than an on-premises deployment. Applications must expose runtime information that administrators and operators can use to manage and monitor the system, as well as supporting changing business requirements and customization without requiring the application to be stopped or redeployed. +keywords: design pattern +author: dragon119 +manager: bennage +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Management and Monitoring patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Cloud applications run in a remote datacenter where you do not have full control of the infrastructure or, in some cases, the operating system. This can make management and monitoring more difficult than an on-premises deployment. Applications must expose runtime information that administrators and operators can use to manage and monitor the system, as well as supporting changing business requirements and customization without requiring the application to be stopped or redeployed.
+ +| Pattern | Summary | +| ------- | ------- | +| [External Configuration Store](../external-configuration-store.md) | Move configuration information out of the application deployment package to a centralized location. | +| [Health Endpoint Monitoring](../health-endpoint-monitoring.md) | Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. | +| [Runtime Reconfiguration](../runtime-reconfiguration.md) | Design an application so that it can be reconfigured without requiring redeployment or restarting the application. | \ No newline at end of file diff --git a/docs/patterns/category/messaging.md b/docs/patterns/category/messaging.md new file mode 100644 index 00000000000..a209b3ec620 --- /dev/null +++ b/docs/patterns/category/messaging.md @@ -0,0 +1,28 @@ +--- +title: Messaging patterns +description: The distributed nature of cloud applications requires a messaging infrastructure that connects the components and services, ideally in a loosely coupled manner in order to maximize scalability. Asynchronous messaging is widely used, and provides many benefits, but also brings challenges such as the ordering of messages, poison message management, idempotency, and more. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Messaging patterns + +[!INCLUDE [header](../../_includes/header.md)] + +The distributed nature of cloud applications requires a messaging infrastructure that connects the components and services, ideally in a loosely coupled manner in order to maximize scalability. Asynchronous messaging is widely used, and provides many benefits, but also brings challenges such as the ordering of messages, poison message management, idempotency, and more. 
+ +| Pattern | Summary | +| ------- | ------- | +| [Competing Consumers](../competing-consumers.md) | Enable multiple concurrent consumers to process messages received on the same messaging channel. | +| [Pipes and Filters](../pipes-and-filters.md) | Break down a task that performs complex processing into a series of separate elements that can be reused. | +| [Priority Queue](../priority-queue.md) | Prioritize requests sent to services so that requests with a higher priority are received and processed more quickly than those with a lower priority. | +| [Queue-Based Load Leveling](../queue-based-load-leveling.md) | Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. | +| [Scheduler Agent Supervisor](../scheduler-agent-supervisor.md) | Coordinate a set of actions across a distributed set of services and other remote resources. | \ No newline at end of file diff --git a/docs/patterns/category/performance-scalability.md b/docs/patterns/category/performance-scalability.md new file mode 100644 index 00000000000..e39824eae49 --- /dev/null +++ b/docs/patterns/category/performance-scalability.md @@ -0,0 +1,33 @@ +--- +title: Performance and Scalability patterns +description: Performance is an indication of the responsiveness of a system to execute any action within a given time interval, while scalability is the ability of a system either to handle increases in load without impact on performance or for the available resources to be readily increased. Cloud applications typically encounter variable workloads and peaks in activity. Predicting these, especially in a multi-tenant scenario, is almost impossible. Instead, applications should be able to scale out within limits to meet peaks in demand, and scale in when demand decreases. Scalability concerns not just compute instances, but other elements such as data storage, messaging infrastructure, and more.
+keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Performance and Scalability patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Performance is an indication of the responsiveness of a system to execute any action within a given time interval, while scalability is the ability of a system either to handle increases in load without impact on performance or for the available resources to be readily increased. Cloud applications typically encounter variable workloads and peaks in activity. Predicting these, especially in a multi-tenant scenario, is almost impossible. Instead, applications should be able to scale out within limits to meet peaks in demand, and scale in when demand decreases. Scalability concerns not just compute instances, but other elements such as data storage, messaging infrastructure, and more. + +| Pattern | Summary | +| ------- | ------- | +| [Cache-Aside](../cache-aside.md) | Load data on demand into a cache from a data store | +| [CQRS](../cqrs.md) | Segregate operations that read data from operations that update data by using separate interfaces. | +| [Event Sourcing](../event-sourcing.md) | Use an append-only store to record the full series of events that describe actions taken on data in a domain. | +| [Index Table](../index-table.md) | Create indexes over the fields in data stores that are frequently referenced by queries. | +| [Materialized View](../materialized-view.md) | Generate prepopulated views over the data in one or more data stores when the data isn't ideally formatted for required query operations. | +| [Priority Queue](../priority-queue.md) | Prioritize requests sent to services so that requests with a higher priority are received and processed more quickly than those with a lower priority.
| +| [Queue-Based Load Leveling](../queue-based-load-leveling.md) | Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. | +| [Sharding](../sharding.md) | Divide a data store into a set of horizontal partitions or shards. | +| [Static Content Hosting](../static-content-hosting.md) | Deploy static content to a cloud-based storage service that can deliver them directly to the client. | +| [Throttling](../throttling.md) | Control the consumption of resources used by an instance of an application, an individual tenant, or an entire service. | \ No newline at end of file diff --git a/docs/patterns/category/resiliency.md b/docs/patterns/category/resiliency.md new file mode 100644 index 00000000000..04406a93669 --- /dev/null +++ b/docs/patterns/category/resiliency.md @@ -0,0 +1,30 @@ +--- +title: Resiliency patterns +description: Resiliency is the ability of a system to gracefully handle and recover from failures. The nature of cloud hosting, where applications are often multi-tenant, use shared platform services, compete for resources and bandwidth, communicate over the Internet, and run on commodity hardware means there is an increased likelihood that both transient and more permanent faults will arise. Detecting failures, and recovering quickly and efficiently, is necessary to maintain resiliency. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Resiliency patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Resiliency is the ability of a system to gracefully handle and recover from failures. 
The nature of cloud hosting, where applications are often multi-tenant, use shared platform services, compete for resources and bandwidth, communicate over the Internet, and run on commodity hardware means there is an increased likelihood that both transient and more permanent faults will arise. Detecting failures, and recovering quickly and efficiently, is necessary to maintain resiliency. + +| Pattern | Summary | +| ------- | ------- | +| [Circuit Breaker](../circuit-breaker.md) | Handle faults that might take a variable amount of time to fix when connecting to a remote service or resource. | +| [Compensating Transaction](../compensating-transaction.md) | Undo the work performed by a series of steps, which together define an eventually consistent operation. | +| [Health Endpoint Monitoring](../health-endpoint-monitoring.md) | Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. | +| [Leader Election](../leader-election.md) | Coordinate the actions performed by a collection of collaborating task instances in a distributed application by electing one instance as the leader that assumes responsibility for managing the other instances. | +| [Queue-Based Load Leveling](../queue-based-load-leveling.md) | Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. | +| [Retry](../transient-faults.md) | Enable an application to handle anticipated, temporary failures when it tries to connect to a service or network resource by transparently retrying an operation that's previously failed. | +| [Scheduler Agent Supervisor](../scheduler-agent-supervisor.md) | Coordinate a set of actions across a distributed set of services and other remote resources. 
| \ No newline at end of file diff --git a/docs/patterns/category/security.md b/docs/patterns/category/security.md new file mode 100644 index 00000000000..4a296391ce4 --- /dev/null +++ b/docs/patterns/category/security.md @@ -0,0 +1,26 @@ +--- +title: Security patterns +description: Security is the capability of a system to prevent malicious or accidental actions outside of the designed usage, and to prevent disclosure or loss of information. Cloud applications are exposed on the Internet outside trusted on-premises boundaries, are often open to the public, and may serve untrusted users. Applications must be designed and deployed in a way that protects them from malicious attacks, restricts access to only approved users, and protects sensitive data. +keywords: design pattern +author: dragon119 +manager: bennage + +ms.author: mwasson +ms.date: 02/21/2017 +ms.topic: article +ms.service: guidance + +pnp.series.title: Cloud Design Patterns +--- + +# Security patterns + +[!INCLUDE [header](../../_includes/header.md)] + +Security is the capability of a system to prevent malicious or accidental actions outside of the designed usage, and to prevent disclosure or loss of information. Cloud applications are exposed on the Internet outside trusted on-premises boundaries, are often open to the public, and may serve untrusted users. Applications must be designed and deployed in a way that protects them from malicious attacks, restricts access to only approved users, and protects sensitive data. + +| Pattern | Summary | +| ------- | ------- | +| [Federated Identity](../federated-identity.md) | Delegate authentication to an external identity provider. | +| [Gatekeeper](../gatekeeper.md) | Protect applications and services by using a dedicated host instance that acts as a broker between clients and the application or service, validates and sanitizes requests, and passes requests and data between them. 
| +| [Valet Key](../valet-key.md) | Use a token or key that provides clients with restricted direct access to a specific resource or service. | \ No newline at end of file diff --git a/docs/patterns/circuit-breaker.md b/docs/patterns/circuit-breaker.md new file mode 100644 index 00000000000..767d5948c86 --- /dev/null +++ b/docs/patterns/circuit-breaker.md @@ -0,0 +1,293 @@ +--- +title: Circuit Breaker +description: Handle faults that might take a variable amount of time to fix when connecting to a remote service or resource. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Circuit Breaker + +[!INCLUDE [header](../_includes/header.md)] + +Handle faults that might take a variable amount of time to recover from, when connecting to a remote service or resource. This can improve the stability and resiliency of an application. + +## Context and problem + +In a distributed environment, calls to remote resources and services can fail due to transient faults, such as slow network connections, timeouts, or the resources being overcommitted or temporarily unavailable. These faults typically correct themselves after a short period of time, and a robust cloud application should be prepared to handle them by using a strategy such as the [Retry pattern](transient-faults.md). + +However, there can also be situations where faults are due to unanticipated events, and that might take much longer to fix. These faults can range in severity from a partial loss of connectivity to the complete failure of a service. In these situations it might be pointless for an application to continually retry an operation that is unlikely to succeed, and instead the application should quickly accept that the operation has failed and handle this failure accordingly. 
+ +Additionally, if a service is very busy, failure in one part of the system might lead to cascading failures. For example, an operation that invokes a service could be configured to implement a timeout, and reply with a failure message if the service fails to respond within this period. However, this strategy could cause many concurrent requests to the same operation to be blocked until the timeout period expires. These blocked requests might hold critical system resources such as memory, threads, database connections, and so on. Consequently, these resources could become exhausted, causing failure of other possibly unrelated parts of the system that need to use the same resources. In these situations, it would be preferable for the operation to fail immediately, and only attempt to invoke the service if it's likely to succeed. Note that setting a shorter timeout might help to resolve this problem, but the timeout shouldn't be so short that the operation fails most of the time, even if the request to the service would eventually succeed. + +## Solution + +The Circuit Breaker pattern can prevent an application from repeatedly trying to execute an operation that's likely to fail, allowing it to continue without waiting for the fault to be fixed or wasting CPU cycles while it determines that the fault is long lasting. The Circuit Breaker pattern also enables an application to detect whether the fault has been resolved. If the problem appears to have been fixed, the application can try to invoke the operation. + +> The purpose of the Circuit Breaker pattern is different from that of the Retry pattern. The Retry pattern enables an application to retry an operation in the expectation that it'll succeed. The Circuit Breaker pattern prevents an application from performing an operation that is likely to fail. An application can combine these two patterns by using the Retry pattern to invoke an operation through a circuit breaker.
However, the retry logic should be sensitive to any exceptions returned by the circuit breaker and abandon retry attempts if the circuit breaker indicates that a fault is not transient. + +A circuit breaker acts as a proxy for operations that might fail. The proxy should monitor the number of recent failures that have occurred, and use this information to decide whether to allow the operation to proceed, or simply return an exception immediately. + +The proxy can be implemented as a state machine with the following states that mimic the functionality of an electrical circuit breaker: + +- **Closed**: The request from the application is routed to the operation. The proxy maintains a count of the number of recent failures, and if the call to the operation is unsuccessful the proxy increments this count. If the number of recent failures exceeds a specified threshold within a given time period, the proxy is placed into the **Open** state. At this point the proxy starts a timeout timer, and when this timer expires the proxy is placed into the **Half-Open** state. + + > The purpose of the timeout timer is to give the system time to fix the problem that caused the failure before allowing the application to try to perform the operation again. + +- **Open**: The request from the application fails immediately and an exception is returned to the application. + +- **Half-Open**: A limited number of requests from the application are allowed to pass through and invoke the operation. If these requests are successful, it's assumed that the fault that was previously causing the failure has been fixed and the circuit breaker switches to the **Closed** state (the failure counter is reset). If any request fails, the circuit breaker assumes that the fault is still present so it reverts back to the **Open** state and restarts the timeout timer to give the system a further period of time to recover from the failure. 
+ + > The **Half-Open** state is useful to prevent a recovering service from suddenly being flooded with requests. As a service recovers, it might be able to support a limited volume of requests until the recovery is complete, but while recovery is in progress a flood of work can cause the service to time out or fail again. + +![Circuit Breaker states](./_images/circuit-breaker-diagram.png) + +In the figure, the failure counter used by the **Closed** state is time based. It's automatically reset at periodic intervals. This helps to prevent the circuit breaker from entering the **Open** state if it experiences occasional failures. The failure threshold that trips the circuit breaker into the **Open** state is only reached when a specified number of failures have occurred during a specified interval. The counter used by the **Half-Open** state records the number of successful attempts to invoke the operation. The circuit breaker reverts to the **Closed** state after a specified number of consecutive operation invocations have been successful. If any invocation fails, the circuit breaker enters the **Open** state immediately and the success counter will be reset the next time it enters the **Half-Open** state. + +> How the system recovers is handled externally, possibly by restoring or restarting a failed component or repairing a network connection. + +The Circuit Breaker pattern provides stability while the system recovers from a failure and minimizes the impact on performance. It can help to maintain the response time of the system by quickly rejecting a request for an operation that's likely to fail, rather than waiting for the operation to time out, or never return. If the circuit breaker raises an event each time it changes state, this information can be used to monitor the health of the part of the system protected by the circuit breaker, or to alert an administrator when a circuit breaker trips to the **Open** state.
+ +The pattern is customizable and can be adapted according to the type of the possible failure. For example, you can apply an increasing timeout timer to a circuit breaker. You could place the circuit breaker in the **Open** state for a few seconds initially, and then if the failure hasn't been resolved increase the timeout to a few minutes, and so on. In some cases, rather than the **Open** state returning failure and raising an exception, it could be useful to return a default value that is meaningful to the application. + +## Issues and considerations + +You should consider the following points when deciding how to implement this pattern: + +**Exception Handling**. An application invoking an operation through a circuit breaker must be prepared to handle the exceptions raised if the operation is unavailable. The way exceptions are handled will be application specific. For example, an application could temporarily degrade its functionality, invoke an alternative operation to try to perform the same task or obtain the same data, or report the exception to the user and ask them to try again later. + +**Types of Exceptions**. A request might fail for many reasons, some of which might indicate a more severe type of failure than others. For example, a request might fail because a remote service has crashed and will take several minutes to recover, or because of a timeout due to the service being temporarily overloaded. A circuit breaker might be able to examine the types of exceptions that occur and adjust its strategy depending on the nature of these exceptions. For example, it might require a larger number of timeout exceptions to trip the circuit breaker to the **Open** state compared to the number of failures due to the service being completely unavailable. + +**Logging**. A circuit breaker should log all failed requests (and possibly successful requests) to enable an administrator to monitor the health of the operation. + +**Recoverability**. 
You should configure the circuit breaker to match the likely recovery pattern of the operation it's protecting. For example, if the circuit breaker remains in the **Open** state for a long period, it could raise exceptions even if the reason for the failure has been resolved. Similarly, a circuit breaker could fluctuate and reduce the response times of applications if it switches from the **Open** state to the **Half-Open** state too quickly. + +**Testing Failed Operations**. In the **Open** state, rather than using a timer to determine when to switch to the **Half-Open** state, a circuit breaker can instead periodically ping the remote service or resource to determine whether it's become available again. This ping could take the form of an attempt to invoke an operation that had previously failed, or it could use a special operation provided by the remote service specifically for testing the health of the service, as described by the [Health Endpoint Monitoring pattern](health-endpoint-monitoring.md). + +**Manual Override**. In a system where the recovery time for a failing operation is extremely variable, it's beneficial to provide a manual reset option that enables an administrator to close a circuit breaker (and reset the failure counter). Similarly, an administrator could force a circuit breaker into the **Open** state (and restart the timeout timer) if the operation protected by the circuit breaker is temporarily unavailable. + +**Concurrency**. The same circuit breaker could be accessed by a large number of concurrent instances of an application. The implementation shouldn't block concurrent requests or add excessive overhead to each call to an operation. + +**Resource Differentiation**. Be careful when using a single circuit breaker for one type of resource if there might be multiple underlying independent providers. 
For example, in a data store that contains multiple shards, one shard might be fully accessible while another is experiencing a temporary issue. If the error responses in these scenarios are merged, an application might try to access some shards even when failure is highly likely, while access to other shards might be blocked even though it's likely to succeed. + +**Accelerated Circuit Breaking**. Sometimes a failure response can contain enough information for the circuit breaker to trip immediately and stay tripped for a minimum amount of time. For example, the error response from a shared resource that's overloaded could indicate that an immediate retry isn't recommended and that the application should instead try again in a few minutes. + +> [!NOTE] +> A service can return HTTP 429 (Too Many Requests) if it is throttling the client, or HTTP 503 (Service Unavailable) if the service is not currently available. The response can include additional information, such as the anticipated duration of the delay. + +**Replaying Failed Requests**. In the **Open** state, rather than simply failing quickly, a circuit breaker could also record the details of each request to a journal and arrange for these requests to be replayed when the remote resource or service becomes available. + +**Inappropriate Timeouts on External Services**. A circuit breaker might not be able to fully protect applications from operations that fail in external services that are configured with a lengthy timeout period. If the timeout is too long, a thread running a circuit breaker might be blocked for an extended period before the circuit breaker indicates that the operation has failed. In this time, many other application instances might also try to invoke the service through the circuit breaker and tie up a significant number of threads before they all fail. 
+ +## When to use this pattern + +Use this pattern: + +- To prevent an application from trying to invoke a remote service or access a shared resource if this operation is highly likely to fail. + +This pattern isn't recommended: + +- For handling access to local private resources in an application, such as an in-memory data structure. In this environment, using a circuit breaker would add overhead to your system. +- As a substitute for handling exceptions in the business logic of your applications. + +## Example + +In a web application, several of the pages are populated with data retrieved from an external service. If the system implements minimal caching, most hits to these pages will cause a round trip to the service. Connections from the web application to the service could be configured with a timeout period (typically 60 seconds), and if the service doesn't respond in this time the logic in each web page will assume that the service is unavailable and throw an exception. + +However, if the service fails and the system is very busy, users could be forced to wait for up to 60 seconds before an exception occurs. Eventually resources such as memory, connections, and threads could be exhausted, preventing other users from connecting to the system, even if they aren't accessing pages that retrieve data from the service. + +Scaling the system by adding further web servers and implementing load balancing might delay when resources become exhausted, but it won't resolve the issue because user requests will still be unresponsive and all web servers could still eventually run out of resources. + +Wrapping the logic that connects to the service and retrieves the data in a circuit breaker could help to solve this problem and handle the service failure more elegantly. User requests will still fail, but they'll fail more quickly and the resources won't be blocked. 
+ +The `CircuitBreaker` class maintains state information about a circuit breaker in an object that implements the `ICircuitBreakerStateStore` interface shown in the following code. + +```csharp +interface ICircuitBreakerStateStore +{ + CircuitBreakerStateEnum State { get; } + + Exception LastException { get; } + + DateTime LastStateChangedDateUtc { get; } + + void Trip(Exception ex); + + void Reset(); + + void HalfOpen(); + + bool IsClosed { get; } +} +``` + +The `State` property indicates the current state of the circuit breaker, and will be either **Open**, **HalfOpen**, or **Closed** as defined by the `CircuitBreakerStateEnum` enumeration. The `IsClosed` property should be true if the circuit breaker is closed, but false if it's open or half open. The `Trip` method switches the state of the circuit breaker to the open state and records the exception that caused the change in state, together with the date and time that the exception occurred. The `LastException` and the `LastStateChangedDateUtc` properties return this information. The `Reset` method closes the circuit breaker, and the `HalfOpen` method sets the circuit breaker to half open. + +The `InMemoryCircuitBreakerStateStore` class in the example contains an implementation of the `ICircuitBreakerStateStore` interface. The `CircuitBreaker` class creates an instance of this class to hold the state of the circuit breaker. + +The `ExecuteAction` method in the `CircuitBreaker` class wraps an operation, specified as an `Action` delegate. If the circuit breaker is closed, `ExecuteAction` invokes the `Action` delegate. If the operation fails, an exception handler calls `TrackException`, which sets the circuit breaker state to open. The following code example highlights this flow. + +```csharp +public class CircuitBreaker +{ + private readonly ICircuitBreakerStateStore stateStore = + CircuitBreakerStateStoreFactory.GetCircuitBreakerStateStore(); + + private readonly object halfOpenSyncObject = new object (); + ... 
+ public bool IsClosed { get { return stateStore.IsClosed; } } + + public bool IsOpen { get { return !IsClosed; } } + + public void ExecuteAction(Action action) + { + ... + if (IsOpen) + { + // The circuit breaker is Open. + ... (see code sample below for details) + } + + // The circuit breaker is Closed, execute the action. + try + { + action(); + } + catch (Exception ex) + { + // If an exception still occurs here, simply + // retrip the breaker immediately. + this.TrackException(ex); + + // Throw the exception so that the caller can tell + // the type of exception that was thrown. + throw; + } + } + + private void TrackException(Exception ex) + { + // For simplicity in this example, open the circuit breaker on the first exception. + // In reality this would be more complex. A certain type of exception, such as one + // that indicates a service is offline, might trip the circuit breaker immediately. + // Alternatively it might count exceptions locally or across multiple instances and + // use this value over time, or the exception/success ratio based on the exception + // types, to open the circuit breaker. + this.stateStore.Trip(ex); + } +} +``` + +The following example shows the code (omitted from the previous example) that is executed if the circuit breaker isn't closed. It first checks if the circuit breaker has been open for a period longer than the time specified by the local `OpenToHalfOpenWaitTime` field in the `CircuitBreaker` class. If this is the case, the `ExecuteAction` method sets the circuit breaker to half open, then tries to perform the operation specified by the `Action` delegate. + +If the operation is successful, the circuit breaker is reset to the closed state. If the operation fails, it is tripped back to the open state and the time the exception occurred is updated so that the circuit breaker will wait for a further period before trying to perform the operation again. 
+ +If the circuit breaker has only been open for a short time, less than the `OpenToHalfOpenWaitTime` value, the `ExecuteAction` method simply throws a `CircuitBreakerOpenException` exception and returns the error that caused the circuit breaker to transition to the open state. + +Additionally, it uses a lock to prevent the circuit breaker from trying to perform concurrent calls to the operation while it's half open. A concurrent attempt to invoke the operation will be handled as if the circuit breaker was open, and it'll fail with an exception as described later. + +```csharp + ... + if (IsOpen) + { + // The circuit breaker is Open. Check if the Open timeout has expired. + // If it has, set the state to HalfOpen. Another approach might be to + // check for the HalfOpen state that had been set by some other operation. + if (stateStore.LastStateChangedDateUtc + OpenToHalfOpenWaitTime < DateTime.UtcNow) + { + // The Open timeout has expired. Allow one operation to execute. Note that, in + // this example, the circuit breaker is set to HalfOpen after being + // in the Open state for some period of time. An alternative would be to set + // this using some other approach such as a timer, test method, manually, and + // so on, and check the state here to determine how to handle execution + // of the action. + // Limit the number of threads to be executed when the breaker is HalfOpen. + // An alternative would be to use a more complex approach to determine which + // threads or how many are allowed to execute, or to execute a simple test + // method instead. + bool lockTaken = false; + try + { + Monitor.TryEnter(halfOpenSyncObject, ref lockTaken); + if (lockTaken) + { + // Set the circuit breaker state to HalfOpen. + stateStore.HalfOpen(); + + // Attempt the operation. + action(); + + // If this action succeeds, reset the state and allow other operations. 
+ // In reality, instead of immediately returning to the Closed state, a counter + // here would record the number of successful operations and return the + // circuit breaker to the Closed state only after a specified number succeed. + this.stateStore.Reset(); + return; + } + } + catch (Exception ex) + { + // If there's still an exception, trip the breaker again immediately. + this.stateStore.Trip(ex); + + // Throw the exception so that the caller knows which exception occurred. + throw; + } + finally + { + if (lockTaken) + { + Monitor.Exit(halfOpenSyncObject); + } + } + } + // The Open timeout hasn't yet expired. Throw a CircuitBreakerOpen exception to + // inform the caller that the call was not actually attempted, + // and return the most recent exception received. + throw new CircuitBreakerOpenException(stateStore.LastException); + } + ... +``` + +To use a `CircuitBreaker` object to protect an operation, an application creates an instance of the `CircuitBreaker` class and invokes the `ExecuteAction` method, specifying the operation to be performed as the parameter. The application should be prepared to catch the `CircuitBreakerOpenException` exception if the operation fails because the circuit breaker is open. The following code shows an example: + +```csharp +var breaker = new CircuitBreaker(); + +try +{ + breaker.ExecuteAction(() => + { + // Operation protected by the circuit breaker. + ... + }); +} +catch (CircuitBreakerOpenException ex) +{ + // Perform some different action when the breaker is open. + // Last exception details are in the inner exception. + ... +} +catch (Exception ex) +{ + ... +} +``` + +## Related patterns and guidance + +The following patterns might also be useful when implementing this pattern: + +- [Retry Pattern](transient-faults.md). Describes how an application can handle anticipated temporary failures when it tries to connect to a service or network resource by transparently retrying an operation that has previously failed. 
+ +- [Health Endpoint Monitoring Pattern](health-endpoint-monitoring.md). A circuit breaker might be able to test the health of a service by sending a request to an endpoint exposed by the service. The service should return information indicating its status. diff --git a/docs/patterns/compensating-transaction.md b/docs/patterns/compensating-transaction.md new file mode 100644 index 00000000000..9d6ccbf79b4 --- /dev/null +++ b/docs/patterns/compensating-transaction.md @@ -0,0 +1,104 @@ +--- +title: Compensating Transaction +description: Undo the work performed by a series of steps, which together define an eventually consistent operation. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Compensating Transaction + +[!INCLUDE [header](../_includes/header.md)] + +Undo the work performed by a series of steps, which together define an eventually consistent operation, if one or more of the steps fail. Operations that follow the eventual consistency model are commonly found in cloud-hosted applications that implement complex business processes and workflows. + +## Context and problem + +Applications running in the cloud frequently modify data. This data might be spread across various data sources held in different geographic locations. To avoid contention and improve performance in a distributed environment, an application shouldn't try to provide strong transactional consistency. Rather, the application should implement eventual consistency. In this model, a typical business operation consists of a series of separate steps. While these steps are being performed, the overall view of the system state might be inconsistent, but when the operation has completed and all of the steps have been executed the system should become consistent again. 
+ +> The [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx) provides information about why distributed transactions don't scale well, and the principles of the eventual consistency model. + +A challenge in the eventual consistency model is how to handle a step that has failed. In this case it might be necessary to undo all of the work completed by the previous steps in the operation. However, the data can't simply be rolled back because other concurrent instances of the application might have changed it. Even in cases where the data hasn't been changed by a concurrent instance, undoing a step might not simply be a matter of restoring the original state. It might be necessary to apply various business-specific rules (see the travel website described in the Example section). + +If an operation that implements eventual consistency spans several heterogeneous data stores, undoing the steps in the operation will require visiting each data store in turn. The work performed in every data store must be undone reliably to prevent the system from remaining inconsistent. + +Not all data affected by an operation that implements eventual consistency might be held in a database. In a service oriented architecture (SOA) environment an operation could invoke an action in a service, and cause a change in the state held by that service. To undo the operation, this state change must also be undone. This can involve invoking the service again and performing another action that reverses the effects of the first. + +## Solution + +The solution is to implement a compensating transaction. The steps in a compensating transaction must undo the effects of the steps in the original operation. A compensating transaction might not be able to simply replace the current state with the state the system was in at the start of the operation because this approach could overwrite changes made by other concurrent instances of an application. 
Instead, it must be an intelligent process that takes into account any work done by concurrent instances. This process will usually be application specific, driven by the nature of the work performed by the original operation. + +A common approach is to use a workflow to implement an eventually consistent operation that requires compensation. As the original operation proceeds, the system records information about each step and how the work performed by that step can be undone. If the operation fails at any point, the workflow rewinds back through the steps it's completed and performs the work that reverses each step. Note that a compensating transaction might not have to undo the work in the exact reverse order of the original operation, and it might be possible to perform some of the undo steps in parallel. + +> This approach is similar to the Sagas strategy discussed in [Clemens Vasters’ blog](http://vasters.com/clemensv/2012/09/01/Sagas.aspx). + +A compensating transaction is also an eventually consistent operation and it could also fail. The system should be able to resume the compensating transaction at the point of failure and continue. It might be necessary to repeat a step that's failed, so the steps in a compensating transaction should be defined as idempotent commands. For more information, see [Idempotency Patterns](http://blog.jonathanoliver.com/2010/04/idempotency-patterns/) on Jonathan Oliver’s blog. + +In some cases it might not be possible to recover from a step that has failed except through manual intervention. In these situations the system should raise an alert and provide as much information as possible about the reason for the failure. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +It might not be easy to determine when a step in an operation that implements eventual consistency has failed. A step might not fail immediately, but instead could block. 
It might be necessary to implement some form of time-out mechanism. + +Compensation logic isn't easily generalized. A compensating transaction is application specific. It relies on the application having sufficient information to be able to undo the effects of each step in a failed operation. + +You should define the steps in a compensating transaction as idempotent commands. This enables the steps to be repeated if the compensating transaction itself fails. + +The infrastructure that handles the steps in the original operation, and the compensating transaction, must be resilient. It must not lose the information required to compensate for a failing step, and it must be able to reliably monitor the progress of the compensation logic. + +A compensating transaction doesn't necessarily return the data in the system to the state it was in at the start of the original operation. Instead, it compensates for the work performed by the steps that completed successfully before the operation failed. + +The order of the steps in the compensating transaction doesn't necessarily have to be the exact opposite of the steps in the original operation. For example, one data store might be more sensitive to inconsistencies than another, and so the steps in the compensating transaction that undo the changes to this store should occur first. + +Placing a short-term timeout-based lock on each resource that's required to complete an operation, and obtaining these resources in advance, can help increase the likelihood that the overall activity will succeed. The work should be performed only after all the resources have been acquired. All actions must be finalized before the locks expire. + +Consider using retry logic that is more forgiving than usual to minimize failures that trigger a compensating transaction. If a step in an operation that implements eventual consistency fails, try handling the failure as a transient exception and repeat the step. 
Only stop the operation and initiate a compensating transaction if a step fails repeatedly or irrecoverably. + +> Many of the challenges of implementing a compensating transaction are the same as those with implementing eventual consistency. See the section Considerations for Implementing Eventual Consistency in the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx) for more information. + +## When to use this pattern + +Use this pattern only for operations that must be undone if they fail. If possible, design solutions to avoid the complexity of requiring compensating transactions. + +## Example + +A travel website lets customers book itineraries. A single itinerary might comprise a series of flights and hotels. A customer traveling from Seattle to London and then on to Paris could perform the following steps when creating an itinerary: + +1. Book a seat on flight F1 from Seattle to London. +2. Book a seat on flight F2 from London to Paris. +3. Book a seat on flight F3 from Paris to Seattle. +4. Reserve a room at hotel H1 in London. +5. Reserve a room at hotel H2 in Paris. + +These steps constitute an eventually consistent operation, although each step is a separate action. Therefore, as well as performing these steps, the system must also record the counter operations necessary to undo each step in case the customer decides to cancel the itinerary. The steps necessary to perform the counter operations can then run as a compensating transaction. + +Notice that the steps in the compensating transaction might not be the exact opposite of the original steps, and the logic in each step in the compensating transaction must take into account any business-specific rules. For example, unbooking a seat on a flight might not entitle the customer to a complete refund of any money paid. The figure illustrates generating a compensating transaction to undo a long-running transaction to book a travel itinerary. 
+ +![Generating a compensating transaction to undo a long-running transaction to book a travel itinerary](./_images/compensating-transaction-diagram.png) + + +> It might be possible for the steps in the compensating transaction to be performed in parallel, depending on how you've designed the compensating logic for each step. + +In many business solutions, failure of a single step doesn't always necessitate rolling the system back by using a compensating transaction. For example, if—after having booked flights F1, F2, and F3 in the travel website scenario—the customer is unable to reserve a room at hotel H1, it's preferable to offer the customer a room at a different hotel in the same city rather than canceling the flights. The customer can still decide to cancel (in which case the compensating transaction runs and undoes the bookings made on flights F1, F2, and F3), but this decision should be made by the customer rather than by the system. + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). The Compensating Transaction pattern is often used to undo operations that implement the eventual consistency model. This primer provides information on the benefits and tradeoffs of eventual consistency. + +- [Scheduler-Agent-Supervisor Pattern](scheduler-agent-supervisor.md). Describes how to implement resilient systems that perform business operations that use distributed services and resources. Sometimes, it might be necessary to undo the work performed by an operation by using a compensating transaction. + +- [Retry Pattern](transient-faults.md). Compensating transactions can be expensive to perform, and it might be possible to minimize their use by implementing an effective policy of retrying failing operations by following the Retry pattern. 
diff --git a/docs/patterns/competing-consumers.md b/docs/patterns/competing-consumers.md new file mode 100644 index 00000000000..040703d2fd7 --- /dev/null +++ b/docs/patterns/competing-consumers.md @@ -0,0 +1,193 @@ +--- +title: Competing Consumers +description: Enable multiple concurrent consumers to process messages received on the same messaging channel. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [messaging] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Competing Consumers + +[!INCLUDE [header](../_includes/header.md)] + +Enable multiple concurrent consumers to process messages received on the same messaging channel. This enables a system to process multiple messages concurrently to optimize throughput, to improve scalability and availability, and to balance the workload. + +## Context and problem + +An application running in the cloud is expected to handle a large number of requests. Rather than process each request synchronously, a common technique is for the application to pass them through a messaging system to another service (a consumer service) that handles them asynchronously. This strategy helps to ensure that the business logic in the application isn't blocked while the requests are being processed. + +The number of requests can vary significantly over time for many reasons. A sudden increase in user activity or aggregated requests coming from multiple tenants can cause an unpredictable workload. At peak hours a system might need to process many hundreds of requests per second, while at other times the number could be very small. Additionally, the nature of the work performed to handle these requests might be highly variable. 
Using a single instance of the consumer service can cause that instance to become flooded with requests, or the messaging system might be overloaded by an influx of messages coming from the application. To handle this fluctuating workload, the system can run multiple instances of the consumer service. However, these consumers must be coordinated to ensure that each message is only delivered to a single consumer. The workload also needs to be load balanced across consumers to prevent an instance from becoming a bottleneck. + +## Solution + +Use a message queue to implement the communication channel between the application and the instances of the consumer service. The application posts requests in the form of messages to the queue, and the consumer service instances receive messages from the queue and process them. This approach enables the same pool of consumer service instances to handle messages from any instance of the application. The figure illustrates using a message queue to distribute work to instances of a service. + +![Using a message queue to distribute work to instances of a service](./_images/competing-consumers-diagram.png) + +This solution has the following benefits: + +- It provides a load-leveled system that can handle wide variations in the volume of requests sent by application instances. The queue acts as a buffer between the application instances and the consumer service instances. This can help to minimize the impact on availability and responsiveness for both the application and the service instances, as described by the [Queue-based Load Leveling pattern](queue-based-load-leveling.md). Handling a message that requires some long-running processing doesn't prevent other messages from being handled concurrently by other instances of the consumer service. + +- It improves reliability. 
If a producer communicates directly with a consumer instead of using this pattern, but doesn't monitor the consumer, there's a high probability that messages could be lost or fail to be processed if the consumer fails. In this pattern, messages aren't sent to a specific service instance. A failed service instance won't block a producer, and messages can be processed by any working service instance. + +- It doesn't require complex coordination between the consumers, or between the producer and the consumer instances. The message queue ensures that each message is delivered at least once. + +- It's scalable. The system can dynamically increase or decrease the number of instances of the consumer service as the volume of messages fluctuates. + +- It can improve resiliency if the message queue provides transactional read operations. If a consumer service instance reads and processes the message as part of a transactional operation, and the consumer service instance fails, this pattern can ensure that the message will be returned to the queue to be picked up and handled by another instance of the consumer service. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- **Message ordering**. The order in which consumer service instances receive messages isn't guaranteed, and doesn't necessarily reflect the order in which the messages were created. Design the system to ensure that message processing is idempotent because this will help to eliminate any dependency on the order in which messages are handled. For more information, see [Idempotency Patterns](http://blog.jonathanoliver.com/2010/04/idempotency-patterns/) on Jonathan Oliver’s blog. + + > Microsoft Azure Service Bus Queues can implement guaranteed first-in-first-out ordering of messages by using message sessions. For more information, see [Messaging Patterns Using Sessions](https://msdn.microsoft.com/magazine/jj863132.aspx). 
+ +- **Designing services for resiliency**. If the system is designed to detect and restart failed service instances, it might be necessary to implement the processing performed by the service instances as idempotent operations to minimize the effects of a single message being retrieved and processed more than once. + +- **Detecting poison messages**. A malformed message, or a task that requires access to resources that aren't available, can cause a service instance to fail. The system should prevent such messages being returned to the queue, and instead capture and store the details of these messages elsewhere so that they can be analyzed if necessary. + +- **Handling results**. The service instance handling a message is fully decoupled from the application logic that generates the message, and they might not be able to communicate directly. If the service instance generates results that must be passed back to the application logic, this information must be stored in a location that's accessible to both. In order to prevent the application logic from retrieving incomplete data the system must indicate when processing is complete. + + > If you're using Azure, a worker process can pass results back to the application logic by using a dedicated message reply queue. The application logic must be able to correlate these results with the original message. This scenario is described in more detail in the [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). + +- **Scaling the messaging system**. In a large-scale solution, a single message queue could be overwhelmed by the number of messages and become a bottleneck in the system. In this situation, consider partitioning the messaging system to send messages from specific producers to a particular queue, or use load balancing to distribute messages across multiple message queues. + +- **Ensuring reliability of the messaging system**. 
A reliable messaging system is needed to guarantee that after the application enqueues a message it won't be lost. This is essential for ensuring that all messages are delivered at least once. + +## When to use this pattern + +Use this pattern when: + +- The workload for an application is divided into tasks that can run asynchronously. +- Tasks are independent and can run in parallel. +- The volume of work is highly variable, requiring a scalable solution. +- The solution must provide high availability, and must be resilient if the processing for a task fails. + +This pattern might not be useful when: + +- It's not easy to separate the application workload into discrete tasks, or there's a high degree of dependence between tasks. +- Tasks must be performed synchronously, and the application logic must wait for a task to complete before continuing. +- Tasks must be performed in a specific sequence. + +> Some messaging systems support sessions that enable a producer to group messages together and ensure that they're all handled by the same consumer. This mechanism can be used with prioritized messages (if they are supported) to implement a form of message ordering that delivers messages in sequence from a producer to a single consumer. + +## Example + +Azure provides storage queues and Service Bus queues that can act as a mechanism for implementing this pattern. The application logic can post messages to a queue, and consumers implemented as tasks in one or more roles can retrieve messages from this queue and process them. For resiliency, a Service Bus queue enables a consumer to use `PeekLock` mode when it retrieves a message from the queue. This mode doesn't actually remove the message, but simply hides it from other consumers. The original consumer can delete the message when it's finished processing it. If the consumer fails, the peek lock will time out and the message will become visible again, allowing another consumer to retrieve it. 
+ +> For detailed information on using Azure Service Bus queues, see [Service Bus queues, topics, and subscriptions](https://msdn.microsoft.com/library/windowsazure/hh367516.aspx). +For information on using Azure storage queues, see [Get started with Azure Queue storage using .NET](https://azure.microsoft.com/documentation/articles/storage-dotnet-how-to-use-queues/). + +The following code from the `QueueManager` class in CompetingConsumers solution available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/competing-consumers) shows how you can create a queue by using a `QueueClient` instance in the `Start` event handler in a web or worker role. + +```csharp +private string queueName = ...; +private string connectionString = ...; +... + +public async Task Start() +{ + // Check if the queue already exists. + var manager = NamespaceManager.CreateFromConnectionString(this.connectionString); + if (!manager.QueueExists(this.queueName)) + { + var queueDescription = new QueueDescription(this.queueName); + + // Set the maximum delivery count for messages in the queue. A message + // is automatically dead-lettered after this number of deliveries. The + // default value for dead letter count is 10. + queueDescription.MaxDeliveryCount = 3; + + await manager.CreateQueueAsync(queueDescription); + } + ... + + // Create the queue client. By default the PeekLock method is used. + this.client = QueueClient.CreateFromConnectionString( + this.connectionString, this.queueName); +} +``` + +The next code snippet shows how an application can create and send a batch of messages to the queue. + +```csharp +public async Task SendMessagesAsync() +{ + // Simulate sending a batch of messages to the queue. 
+ var messages = new List<BrokeredMessage>(); + + for (int i = 0; i < 10; i++) + { + var message = new BrokeredMessage() { MessageId = Guid.NewGuid().ToString() }; + messages.Add(message); + } + await this.client.SendBatchAsync(messages); +} +``` + +The following code shows how a consumer service instance can receive messages from the queue by following an event-driven approach. The `processMessageTask` parameter to the `ReceiveMessages` method is a delegate that references the code to run when a message is received. This code is run asynchronously. + +```csharp +private ManualResetEvent pauseProcessingEvent; +... + +public void ReceiveMessages(Func<BrokeredMessage, Task> processMessageTask) +{ + // Set up the options for the message pump. + var options = new OnMessageOptions(); + + // When AutoComplete is disabled it's necessary to manually + // complete or abandon the messages and handle any errors. + options.AutoComplete = false; + options.MaxConcurrentCalls = 10; + options.ExceptionReceived += this.OptionsOnExceptionReceived; + + // Use of the Service Bus OnMessage message pump. + // The OnMessage method must be called once, otherwise an exception will occur. + this.client.OnMessageAsync( + async (msg) => + { + // Will block the current thread if Stop is called. + this.pauseProcessingEvent.WaitOne(); + + // Execute processing task here. + await processMessageTask(msg); + }, + options); +} +... + +private void OptionsOnExceptionReceived(object sender, + ExceptionReceivedEventArgs exceptionReceivedEventArgs) +{ + ... +} +``` + +Note that autoscaling features, such as those available in Azure, can be used to start and stop role instances as the queue length fluctuates. For more information, see [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). Also, it's not necessary to maintain a one-to-one correspondence between role instances and worker processes—a single role instance can implement multiple worker processes. 
For more information, see [Compute Resource Consolidation pattern](compute-resource-consolidation.md). + +## Related patterns and guidance + +The following patterns and guidance might be relevant when implementing this pattern: + +- [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). Message queues are an asynchronous communications mechanism. If a consumer service needs to send a reply to an application, it might be necessary to implement some form of response messaging. The Asynchronous Messaging Primer provides information on how to implement request/reply messaging using message queues. + +- [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). It might be possible to start and stop instances of a consumer service as the length of the queue that applications post messages to varies. Autoscaling can help to maintain throughput during times of peak processing. + +- [Compute Resource Consolidation Pattern](compute-resource-consolidation.md). It might be possible to consolidate multiple instances of a consumer service into a single process to reduce costs and management overhead. The Compute Resource Consolidation pattern describes the benefits and tradeoffs of following this approach. + +- [Queue-based Load Leveling Pattern](queue-based-load-leveling.md). Introducing a message queue can add resiliency to the system, enabling service instances to handle widely varying volumes of requests from application instances. The message queue acts as a buffer, which levels the load. The Queue-based Load Leveling pattern describes this scenario in more detail. + +- This pattern has a [sample application](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/competing-consumers) associated with it. 
diff --git a/docs/patterns/compute-resource-consolidation.md b/docs/patterns/compute-resource-consolidation.md new file mode 100644 index 00000000000..6d4dd35b963 --- /dev/null +++ b/docs/patterns/compute-resource-consolidation.md @@ -0,0 +1,257 @@ +--- +title: Compute Resource Consolidation +description: Consolidate multiple tasks or operations into a single computational unit +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [design-implementation] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Compute Resource Consolidation + +[!INCLUDE [header](../_includes/header.md)] + +Consolidate multiple tasks or operations into a single computational unit. This can increase compute resource utilization, and reduce the costs and management overhead associated with performing compute processing in cloud-hosted applications. + +## Context and problem + +A cloud application often implements a variety of operations. In some solutions it makes sense to follow the design principle of separation of concerns initially, and divide these operations into separate computational units that are hosted and deployed individually (for example, as separate App Service web apps, separate Virtual Machines, or separate Cloud Service roles). However, although this strategy can help simplify the logical design of the solution, deploying a large number of computational units as part of the same application can increase runtime hosting costs and make management of the system more complex. + +As an example, the figure shows the simplified structure of a cloud-hosted solution that is implemented using more than one computational unit. Each computational unit runs in its own virtual environment. Each function has been implemented as a separate task (labeled Task A through Task E) running in its own computational unit. 
+ +![Running tasks in a cloud environment using a set of dedicated computational units](./_images/compute-resource-consolidation-diagram.png) + + +Each computational unit consumes chargeable resources, even when it's idle or lightly used. Therefore, this isn't always the most cost-effective solution. + +In Azure, this concern applies to roles in a Cloud Service, App Services, and Virtual Machines. These items run in their own virtual environment. Running a collection of separate roles, websites, or virtual machines that are designed to perform a set of well-defined operations, but that need to communicate and cooperate as part of a single solution, can be an inefficient use of resources. + +## Solution + +To help reduce costs, increase utilization, improve communication speed, and reduce management it's possible to consolidate multiple tasks or operations into a single computational unit. + +Tasks can be grouped according to criteria based on the features provided by the environment and the costs associated with these features. A common approach is to look for tasks that have a similar profile concerning their scalability, lifetime, and processing requirements. Grouping these together allows them to scale as a unit. The elasticity provided by many cloud environments enables additional instances of a computational unit to be started and stopped according to the workload. For example, Azure provides autoscaling that you can apply to roles in a Cloud Service, App Services, and Virtual Machines. For more information, see [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). + +As a counter example to show how scalability can be used to determine which operations shouldn't be grouped together, consider the following two tasks: + +- Task 1 polls for infrequent, time-insensitive messages sent to a queue. +- Task 2 handles high-volume bursts of network traffic. 
+ +The second task requires elasticity that can involve starting and stopping a large number of instances of the computational unit. Applying the same scaling to the first task would simply result in more tasks listening for infrequent messages on the same queue, and is a waste of resources. + +In many cloud environments it's possible to specify the resources available to a computational unit in terms of the number of CPU cores, memory, disk space, and so on. Generally, the more resources specified, the greater the cost. To save money, it's important to maximize the work an expensive computational unit performs, and not let it become inactive for an extended period. + +If there are tasks that require a great deal of CPU power in short bursts, consider consolidating these into a single computational unit that provides the necessary power. However, it's important to balance this need to keep expensive resources busy against the contention that could occur if they are over stressed. Long-running, compute-intensive tasks shouldn't share the same computational unit, for example. + +## Issues and considerations + +Consider the following points when implementing this pattern: + +**Scalability and elasticity**. Many cloud solutions implement scalability and elasticity at the level of the computational unit by starting and stopping instances of units. Avoid grouping tasks that have conflicting scalability requirements in the same computational unit. + +**Lifetime**. The cloud infrastructure periodically recycles the virtual environment that hosts a computational unit. When there are many long-running tasks inside a computational unit, it might be necessary to configure the unit to prevent it from being recycled until these tasks have finished. Alternatively, design the tasks by using a check-pointing approach that enables them to stop cleanly, and continue at the point they were interrupted when the computational unit is restarted. + +**Release cadence**. 
If the implementation or configuration of a task changes frequently, it might be necessary to stop the computational unit hosting the updated code, reconfigure and redeploy the unit, and then restart it. This process will also require that all other tasks within the same computational unit are stopped, redeployed, and restarted. + +**Security**. Tasks in the same computational unit might share the same security context and be able to access the same resources. There must be a high degree of trust between the tasks, and confidence that one task isn't going to corrupt or adversely affect another. Additionally, increasing the number of tasks running in a computational unit increases the attack surface of the unit. Each task is only as secure as the one with the most vulnerabilities. + +**Fault tolerance**. If one task in a computational unit fails or behaves abnormally, it can affect the other tasks running within the same unit. For example, if one task fails to start correctly it can cause the entire startup logic for the computational unit to fail, and prevent other tasks in the same unit from running. + +**Contention**. Avoid introducing contention between tasks that compete for resources in the same computational unit. Ideally, tasks that share the same computational unit should exhibit different resource utilization characteristics. For example, two compute-intensive tasks should probably not reside in the same computational unit, and neither should two tasks that consume large amounts of memory. However, mixing a compute intensive task with a task that requires a large amount of memory is a workable combination. + +> [!NOTE] +> Consider consolidating compute resources only for a system that's been in production for a period of time so that operators and developers can monitor the system and create a _heat map_ that identifies how each task utilizes differing resources. 
This map can be used to determine which tasks are good candidates for sharing compute resources. + +**Complexity**. Combining multiple tasks into a single computational unit adds complexity to the code in the unit, possibly making it more difficult to test, debug, and maintain. + +**Stable logical architecture**. Design and implement the code in each task so that it shouldn't need to change, even if the physical environment the task runs in does change. + +**Other strategies**. Consolidating compute resources is only one way to help reduce costs associated with running multiple tasks concurrently. It requires careful planning and monitoring to ensure that it remains an effective approach. Other strategies might be more appropriate, depending on the nature of the work and where the users for whom these tasks are running are located. For example, functional decomposition of the workload (as described by the [Compute Partitioning Guidance](https://msdn.microsoft.com/library/dn589773.aspx)) might be a better option. + +## When to use this pattern + +Use this pattern for tasks that are not cost effective if they run in their own computational units. If a task spends much of its time idle, running this task in a dedicated unit can be expensive. + +This pattern might not be suitable for tasks that perform critical fault-tolerant operations, or tasks that process highly-sensitive or private data and require their own security context. These tasks should run in their own isolated environment, in a separate computational unit. + +## Example + +When building a cloud service on Azure, it’s possible to consolidate the processing performed by multiple tasks into a single role. Typically this is a worker role that performs background or asynchronous processing tasks. + +> In some cases it's possible to include background or asynchronous processing tasks in the web role. 
This technique helps to reduce costs and simplify deployment, although it can impact the scalability and responsiveness of the public-facing interface provided by the web role. The article [Combining Multiple Azure Worker Roles into an Azure Web Role](http://www.31a2ba2a-b718-11dc-8314-0800200c9a66.com/2012/02/combining-multiple-azure-worker-roles.html) contains a detailed description of implementing background or asynchronous processing tasks in a web role. + +The role is responsible for starting and stopping the tasks. When the Azure fabric controller loads a role, it raises the `Start` event for the role. You can override the `OnStart` method of the `WebRole` or `WorkerRole` class to handle this event, perhaps to initialize the data and other resources the tasks in this method depend on. + +When the `OnStart` method completes, the role can start responding to requests. You can find more information and guidance about using the `OnStart` and `Run` methods in a role in the [Application Startup Processes](https://msdn.microsoft.com/library/ff803371.aspx#sec16) section in the patterns & practices guide [Moving Applications to the Cloud](https://msdn.microsoft.com/library/ff728592.aspx). + +> Keep the code in the `OnStart` method as concise as possible. Azure doesn't impose any limit on the time taken for this method to complete, but the role won't be able to start responding to network requests sent to it until this method completes. + +When the `OnStart` method has finished, the role executes the `Run` method. At this point, the fabric controller can start sending requests to the role. + +Place the code that actually creates the tasks in the `Run` method. Note that the `Run` method defines the lifetime of the role instance. When this method completes, the fabric controller will arrange for the role to be shut down. 
+ +When a role shuts down or is recycled, the fabric controller prevents any more incoming requests being received from the load balancer and raises the `Stop` event. You can capture this event by overriding the `OnStop` method of the role and perform any tidying up required before the role terminates. + +> Any actions performed in the `OnStop` method must be completed within five minutes (or 30 seconds if you are using the Azure emulator on a local computer). Otherwise the Azure fabric controller assumes that the role has stalled and will force it to stop. + +The tasks are started by the `Run` method that waits for the tasks to complete. The tasks implement the business logic of the cloud service, and can respond to messages posted to the role through the Azure load balancer. The figure shows the lifecycle of tasks and resources in a role in an Azure cloud service. + +![The lifecycle of tasks and resources in a role in an Azure cloud service](./_images/compute-resource-consolidation-lifecycle.png) + + +The _WorkerRole.cs_ file in the _ComputeResourceConsolidation.Worker_ project shows an example of how you might implement this pattern in an Azure cloud service. + +> The _ComputeResourceConsolidation.Worker_ project is part of the _ComputeResourceConsolidation_ solution available for download from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/compute-resource-consolidation). + +In the worker role, code that runs when the role is initialized creates the required cancellation token and a list of tasks to run. + +```csharp +public class WorkerRole: RoleEntryPoint +{ + // The cancellation token source used to cooperatively cancel running tasks. + private readonly CancellationTokenSource cts = new CancellationTokenSource (); + + // List of tasks running on the role instance. + private readonly List<Task> tasks = new List<Task>(); + + // List of worker tasks to run on this role. 
+ private readonly List<Func<CancellationToken, Task>> workerTasks + = new List<Func<CancellationToken, Task>> + { + MyWorkerTask1, + MyWorkerTask2 + }; + + ... +} +``` + +The `MyWorkerTask1` and the `MyWorkerTask2` methods illustrate how to perform different tasks within the same worker role. The following code shows `MyWorkerTask1`. This is a simple task that sleeps for 30 seconds and then outputs a trace message. It repeats this process until the task is canceled. The code in `MyWorkerTask2` is similar. + +```csharp +// A sample worker role task. +private static async Task MyWorkerTask1(CancellationToken ct) +{ + // Fixed interval to wake up and check for work and/or do work. + var interval = TimeSpan.FromSeconds(30); + + try + { + while (!ct.IsCancellationRequested) + { + // Wake up and do some background processing if not canceled. + // TASK PROCESSING CODE HERE + Trace.TraceInformation("Doing Worker Task 1 Work"); + + // Go back to sleep for a period of time unless asked to cancel. + // Task.Delay will throw an OperationCanceledException when canceled. + await Task.Delay(interval, ct); + } + } + catch (OperationCanceledException) + { + // Expect this exception to be thrown in normal circumstances or check + // the cancellation token. If the role instances are shutting down, a + // cancellation request will be signaled. + Trace.TraceInformation("Stopping service, cancellation requested"); + + // Rethrow the exception. + throw; + } +} +``` + +> The sample code shows a common implementation of a background process. In a real world application you can follow this same structure, except that you should place your own processing logic in the body of the loop that waits for the cancellation request. + +After the worker role has initialized the resources it uses, the `Run` method starts the two tasks concurrently, as shown here. + +```csharp +// RoleEntry Run() is called after OnStart(). +// Returning from Run() will cause a role instance to recycle. 
+public override void Run() +{ + // Start worker tasks and add them to the task list. + foreach (var worker in workerTasks) + tasks.Add(worker(cts.Token)); + + Trace.TraceInformation("Worker host tasks started"); + // The assumption is that all tasks should remain running and not return, + // similar to role entry Run() behavior. + try + { + Task.WaitAny(tasks.ToArray()); + } + catch (AggregateException ex) + { + Trace.TraceError(ex.Message); + + // If any of the inner exceptions in the aggregate exception + // are not cancellation exceptions then re-throw the exception. + ex.Handle(innerEx => (innerEx is OperationCanceledException)); + } + + // If there wasn't a cancellation request, stop all tasks and return from Run() + // An alternative to canceling and returning when a task exits would be to + // restart the task. + if (!cts.IsCancellationRequested) + { + Trace.TraceInformation("Task returned without cancellation request"); + Stop(TimeSpan.FromMinutes(5)); + } +} +... +``` + +In this example, the `Run` method waits for tasks to be completed. If a task is canceled, the `Run` method assumes that the role is being shut down and waits for the remaining tasks to be canceled before finishing (it waits for a maximum of five minutes before terminating). If a task fails due to an expected exception, the `Run` method cancels the task. + +> You could implement more comprehensive monitoring and exception handling strategies in the `Run` method such as restarting tasks that have failed, or including code that enables the role to stop and start individual tasks. + +The `Stop` method shown in the following code is called when the fabric controller shuts down the role instance (it's invoked from the `OnStop` method). The code stops each task gracefully by canceling it. If any task takes more than five minutes to complete, the cancellation processing in the `Stop` method ceases waiting and the role is terminated. 
+ +```csharp +// Stop running tasks and wait for tasks to complete before returning +// unless the timeout expires. +private void Stop(TimeSpan timeout) +{ + Trace.TraceInformation("Stop called. Canceling tasks."); + // Cancel running tasks. + cts.Cancel(); + + Trace.TraceInformation("Waiting for canceled tasks to finish and return"); + + // Wait for all the tasks to complete before returning. Note that the + // emulator currently allows 30 seconds and Azure allows five + // minutes for processing to complete. + try + { + Task.WaitAll(tasks.ToArray(), timeout); + } + catch (AggregateException ex) + { + Trace.TraceError(ex.Message); + + // If any of the inner exceptions in the aggregate exception + // are not cancellation exceptions then rethrow the exception. + ex.Handle(innerEx => (innerEx is OperationCanceledException)); + } +} +``` + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). Autoscaling can be used to start and stop instances of service hosting computational resources, depending on the anticipated demand for processing. + +- [Compute Partitioning Guidance](https://msdn.microsoft.com/library/dn589773.aspx). Describes how to allocate the services and components in a cloud service in a way that helps to minimize running costs while maintaining the scalability, performance, availability, and security of the service. + +- This pattern includes a downloadable [sample application](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/compute-resource-consolidation). diff --git a/docs/patterns/cqrs.md b/docs/patterns/cqrs.md new file mode 100644 index 00000000000..73bcd41bc35 --- /dev/null +++ b/docs/patterns/cqrs.md @@ -0,0 +1,257 @@ +--- +title: CQRS +description: Segregate operations that read data from operations that update data by using separate interfaces. 
+keywords: design pattern +services: '' +documentationcenter: na +author: dragon119 +manager: bennage +tags: '' + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, design-implementation, performance-scalability] + +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 12/14/2016 +ms.author: mwasson +--- + +# Command and Query Responsibility Segregation (CQRS) + +[!INCLUDE [header](../_includes/header.md)] + +Segregate operations that read data from operations that update data by using separate interfaces. This can maximize performance, scalability, and security. It also supports the evolution of the system over time through higher flexibility, and prevents update commands from causing merge conflicts at the domain level. + +## Context and problem + +In traditional data management systems, both commands (updates to the data) and queries (requests for data) are executed against the same set of entities in a single data repository. These entities can be a subset of the rows in one or more tables in a relational database such as SQL Server. + +Typically in these systems, all create, read, update, and delete (CRUD) operations are applied to the same representation of the entity. For example, a data transfer object (DTO) representing a customer is retrieved from the data store by the data access layer (DAL) and displayed on the screen. A user updates some fields of the DTO (perhaps through data binding) and the DTO is then saved back in the data store by the DAL. The same DTO is used for both the read and write operations. The figure illustrates a traditional CRUD architecture. + +![A traditional CRUD architecture](./_images/command-and-query-responsibility-segregation-cqrs-tradition-crud.png) + +Traditional CRUD designs work well when only limited business logic is applied to the data operations. 
Scaffold mechanisms provided by development tools can create data access code very quickly, which can then be customized as required. + +However, the traditional CRUD approach has some disadvantages: + +- It often means that there's a mismatch between the read and write representations of the data, such as additional columns or properties that must be updated correctly even though they aren't required as part of an operation. + +- It risks data contention when records are locked in the data store in a collaborative domain, where multiple actors operate in parallel on the same set of data. It also risks update conflicts caused by concurrent updates when optimistic locking is used. These risks increase as the complexity and throughput of the system grows. In addition, the traditional approach can have a negative effect on performance due to load on the data store and data access layer, and the complexity of queries required to retrieve information. + +- It can make managing security and permissions more complex because each entity is subject to both read and write operations, which might expose data in the wrong context. + +> For a deeper understanding of the limits of the CRUD approach see [CRUD, Only When You Can Afford It](https://msdn.microsoft.com/library/ms978509.aspx). + +## Solution + +Command and Query Responsibility Segregation (CQRS) is a pattern that segregates the operations that read data (queries) from the operations that update data (commands) by using separate interfaces. This means that the data models used for querying and updates are different. The models can then be isolated, as shown in the following figure, although that's not an absolute requirement. + +![A basic CQRS architecture](./_images/command-and-query-responsibility-segregation-cqrs-basic.png) + +Compared to the single data model used in CRUD-based systems, the use of separate query and update models for the data in CQRS-based systems simplifies design and implementation. 
However, one disadvantage is that unlike CRUD designs, CQRS code can't automatically be generated using scaffold mechanisms. + +The query model for reading data and the update model for writing data can access the same physical store, perhaps by using SQL views or by generating projections on the fly. However, it's common to separate the data into different physical stores to maximize performance, scalability, and security, as shown in the next figure. + +![A CQRS architecture with separate read and write stores](./_images/command-and-query-responsibility-segregation-cqrs-separate-stores.png) + +The read store can be a read-only replica of the write store, or the read and write stores can have a different structure altogether. Using multiple read-only replicas of the read store can greatly increase query performance and application UI responsiveness, especially in distributed scenarios where read-only replicas are located close to the application instances. Some database systems (SQL Server) provide additional features such as failover replicas to maximize availability. + +Separation of the read and write stores also allows each to be scaled appropriately to match the load. For example, read stores typically encounter a much higher load than write stores. + +When the query/read model contains denormalized data (see [Materialized View pattern](materialized-view.md)), performance is maximized when reading data for each of the views in an application or when querying the data in the system. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- Dividing the data store into separate physical stores for read and write operations can increase the performance and security of a system, but it can add complexity in terms of resiliency and eventual consistency. 
The read model store must be updated to reflect changes to the write model store, and it can be difficult to detect when a user has issued a request based on stale read data, which means that the operation can't be completed. + + > For a description of eventual consistency see the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). + +- Consider applying CQRS to limited sections of your system where it will be most valuable. + +- A typical approach to deploying eventual consistency is to use event sourcing in conjunction with CQRS so that the write model is an append-only stream of events driven by execution of commands. These events are used to update materialized views that act as the read model. For more information see [Event Sourcing and CQRS](https://msdn.microsoft.com/library/dn568103.aspx#EventSourcingandCQRS). + +## When to use this pattern + +Use this pattern in the following situations: + +- Collaborative domains where multiple operations are performed in parallel on the same data. CQRS allows you to define commands with enough granularity to minimize merge conflicts at the domain level (any conflicts that do arise can be merged by the command), even when updating what appears to be the same type of data. + +- Task-based user interfaces where users are guided through a complex process as a series of steps or with complex domain models. Also, useful for teams already familiar with domain-driven design (DDD) techniques. The write model has a full command-processing stack with business logic, input validation, and business validation to ensure that everything is always consistent for each of the aggregates (each cluster of associated objects treated as a unit for data changes) in the write model. The read model has no business logic or validation stack and just returns a DTO for use in a view model. The read model is eventually consistent with the write model. 
+ +- Scenarios where performance of data reads must be fine tuned separately from performance of data writes, especially when the read/write ratio is very high, and when horizontal scaling is required. For example, in many systems the number of read operations is many times greater than the number of write operations. To accommodate this, consider scaling out the read model, but running the write model on only one or a few instances. A small number of write model instances also helps to minimize the occurrence of merge conflicts. + +- Scenarios where one team of developers can focus on the complex domain model that is part of the write model, and another team can focus on the read model and the user interfaces. + +- Scenarios where the system is expected to evolve over time and might contain multiple versions of the model, or where business rules change regularly. + +- Integration with other systems, especially in combination with event sourcing, where the temporal failure of one subsystem shouldn't affect the availability of the others. + +This pattern isn't recommended in the following situations: + +- Where the domain or the business rules are simple. + +- Where a simple CRUD-style user interface and the related data access operations are sufficient. + +- For implementation across the whole system. There are specific components of an overall data management scenario where CQRS can be useful, but it can add considerable and unnecessary complexity when it isn't required. + +## Event Sourcing and CQRS + +The CQRS pattern is often used along with the Event Sourcing pattern. CQRS-based systems use separate read and write data models, each tailored to relevant tasks and often located in physically separate stores. When used with the [Event Sourcing](event-sourcing.md) pattern, the store of events is the write model, and is the official source of information.
The read model of a CQRS-based system provides materialized views of the data, typically as highly denormalized views. These views are tailored to the interfaces and display requirements of the application, which helps to maximize both display and query performance. + +Using the stream of events as the write store, rather than the actual data at a point in time, avoids update conflicts on a single aggregate and maximizes performance and scalability. The events can be used to asynchronously generate materialized views of the data that are used to populate the read store. + +Because the event store is the official source of information, it is possible to delete the materialized views and replay all past events to create a new representation of the current state when the system evolves, or when the read model must change. The materialized views are in effect a durable read-only cache of the data. + +When using CQRS combined with the Event Sourcing pattern, consider the following: + +- As with any system where the write and read stores are separate, systems based on this pattern are only eventually consistent. There will be some delay between the event being generated and the data store being updated. + +- The pattern adds complexity because code must be created to initiate and handle events, and assemble or update the appropriate views or objects required by queries or a read model. The complexity of the CQRS pattern when used with the Event Sourcing pattern can make a successful implementation more difficult, and requires a different approach to designing systems. However, event sourcing can make it easier to model the domain, and makes it easier to rebuild views or create new ones because the intent of the changes in the data is preserved. 
+ +- Generating materialized views for use in the read model or projections of the data by replaying and handling the events for specific entities or collections of entities can require significant processing time and resource usage. This is especially true if it requires summation or analysis of values over long periods, because all the associated events might need to be examined. Resolve this by implementing snapshots of the data at scheduled intervals, such as a total count of the number of a specific action that have occurred, or the current state of an entity. + +## Example + +The following code shows some extracts from an example of a CQRS implementation that uses different definitions for the read and the write models. The model interfaces don't dictate any features of the underlying data stores, and they can evolve and be fine-tuned independently because these interfaces are separated. + +The following code shows the read model definition. + +```csharp +// Query interface +namespace ReadModel +{ + public interface ProductsDao + { + ProductDisplay FindById(int productId); + IEnumerable FindByName(string name); + IEnumerable FindOutOfStockProducts(); + IEnumerable FindRelatedProducts(int productId); + } + + public class ProductDisplay + { + public int ID { get; set; } + public string Name { get; set; } + public string Description { get; set; } + public decimal UnitPrice { get; set; } + public bool IsOutOfStock { get; set; } + public double UserRating { get; set; } + } + + public class ProductInventory + { + public int ID { get; set; } + public string Name { get; set; } + public int CurrentStock { get; set; } + } +} +``` + +The system allows users to rate products. The application code does this using the `RateProduct` command shown in the following code. 
+ +```csharp +public interface ICommand +{ + Guid Id { get; } +} + +public class RateProduct : ICommand +{ + public RateProduct() + { + this.Id = Guid.NewGuid(); + } + public Guid Id { get; set; } + public int ProductId { get; set; } + public int rating { get; set; } + public int UserId {get; set; } +} +``` + +The system uses the `ProductsCommandHandler` class to handle commands sent by the application. Clients typically send commands to the domain through a messaging system such as a queue. The command handler accepts these commands and invokes methods of the domain interface. The granularity of each command is designed to reduce the chance of conflicting requests. The following code shows an outline of the `ProductsCommandHandler` class. + +```csharp +public class ProductsCommandHandler : + ICommandHandler<AddNewProduct>, + ICommandHandler<RateProduct>, + ICommandHandler<AddToInventory>, + ICommandHandler<ConfirmItemsShipped>, + ICommandHandler<UpdateStockFromInventoryRecount> +{ + private readonly IRepository repository; + + public ProductsCommandHandler (IRepository repository) + { + this.repository = repository; + } + + void Handle (AddNewProduct command) + { + ... + } + + void Handle (RateProduct command) + { + var product = repository.Find(command.ProductId); + if (product != null) + { + product.RateProduct(command.UserId, command.rating); + repository.Save(product); + } + } + + void Handle (AddToInventory command) + { + ... + } + + void Handle (ConfirmItemsShipped command) + { + ... + } + + void Handle (UpdateStockFromInventoryRecount command) + { + ... + } +} +``` + +The following code shows the `IProductsDomain` interface from the write model.
+ +```csharp +public interface IProductsDomain +{ + void AddNewProduct(int id, string name, string description, decimal price); + void RateProduct(int userId, int rating); + void AddToInventory(int productId, int quantity); + void ConfirmItemsShipped(int productId, int quantity); + void UpdateStockFromInventoryRecount(int productId, int updatedQuantity); +} +``` + +Also notice how the `IProductsDomain` interface contains methods that have a meaning in the domain. Typically, in a CRUD environment these methods would have generic names such as `Save` or `Update`, and have a DTO as the only argument. The CQRS approach can be designed to meet the needs of this organization's business and inventory management systems. + +## Related patterns and guidance + +The following patterns and guidance are useful when implementing this pattern: + +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). Explains the issues that are typically encountered due to eventual consistency between the read and write data stores when using the CQRS pattern, and how these issues can be resolved. + +- [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). Describes how the read and write data stores used in the CQRS pattern can be divided into partitions that can be managed and accessed separately to improve scalability, reduce contention, and optimize performance. + +- [Event Sourcing Pattern](event-sourcing.md). Describes in more detail how Event Sourcing can be used with the CQRS pattern to simplify tasks in complex domains while improving performance, scalability, and responsiveness. It also describes how to provide consistency for transactional data while maintaining full audit trails and history that can enable compensating actions. + +- [Materialized View Pattern](materialized-view.md). The read model of a CQRS implementation can contain materialized views of the write model data, or the read model can be used to generate materialized views.
+ +- The patterns & practices guide [CQRS Journey](http://aka.ms/cqrs). In particular, [Introducing the Command Query Responsibility Segregation Pattern](https://msdn.microsoft.com/library/jj591573.aspx) explores the pattern and when it's useful, and [Epilogue: Lessons Learned](https://msdn.microsoft.com/library/jj591568.aspx) helps you understand some of the issues that come up when using this pattern. + +- The post [CQRS by Martin Fowler](http://martinfowler.com/bliki/CQRS.html), which explains the basics of the pattern and links to other useful resources. + +- [Greg Young’s posts](http://codebetter.com/gregyoung/), which explore many aspects of the CQRS pattern. diff --git a/docs/patterns/event-sourcing.md b/docs/patterns/event-sourcing.md new file mode 100644 index 00000000000..dbacd1839f7 --- /dev/null +++ b/docs/patterns/event-sourcing.md @@ -0,0 +1,167 @@ +--- +title: Event Sourcing +description: Use an append-only store to record the full series of events that describe actions taken on data in a domain. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Event Sourcing + +[!INCLUDE [header](../_includes/header.md)] + +Instead of storing just the current state of the data in a domain, use an append-only store to record the full series of actions taken on that data. +The store acts as the system of record and can be used to materialize the domain objects. This can simplify tasks in complex domains, by avoiding the need to synchronize the data model and the business domain, while improving performance, scalability, and responsiveness. It can also provide consistency for transactional data, and maintain full audit trails and history that can enable compensating actions. 
+ +## Context and problem + +Most applications work with data, and the typical approach is for the application to maintain the current state of the data by updating it as users work with it. For example, in the traditional create, read, update, and delete (CRUD) model a typical data process is to read data from the store, make some modifications to it, and update the current state of the data with the new values—often by using transactions that lock the data. + +The CRUD approach has some limitations: + +- CRUD systems perform update operations directly against a data store, which can slow down performance and responsiveness, and limit scalability, due to the processing overhead it requires. + +- In a collaborative domain with many concurrent users, data update conflicts are more likely because the update operations take place on a single item of data. + +- Unless there's an additional auditing mechanism that records the details of each operation in a separate log, history is lost. + +> For a deeper understanding of the limits of the CRUD approach see [CRUD, Only When You Can Afford It](https://msdn.microsoft.com/library/ms978509.aspx). + +## Solution + +The Event Sourcing pattern defines an approach to handling operations on data that's driven by a sequence of events, each of which is recorded in an append-only store. Application code sends a series of events that imperatively describe each action that has occurred on the data to the event store, where they're persisted. Each event represents a set of changes to the data (such as `AddedItemToOrder`). + +The events are persisted in an event store that acts as the system of record (the authoritative data source) about the current state of the data. The event store typically publishes these events so that consumers can be notified and can handle them if needed. 
Consumers could, for example, initiate tasks that apply the operations in the events to other systems, or perform any other associated action that's required to complete the operation. Notice that the application code that generates the events is decoupled from the systems that subscribe to the events. + +Typical uses of the events published by the event store are to maintain materialized views of entities as actions in the application change them, and for integration with external systems. For example, a system can maintain a materialized view of all customer orders that's used to populate parts of the UI. As the application adds new orders, adds or removes items on the order, and adds shipping information, the events that describe these changes can be handled and used to update the [materialized view](materialized-view.md). + +In addition, at any point it's possible for applications to read the history of events, and use it to materialize the current state of an entity by playing back and consuming all the events related to that entity. This can occur on demand to materialize a domain object when handling a request, or through a scheduled task so that the state of the entity can be stored as a materialized view to support the presentation layer. + +The figure shows an overview of the pattern, including some of the options for using the event stream such as creating a materialized view, integrating events with external applications and systems, and replaying events to create projections of the current state of specific entities. + +![An overview and example of the Event Sourcing pattern](./_images/event-sourcing-overview.png) + + +The Event Sourcing pattern provides the following advantages: + +Events are immutable and can be stored using an append-only operation. The user interface, workflow, or process that initiated an event can continue, and tasks that handle the events can run in the background. 
This, combined with the fact that there's no contention during the processing of transactions, can vastly improve performance and scalability for applications, especially for the presentation level or user interface. + +Events are simple objects that describe some action that occurred, together with any associated data required to describe the action represented by the event. Events don't directly update a data store. They're simply recorded for handling at the appropriate time. This can simplify implementation and management. + +Events typically have meaning for a domain expert, whereas [object-relational impedance mismatch](https://en.wikipedia.org/wiki/Object-relational_impedance_mismatch) can make complex database tables hard to understand. Tables are artificial constructs that represent the current state of the system, not the events that occurred. + +Event sourcing can help prevent concurrent updates from causing conflicts because it avoids the requirement to directly update objects in the data store. However, the domain model must still be designed to protect itself from requests that might result in an inconsistent state. + +The append-only storage of events provides an audit trail that can be used to monitor actions taken against a data store, regenerate the current state as materialized views or projections by replaying the events at any time, and assist in testing and debugging the system. In addition, the requirement to use compensating events to cancel changes provides a history of changes that were reversed, which wouldn't be the case if the model simply stored the current state. The list of events can also be used to analyze application performance and detect user behavior trends, or to obtain other useful business information. + +The event store raises events, and tasks perform operations in response to those events. This decoupling of the tasks from the events provides flexibility and extensibility. 
Tasks know about the type of event and the event data, but not about the operation that triggered the event. In addition, multiple tasks can handle each event. This enables easy integration with other services and systems that only listen for new events raised by the event store. However, the event sourcing events tend to be very low level, and it might be necessary to generate specific integration events instead. + +> Event sourcing is commonly combined with the CQRS pattern by performing the data management tasks in response to the events, and by materializing views from the stored events. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +The system will only be eventually consistent when creating materialized views or generating projections of data by replaying events. There's some delay between an application adding events to the event store as the result of handling a request, the events being published, and consumers of the events handling them. During this period, new events that describe further changes to entities might have arrived at the event store. + +> [!NOTE] +> See the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx) for information about eventual consistency. + +The event store is the permanent source of information, and so the event data should never be updated. The only way to update an entity to undo a change is to add a compensating event to the event store. If the format (rather than the data) of the persisted events needs to change, perhaps during a migration, it can be difficult to combine existing events in the store with the new version. It might be necessary to iterate through all the events making changes so they're compliant with the new format, or add new events that use the new format. Consider using a version stamp on each version of the event schema to maintain both the old and the new event formats. 
+ +Multi-threaded applications and multiple instances of applications might be storing events in the event store. The consistency of events in the event store is vital, as is the order of events that affect a specific entity (the order that changes occur to an entity affects its current state). Adding a timestamp to every event can help to avoid issues. Another common practice is to annotate each event resulting from a request with an incremental identifier. If two actions attempt to add events for the same entity at the same time, the event store can reject an event that matches an existing entity identifier and event identifier. + +There's no standard approach, or existing mechanisms such as SQL queries, for reading the events to obtain information. The only data that can be extracted is a stream of events using an event identifier as the criteria. The event ID typically maps to individual entities. The current state of an entity can be determined only by replaying all of the events that relate to it against the original state of that entity. + +The length of each event stream affects managing and updating the system. If the streams are large, consider creating snapshots at specific intervals such as a specified number of events. The current state of the entity can be obtained from the snapshot and by replaying any events that occurred after that point in time. For more information about creating snapshots of data, see [Snapshot on Martin Fowler’s Enterprise Application Architecture website](http://martinfowler.com/eaaDev/Snapshot.html) and [Master-Subordinate Snapshot Replication](https://msdn.microsoft.com/library/ff650012.aspx). + +Even though event sourcing minimizes the chance of conflicting updates to the data, the application must still be able to deal with inconsistencies that result from eventual consistency and the lack of transactions. 
For example, an event that indicates a reduction in stock inventory might arrive in the data store while an order for that item is being placed, resulting in a requirement to reconcile the two operations either by advising the customer or creating a back order. + +Event publication might be “at least once,” and so consumers of the events must be idempotent. They must not reapply the update described in an event if the event is handled more than once. For example, if multiple instances of a consumer maintain an aggregate of an entity's property, such as the total number of orders placed, only one must succeed in incrementing the aggregate when an order-placed event occurs. While this isn't a key characteristic of event sourcing, it's the usual implementation decision. + +## When to use this pattern + +Use this pattern in the following scenarios: + +- When you want to capture intent, purpose, or reason in the data. For example, changes to a customer entity can be captured as a series of specific event types such as _Moved home_, _Closed account_, or _Deceased_. + +- When it's vital to minimize or completely avoid the occurrence of conflicting updates to data. + +- When you want to record events that occur, and be able to replay them to restore the state of a system, roll back changes, or keep a history and audit log. For example, when a task involves multiple steps you might need to execute actions to revert updates and then replay some steps to bring the data back into a consistent state. + +- When using events is a natural feature of the operation of the application, and requires little additional development or implementation effort. + +- When you need to decouple the process of inputting or updating data from the tasks required to apply these actions. This might be to improve UI performance, or to distribute events to other listeners that take action when the events occur.
For example, integrating a payroll system with an expense submission website so that events raised by the event store in response to data updates made in the website are consumed by both the website and the payroll system. + +- When you want flexibility to be able to change the format of materialized models and entity data if requirements change, or—when used in conjunction with CQRS—you need to adapt a read model or the views that expose the data. + +- When used in conjunction with CQRS, and eventual consistency is acceptable while a read model is updated, or the performance impact of rehydrating entities and data from an event stream is acceptable. + +This pattern might not be useful in the following situations: + +- Small or simple domains, systems that have little or no business logic, or nondomain systems that naturally work well with traditional CRUD data management mechanisms. + +- Systems where consistency and real-time updates to the views of the data are required. + +- Systems where audit trails, history, and capabilities to roll back and replay actions are not required. + +- Systems where there's only a very low occurrence of conflicting updates to the underlying data. For example, systems that predominantly add data rather than updating it. + +## Example + +A conference management system needs to track the number of completed bookings for a conference so that it can check whether there are seats still available when a potential attendee tries to make a booking. The system could store the total number of bookings for a conference in at least two ways: + +- The system could store the information about the total number of bookings as a separate entity in a database that holds booking information. As bookings are made or canceled, the system could increment or decrement this number as appropriate. This approach is simple in theory, but can cause scalability issues if a large number of attendees are attempting to book seats during a short period of time. 
This can happen, for example, in the last day or so prior to the booking period closing. + +- The system could store information about bookings and cancellations as events held in an event store. It could then calculate the number of seats available by replaying these events. This approach can be more scalable due to the immutability of events. The system only needs to be able to read data from the event store, or append data to the event store. Event information about bookings and cancellations is never modified. + +The following diagram illustrates how the seat reservation subsystem of the conference management system might be implemented using event sourcing. + +![Using event sourcing to capture information about seat reservations in a conference management system](./_images/event-sourcing-bounded-context.png) + + +The sequence of actions for reserving two seats is as follows: + +1. The user interface issues a command to reserve seats for two attendees. The command is handled by a separate command handler: a piece of logic that is decoupled from the user interface and is responsible for handling requests posted as commands. + +2. An aggregate containing information about all reservations for the conference is constructed by querying the events that describe bookings and cancellations. This aggregate is called `SeatAvailability`, and is contained within a domain model that exposes methods for querying and modifying the data in the aggregate. + + > Some optimizations to consider are using snapshots (so that you don’t need to query and replay the full list of events to obtain the current state of the aggregate), and maintaining a cached copy of the aggregate in memory. + +3. The command handler invokes a method exposed by the domain model to make the reservations. + +4. The `SeatAvailability` aggregate records an event containing the number of seats that were reserved. The next time the aggregate applies events, all the reservations will be used to compute how many seats remain.
+ +5. The system appends the new event to the list of events in the event store. + +If a user cancels a seat, the system follows a similar process except the command handler issues a command that generates a seat cancellation event and appends it to the event store. + +As well as providing more scope for scalability, using an event store also provides a complete history, or audit trail, of the bookings and cancellations for a conference. The events in the event store are the accurate record. There is no need to persist aggregates in any other way because the system can easily replay the events and restore the state to any point in time. + +> You can find more information about this example in [Introducing Event Sourcing](https://msdn.microsoft.com/library/jj591559.aspx). + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- [Command and Query Responsibility Segregation (CQRS) Pattern](cqrs.md). The write store that provides the permanent source of information for a CQRS implementation is often based on an implementation of the Event Sourcing pattern. Describes how to segregate the operations that read data in an application from the operations that update data by using separate interfaces. + +- [Materialized View Pattern](materialized-view.md). The data store used in a system based on event sourcing is typically not well suited to efficient querying. Instead, a common approach is to generate prepopulated views of the data at regular intervals, or when the data changes. Shows how this can be done. + +- [Compensating Transaction Pattern](compensating-transaction.md). The existing data in an event sourcing store is not updated; instead, new entries are added that transition the state of entities to the new values. To reverse a change, compensating entries are used because it isn't possible to simply reverse the previous change.
Describes how to undo the work that was performed by a previous operation. + +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). When using event sourcing with a separate read store or materialized views, the read data won't be immediately consistent, instead it'll be only eventually consistent. Summarizes the issues surrounding maintaining consistency over distributed data. + +- [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). Data is often partitioned when using event sourcing to improve scalability, reduce contention, and optimize performance. Describes how to divide data into discrete partitions, and the issues that can arise. + +- Greg Young’s post [Why use Event Sourcing?](http://codebetter.com/gregyoung/2010/02/20/why-use-event-sourcing/). diff --git a/docs/patterns/external-configuration-store.md b/docs/patterns/external-configuration-store.md new file mode 100644 index 00000000000..6a14d82fef6 --- /dev/null +++ b/docs/patterns/external-configuration-store.md @@ -0,0 +1,340 @@ +--- +title: External Configuration Store +description: Move configuration information out of the application deployment package to a centralized location. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [design-implementation, management-monitoring] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# External Configuration Store + +[!INCLUDE [header](../_includes/header.md)] + +Move configuration information out of the application deployment package to a centralized location. This can provide opportunities for easier management and control of configuration data, and for sharing configuration data across applications and application instances. 
+ +## Context and problem + +The majority of application runtime environments include configuration information that's held in files deployed with the application. In some cases, it's possible to edit these files to change the application behavior after it's been deployed. However, changes to the configuration require the application be redeployed, often resulting in unacceptable downtime and other administrative overhead. + +Local configuration files also limit the configuration to a single application, but sometimes it would be useful to share configuration settings across multiple applications. Examples include database connection strings, UI theme information, or the URLs of queues and storage used by a related set of applications. + +It's challenging to manage changes to local configurations across multiple running instances of the application, especially in a cloud-hosted scenario. It can result in instances using different configuration settings while the update is being deployed. + +In addition, updates to applications and components might require changes to configuration schemas. Many configuration systems don't support different versions of configuration information. + +## Solution + +Store the configuration information in external storage, and provide an interface that can be used to quickly and efficiently read and update configuration settings. The type of external store depends on the hosting and runtime environment of the application. In a cloud-hosted scenario it's typically a cloud-based storage service, but could be a hosted database or other system. + +The backing store you choose for configuration information should have an interface that provides consistent and easy-to-use access. It should expose the information in a correctly typed and structured format. 
The implementation might also need to authorize users’ access in order to protect configuration data, and be flexible enough to allow storage of multiple versions of the configuration (such as development, staging, or production, including multiple release versions of each one). + +> Many built-in configuration systems read the data when the application starts up, and cache the data in memory to provide fast access and minimize the impact on application performance. Depending on the type of backing store used, and the latency of this store, it might be helpful to implement a caching mechanism within the external configuration store. For more information, see the [Caching Guidance](https://msdn.microsoft.com/library/dn589802.aspx). The figure illustrates an overview of the External Configuration Store pattern with optional local cache. + +![An overview of the External Configuration Store pattern with optional local cache](./_images/external-configuration-store-overview.png) + + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +Choose a backing store that offers acceptable performance, high availability, robustness, and can be backed up as part of the application maintenance and administration process. In a cloud-hosted application, using a cloud storage mechanism is usually a good choice to meet these requirements. + +Design the schema of the backing store to allow flexibility in the types of information it can hold. Ensure that it provides for all configuration requirements such as typed data, collections of settings, multiple versions of settings, and any other features that the applications using it require. The schema should be easy to extend to support additional settings as requirements change. + +Consider the physical capabilities of the backing store, how it relates to the way configuration information is stored, and the effects on performance. 
For example, storing an XML document containing configuration information will require either the configuration interface or the application to parse the document in order to read individual settings. It'll make updating a setting more complicated, though caching the settings can help to offset slower read performance. + +Consider how the configuration interface will permit control of the scope and inheritance of configuration settings. For example, it might be a requirement to scope configuration settings at the organization, application, and the machine level. It might need to support delegation of control over access to different scopes, and to prevent or allow individual applications to override settings. + +Ensure that the configuration interface can expose the configuration data in the required formats such as typed values, collections, key/value pairs, or property bags. + +Consider how the configuration store interface will behave when settings contain errors, or don't exist in the backing store. It might be appropriate to return default settings and log errors. Also consider aspects such as the case sensitivity of configuration setting keys or names, the storage and handling of binary data, and the ways that null or empty values are handled. + +Consider how to protect the configuration data to allow access to only the appropriate users and applications. This is likely a feature of the configuration store interface, but it's also necessary to ensure that the data in the backing store can't be accessed directly without the appropriate permission. Ensure strict separation between the permissions required to read and to write configuration data. Also consider whether you need to encrypt some or all of the configuration settings, and how this'll be implemented in the configuration store interface. 
+ +Centrally stored configurations, which change application behavior during runtime, are critically important and should be deployed, updated, and managed using the same mechanisms as deploying application code. For example, changes that can affect more than one application must be carried out using a full test and staged deployment approach to ensure that the change is appropriate for all applications that use this configuration. If an administrator edits a setting to update one application, it could adversely impact other applications that use the same setting. + +If an application caches configuration information, the application needs to be alerted if the configuration changes. It might be possible to implement an expiration policy over cached configuration data so that this information is automatically refreshed periodically and any changes picked up (and acted on). The [Runtime Reconfiguration pattern](runtime-reconfiguration.md) might be relevant to your scenario. + +## When to use this pattern + +This pattern is useful for: + +- Configuration settings that are shared between multiple applications and application instances, or where a standard configuration must be enforced across multiple applications and application instances. + +- A standard configuration system that doesn't support all of the required configuration settings, such as storing images or complex data types. + +- As a complementary store for some of the settings for applications, perhaps allowing applications to override some or all of the centrally-stored settings. + +- As a way to simplify administration of multiple applications, and optionally for monitoring use of configuration settings by logging some or all types of access to the configuration store. + +## Example + +In a Microsoft Azure hosted application, a typical choice for storing configuration information externally is to use Azure Storage. 
This is resilient, offers high performance, and is replicated three times with automatic failover to offer high availability. Azure Table storage provides a key/value store with the ability to use a flexible schema for the values. Azure Blob storage provides a hierarchical, container-based store that can hold any type of data in individually named blobs. + +The following example shows how a configuration store can be implemented over Blob storage to store and expose configuration information. The `BlobSettingsStore` class abstracts Blob storage for holding configuration information, and implements the `ISettingsStore` interface shown in the following code. + +> This code is provided in the _ExternalConfigurationStore.Cloud_ project in the _ExternalConfigurationStore_ solution, available from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/external-configuration-store). + +```csharp +public interface ISettingsStore +{ + string Version { get; } + + Dictionary<string, string> FindAll(); + + void Update(string key, string value); +} +``` + +This interface defines methods for retrieving and updating configuration settings held in the configuration store, and includes a version number that can be used to detect whether any configuration settings have been modified recently. The `BlobSettingsStore` class uses the `ETag` property of the blob to implement versioning. The `ETag` property is updated automatically each time the blob is written. + +> By design, this simple solution exposes all configuration settings as string values rather than typed values. + +The `ExternalConfigurationManager` class provides a wrapper around a `BlobSettingsStore` object. An application can use this class to store and retrieve configuration information. This class uses the Microsoft [Reactive Extensions](https://msdn.microsoft.com/library/hh242985.aspx) library to expose any changes made to the configuration through an implementation of the `IObservable<T>` interface. 
If a setting is modified by calling the `SetAppSetting` method, the `Changed` event is raised and all subscribers to this event will be notified. + +Note that all settings are also cached in a `Dictionary` object inside the `ExternalConfigurationManager` class for fast access. The `SetAppSetting` method updates this cache, and the `GetAppSetting` method used to retrieve a configuration setting reads the data from the cache. If the setting isn't found in the cache, it's retrieved from the `BlobSettingsStore` object instead. + +The `GetAppSetting` method invokes the `CheckForConfigurationChanges` method to detect whether the configuration information in blob storage has changed. It does this by examining the version number and comparing it with the current version number held by the `ExternalConfigurationManager` object. If one or more changes have occurred, the `Changed` event is raised and the configuration settings cached in the `Dictionary` object are refreshed. This is an application of the [Cache-Aside pattern](cache-aside.md). + +The following code sample shows how the `Changed` event, the `SetAppSetting` method, the `GetAppSetting` method, and the `CheckForConfigurationChanges` method are implemented: + +```csharp +public class ExternalConfigurationManager : IDisposable +{ + // An abstraction of the configuration store. + private readonly ISettingsStore settings; + private readonly ISubject<KeyValuePair<string, string>> changed; + ... + private Dictionary<string, string> settingsCache; + private string currentVersion; + ... + public ExternalConfigurationManager(ISettingsStore settings, ...) + { + this.settings = settings; + ... + } + ... + public IObservable<KeyValuePair<string, string>> Changed + { + get { return this.changed.AsObservable(); } + } + ... + public void SetAppSetting(string key, string value) + { + ... + // Update the setting in the store. + this.settings.Update(key, value); + + // Publish the event. + this.changed.OnNext( + new KeyValuePair<string, string>(key, value)); + + // Refresh the settings cache. 
+ this.CheckForConfigurationChanges(); + } + + public string GetAppSetting(string key) + { + ... + // Try to get the value from the settings cache. + // If there's a miss, get the setting from the settings store. + string value; + if (this.settingsCache.TryGetValue(key, out value)) + { + return value; + } + + // Check for changes and refresh the cache. + this.CheckForConfigurationChanges(); + + return this.settingsCache[key]; + } + ... + private void CheckForConfigurationChanges() + { + try + { + + // Assume that updates are infrequent. Lock to avoid + // race conditions when refreshing the cache. + lock (this.settingsSyncObject) + { + var latestVersion = this.settings.Version; + + // If the versions differ, the configuration has changed. + if (this.currentVersion != latestVersion) + { + // Get the latest settings from the settings store and publish the changes. + var latestSettings = this.settings.FindAll(); + latestSettings.Except(this.settingsCache).ToList().ForEach( + kv => this.changed.OnNext(kv)); + + // Update the current version. + this.currentVersion = latestVersion; + + // Refresh settings cache. + this.settingsCache = latestSettings; + } + } + } + catch (Exception ex) + { + this.changed.OnError(ex); + } + } +} +``` + +> The `ExternalConfigurationManager` class also provides a property named `Environment`. This property supports varying configurations for an application running in different environments, such as staging and production. + +An `ExternalConfigurationManager` object can also query the `BlobSettingsStore` object periodically for any changes (using a timer). The `StartMonitor` and `StopMonitor` methods illustrated in the code sample below start and stop the timer. The `OnTimerElapsed` method runs when the timer expires and invokes the `CheckForConfigurationChanges` method to detect any changes and raise the `Changed` event, as described earlier. + +```csharp +public class ExternalConfigurationManager : IDisposable +{ + ... 
+ private readonly ISubject<KeyValuePair<string, string>> changed; + private readonly Timer timer; + private ISettingsStore settings; + ... + public ExternalConfigurationManager(ISettingsStore settings, + TimeSpan interval, ...) + { + ... + + // Set up the timer. + this.timer = new Timer(interval.TotalMilliseconds) + { + AutoReset = false + }; + this.timer.Elapsed += this.OnTimerElapsed; + + this.changed = new Subject<KeyValuePair<string, string>>(); + ... + } + + ... + + public void StartMonitor() + { + if (this.timer.Enabled) + { + return; + } + + lock (this.timerSyncObject) + { + if (this.timer.Enabled) + { + return; + } + this.keepMonitoring = true; + + // Load the local settings cache. + this.CheckForConfigurationChanges(); + + this.timer.Start(); + } + } + + public void StopMonitor() + { + lock (this.timerSyncObject) + { + this.keepMonitoring = false; + this.timer.Stop(); + } + } + + private void OnTimerElapsed(object sender, EventArgs e) + { + Trace.TraceInformation( + "Configuration Manager: checking for configuration changes."); + + try + { + this.CheckForConfigurationChanges(); + } + finally + { + ... + // Restart the timer after each interval. + this.timer.Start(); + ... + } + } + ... +} +``` + +The `ExternalConfigurationManager` class is instantiated as a singleton instance by the `ExternalConfiguration` class shown below. + +```csharp +public static class ExternalConfiguration +{ + private static readonly Lazy<ExternalConfigurationManager> configuredInstance + = new Lazy<ExternalConfigurationManager>( + () => + { + var environment = CloudConfigurationManager.GetSetting("environment"); + return new ExternalConfigurationManager(environment); + }); + + public static ExternalConfigurationManager Instance + { + get { return configuredInstance.Value; } + } +} +``` + +The following code is taken from the `WorkerRole` class in the _ExternalConfigurationStore.Cloud_ project. It shows how the application uses the `ExternalConfiguration` class to read and update a setting. + +```csharp +public override void Run() +{ + // Start monitoring for configuration changes. 
+ ExternalConfiguration.Instance.StartMonitor(); + + // Get a setting. + var setting = ExternalConfiguration.Instance.GetAppSetting("setting1"); + Trace.TraceInformation("Worker Role: Get setting1, value: " + setting); + + Thread.Sleep(TimeSpan.FromSeconds(10)); + + // Update a setting. + Trace.TraceInformation("Worker Role: Updating configuration"); + ExternalConfiguration.Instance.SetAppSetting("setting1", "new value"); + + this.completeEvent.WaitOne(); +} +``` + +The following code, also from the `WorkerRole` class, shows how the application subscribes to configuration events. + +```csharp +public override bool OnStart() +{ + ... + // Subscribe to the event. + ExternalConfiguration.Instance.Changed.Subscribe( + m => Trace.TraceInformation("Configuration has changed. Key:{0} Value:{1}", + m.Key, m.Value), + ex => Trace.TraceError("Error detected: " + ex.Message)); + ... +} +``` + +## Related patterns and guidance + +The following information might also be relevant when implementing this pattern: + +- [Runtime Reconfiguration pattern](runtime-reconfiguration.md). In addition to storing configuration settings externally, it's useful to be able to update them and have the changes applied without restarting the application. Describes how to design an application so that it can be reconfigured without requiring redeployment or restarting. + +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/external-configuration-store). diff --git a/docs/patterns/federated-identity.md b/docs/patterns/federated-identity.md new file mode 100644 index 00000000000..259734aef22 --- /dev/null +++ b/docs/patterns/federated-identity.md @@ -0,0 +1,101 @@ +--- +title: Federated Identity +description: Delegate authentication to an external identity provider. 
+keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [security] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Federated Identity + +[!INCLUDE [header](../_includes/header.md)] + +Delegate authentication to an external identity provider. This can simplify development, minimize the requirement for user administration, and improve the user experience of the application. + +## Context and problem + +Users typically need to work with multiple applications provided and hosted by different organizations they have a business relationship with. These users might be required to use specific (and different) credentials for each one. This can: + +- **Cause a disjointed user experience**. Users often forget sign-in credentials when they have many different ones. + +- **Expose security vulnerabilities**. When a user leaves the company the account must immediately be deprovisioned. It's easy to overlook this in large organizations. + +- **Complicate user management**. Administrators must manage credentials for all of the users, and perform additional tasks such as providing password reminders. + +Users typically prefer to use the same credentials for all these applications. + +## Solution + +Implement an authentication mechanism that can use federated identity. Separate user authentication from the application code, and delegate authentication to a trusted identity provider. This can simplify development and allow users to authenticate using a wider range of identity providers (IdP) while minimizing the administrative overhead. It also allows you to clearly decouple authentication from authorization. 
+ +The trusted identity providers include corporate directories, on-premises federation services, other security token services (STS) provided by business partners, or social identity providers that can authenticate users who have, for example, a Microsoft, Google, Yahoo!, or Facebook account. + +The figure illustrates the Federated Identity pattern when a client application needs to access a service that requires authentication. The authentication is performed by an IdP that works in concert with an STS. The IdP issues security tokens that provide information about the authenticated user. This information, referred to as claims, includes the user’s identity, and might also include other information such as role membership and more granular access rights. + +![An overview of federated authentication](./_images/federated-identity-overview.png) + + +This model is often called claims-based access control. Applications and services authorize access to features and functionality based on the claims contained in the token. The service that requires authentication must trust the IdP. The client application contacts the IdP that performs the authentication. If the authentication is successful, the IdP returns a token containing the claims that identify the user to the STS (note that the IdP and STS can be the same service). The STS can transform and augment the claims in the token based on predefined rules, before returning it to the client. The client application can then pass this token to the service as proof of its identity. + +> There might be additional STSs in the chain of trust. For example, in the scenario described later, an on-premises STS trusts another STS that is responsible for accessing an identity provider to authenticate the user. This approach is common in enterprise scenarios where there's an on-premises STS and directory. 
+ +Federated authentication provides a standards-based solution to the issue of trusting identities across diverse domains, and can support single sign-on. It's becoming more common across all types of applications, especially cloud-hosted applications, because it supports single sign-on without requiring a direct network connection to identity providers. The user doesn't have to enter credentials for every application. This increases security because it prevents the creation of credentials required to access many different applications, and it also hides the user’s credentials from all but the original identity provider. Applications see just the authenticated identity information contained within the token. + +Federated identity also has the major advantage that management of the identity and credentials is the responsibility of the identity provider. The application or service doesn't need to provide identity management features. In addition, in corporate scenarios, the corporate directory doesn't need to know about the user if it trusts the identity provider. This removes all the administrative overhead of managing the user identity within the directory. + +## Issues and considerations + +Consider the following when designing applications that implement federated authentication: + +- Authentication can be a single point of failure. If you deploy your application to multiple datacenters, consider deploying your identity management mechanism to the same datacenters to maintain application reliability and availability. + +- Authentication tools make it possible to configure access control based on role claims contained in the authentication token. This is often referred to as role-based access control (RBAC), and it can allow a more granular level of control over access to features and resources. 
+ +- Unlike a corporate directory, claims-based authentication using social identity providers doesn't usually provide information about the authenticated user other than an email address, and perhaps a name. Some social identity providers, such as a Microsoft account, provide only a unique identifier. The application usually needs to maintain some information on registered users, and be able to match this information to the identifier contained in the claims in the token. Typically this is done through registration when the user first accesses the application, and information is then injected into the token as additional claims after each authentication. + +- If there's more than one identity provider configured for the STS, it must detect which identity provider the user should be redirected to for authentication. This process is called home realm discovery. The STS might be able to do this automatically based on an email address or user name that the user provides, a subdomain of the application that the user is accessing, the user’s IP address scope, or on the contents of a cookie stored in the user’s browser. For example, if the user entered an email address in the Microsoft domain, such as user@live.com, the STS will redirect the user to the Microsoft account sign-in page. On later visits, the STS could use a cookie to indicate that the last sign in was with a Microsoft account. If automatic discovery can't determine the home realm, the STS will display a home realm discovery page that lists the trusted identity providers, and the user must select the one they want to use. + +## When to use this pattern + +This pattern is useful for scenarios such as: + +- **Single sign-on in the enterprise**. In this scenario you need to authenticate employees for corporate applications that are hosted in the cloud outside the corporate security boundary, without requiring them to sign in every time they visit an application. 
The user experience is the same as when using on-premises applications where they're authenticated when signing in to a corporate network, and from then on have access to all relevant applications without needing to sign in again. + +- **Federated identity with multiple partners**. In this scenario you need to authenticate both corporate employees and business partners who don't have accounts in the corporate directory. This is common in business-to-business applications, applications that integrate with third-party services, and where companies with different IT systems have merged or shared resources. + +- **Federated identity in SaaS applications**. In this scenario independent software vendors provide a ready-to-use service for multiple clients or tenants. Each tenant authenticates using a suitable identity provider. For example, business users will use their corporate credentials, while consumers and clients of the tenant will use their social identity credentials. + +This pattern might not be useful in the following situations: + +- All users of the application can be authenticated by one identity provider, and there's no requirement to authenticate using any other identity provider. This is typical in business applications that use a corporate directory (accessible within the application) for authentication, by using a VPN, or (in a cloud-hosted scenario) through a virtual network connection between the on-premises directory and the application. + +- The application was originally built using a different authentication mechanism, perhaps with custom user stores, or doesn't have the capability to handle the negotiation standards used by claims-based technologies. Retrofitting claims-based authentication and access control into existing applications can be complex, and probably not cost effective. + +## Example + +An organization hosts a multi-tenant software as a service (SaaS) application in Microsoft Azure. 
The application includes a website that tenants can use to manage the application for their own users. The application allows tenants to access the website by using a federated identity that is generated by Active Directory Federation Services (ADFS) when a user is authenticated by that organization’s own Active Directory. + +![How users at a large enterprise subscriber access the application](./_images/federated-identity-multitenat.png) + + +The figure shows how tenants authenticate with their own identity provider (step 1), in this case ADFS. After successfully authenticating a tenant, ADFS issues a token. The client browser forwards this token to the SaaS application’s federation provider, which trusts tokens issued by the tenant’s ADFS, in order to get back a token that is valid for the SaaS federation provider (step 2). If necessary, the SaaS federation provider performs a transformation on the claims in the token into claims that the application recognizes (step 3) before returning the new token to the client browser. The application trusts tokens issued by the SaaS federation provider and uses the claims in the token to apply authorization rules (step 4). + +Tenants won't need to remember separate credentials to access the application, and an administrator at the tenant’s company can configure in its own ADFS the list of users that can access the application. 
+ +## Related guidance + +- [Microsoft Azure Active Directory](https://azure.microsoft.com/services/active-directory/) +- [Active Directory Domain Services](https://msdn.microsoft.com/library/bb897402.aspx) +- [Active Directory Federation Services](https://msdn.microsoft.com/library/bb897402.aspx) +- [Identity management for multitenant applications in Microsoft Azure](https://azure.microsoft.com/documentation/articles/guidance-multitenant-identity/) +- [Multitenant Applications in Azure](https://azure.microsoft.com/documentation/articles/dotnet-develop-multitenant-applications/) diff --git a/docs/patterns/gatekeeper.md b/docs/patterns/gatekeeper.md new file mode 100644 index 00000000000..99bd18eff5b --- /dev/null +++ b/docs/patterns/gatekeeper.md @@ -0,0 +1,73 @@ +--- +title: Gatekeeper +description: Protect applications and services by using a dedicated host instance that acts as a broker between clients and the application or service, validates and sanitizes requests, and passes requests and data between them. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [security] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Gatekeeper + +[!INCLUDE [header](../_includes/header.md)] + +Protect applications and services by using a dedicated host instance that acts as a broker between clients and the application or service, validates and sanitizes requests, and passes requests and data between them. This can provide an additional layer of security, and limit the attack surface of the system. + +## Context and problem + +Applications expose their functionality to clients by accepting and processing requests. In cloud-hosted scenarios, applications expose endpoints clients connect to, and typically include the code to handle the requests from clients. 
This code performs authentication and validation, some or all request processing, and is likely to access storage and other services on behalf of the client. + +If a malicious user is able to compromise the system and gain access to the application’s hosting environment, the security mechanisms it uses such as credentials and storage keys, and the services and data it accesses, are exposed. As a result, the malicious user can gain unrestrained access to sensitive information and other services. + +## Solution + +To minimize the risk of clients gaining access to sensitive information and services, decouple hosts or tasks that expose public endpoints from the code that processes requests and accesses storage. You can achieve this by using a façade or a dedicated task that interacts with clients and then hands off the request—perhaps through a decoupled interface—to the hosts or tasks that'll handle the request. The figure provides a high-level overview of this pattern. + +![High-level overview of this pattern](./_images/gatekeeper-diagram.png) + + +The gatekeeper pattern can be used to simply protect storage, or it can be used as a more comprehensive façade to protect all of the functions of the application. The important factors are: + +- **Controlled validation.** The gatekeeper validates all requests, and rejects those that don't meet validation requirements. +- **Limited risk and exposure.** The gatekeeper doesn't have access to the credentials or keys used by the trusted host to access storage and services. If the gatekeeper is compromised, the attacker doesn't get access to these credentials or keys. +- **Appropriate security.** The gatekeeper runs in a limited privilege mode, while the rest of the application runs in the full trust mode required to access storage and services. If the gatekeeper is compromised, it can't directly access the application services or data. + +This pattern acts like a firewall in a typical network topology. 
It allows the gatekeeper to examine requests and make a decision about whether to pass the request on to the trusted host (sometimes called the keymaster) that performs the required tasks. This decision typically requires the gatekeeper to validate and sanitize the request content before passing it on to the trusted host. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- Ensure that the trusted hosts the gatekeeper passes requests to expose only internal or protected endpoints, and connect only to the gatekeeper. The trusted hosts shouldn't expose any external endpoints or interfaces. +- The gatekeeper must run in a limited privilege mode. Typically this means running the gatekeeper and the trusted host in separate hosted services or virtual machines. +- The gatekeeper shouldn't perform any processing related to the application or services, or access any data. Its function is purely to validate and sanitize requests. The trusted hosts might need to perform additional validation of requests, but the core validation should be performed by the gatekeeper. +- Use a secure communication channel (HTTPS, SSL, or TLS) between the gatekeeper and the trusted hosts or tasks where this is possible. However, some hosting environments don't support HTTPS on internal endpoints. +- Adding the extra layer to the application to implement the gatekeeper pattern is likely to have some impact on performance due to the additional processing and network communication it requires. +- The gatekeeper instance could be a single point of failure. To minimize the impact of a failure, consider deploying additional instances and using an autoscaling mechanism to ensure capacity to maintain availability. 
+ +## When to use this pattern + +This pattern is useful for: + +- Applications that handle sensitive information, expose services that must have a high degree of protection from malicious attacks, or perform mission-critical operations that shouldn't be disrupted. +- Distributed applications where it's necessary to perform request validation separately from the main tasks, or to centralize this validation to simplify maintenance and administration. + +## Example + +In a cloud-hosted scenario, this pattern can be implemented by decoupling the gatekeeper role or virtual machine from the trusted roles and services in an application. Do this by using an internal endpoint, a queue, or storage as an intermediate communication mechanism. The figure illustrates using an internal endpoint. + +![An example of the pattern using Cloud Services web and worker roles](./_images/gatekeeper-endpoint.png) + + +## Related patterns + +The [Valet Key pattern](valet-key.md) might also be relevant when implementing the Gatekeeper pattern. When communicating between the Gatekeeper and trusted roles it's good practice to enhance security by using keys or tokens that limit permissions for accessing resources. The Valet Key pattern describes how to use a token or key that provides clients with restricted direct access to a specific resource or service. diff --git a/docs/patterns/health-endpoint-monitoring.md b/docs/patterns/health-endpoint-monitoring.md new file mode 100644 index 00000000000..dab1b139354 --- /dev/null +++ b/docs/patterns/health-endpoint-monitoring.md @@ -0,0 +1,210 @@ +--- +title: Health Endpoint Monitoring +description: Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. 
+keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [availability, management-monitoring, resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Health Endpoint Monitoring + +[!INCLUDE [header](../_includes/header.md)] + +Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. This can help to verify that applications and services are performing correctly. + +## Context and problem + +It's a good practice, and often a business requirement, to monitor web applications and back-end services, to ensure they're available and performing correctly. However, it's more difficult to monitor services running in the cloud than it is to monitor on-premises services. For example, you don't have full control of the hosting environment, and the services typically depend on other services provided by platform vendors and others. + +There are many factors that affect cloud-hosted applications such as network latency, the performance and availability of the underlying compute and storage systems, and the network bandwidth between them. The service can fail entirely or partially due to any of these factors. Therefore, you must verify at regular intervals that the service is performing correctly to ensure the required level of availability, which might be part of your service level agreement (SLA). + +## Solution + +Implement health monitoring by sending requests to an endpoint on the application. The application should perform the necessary checks, and return an indication of its status. + +A health monitoring check typically combines two factors: + +- The checks (if any) performed by the application or service in response to the request to the health verification endpoint. 
+- Analysis of the results by the tool or framework that performs the health verification check. + +The response code indicates the status of the application and, optionally, any components or services it uses. The latency or response time check is performed by the monitoring tool or framework. The figure provides an overview of the pattern. + +![Overview of the pattern](./_images/health-endpoint-monitoring-pattern.png) + +Other checks that might be carried out by the health monitoring code in the application include: +- Checking cloud storage or a database for availability and response time. +- Checking other resources or services located in the application, or located elsewhere but used by the application. + +Services and tools are available that monitor web applications by submitting a request to a configurable set of endpoints, and evaluating the results against a set of configurable rules. It's relatively easy to create a service endpoint whose sole purpose is to perform some functional tests on the system. + +Typical checks that can be performed by the monitoring tools include: + +- Validating the response code. For example, an HTTP response of 200 (OK) indicates that the application responded without error. The monitoring system might also check for other response codes to give more comprehensive results. +- Checking the content of the response to detect errors, even when a 200 (OK) status code is returned. This can detect errors that affect only a section of the returned web page or service response. For example, checking the title of a page or looking for a specific phrase that indicates the correct page was returned. +- Measuring the response time, which indicates a combination of the network latency and the time that the application took to execute the request. An increasing value can indicate an emerging problem with the application or network. 
+- Checking resources or services located outside the application, such as a content delivery network used by the application to deliver content from global caches. +- Checking for expiration of SSL certificates. +- Measuring the response time of a DNS lookup for the URL of the application to measure DNS latency and DNS failures. +- Validating the URL returned by the DNS lookup to ensure correct entries. This can help to avoid malicious request redirection through a successful attack on the DNS server. + +It's also useful, where possible, to run these checks from different on-premises or hosted locations to measure and compare response times. Ideally you should monitor applications from locations that are close to customers to get an accurate view of the performance from each location. In addition to providing a more robust checking mechanism, the results can help you decide on the deployment location for the application—and whether to deploy it in more than one datacenter. + +Tests should also be run against all the service instances that customers use to ensure the application is working correctly for all customers. For example, if customer storage is spread across more than one storage account, the monitoring process should check all of these. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +How to validate the response. For example, is just a single 200 (OK) status code sufficient to verify the application is working correctly? While this provides the most basic measure of application availability, and is the minimum implementation of this pattern, it provides little information about the operations, trends, and possible upcoming issues in the application. + + > Make sure that the application correctly returns a 200 (OK) only when the target resource is found and processed. 
In some scenarios, such as when using a master page to host the target web page, the server sends back a 200 (OK) status code instead of a 404 (Not Found) code, even when the target content page was not found. + +The number of endpoints to expose for an application. One approach is to expose at least one endpoint for the core services that the application uses and another for lower priority services, allowing different levels of importance to be assigned to each monitoring result. Also consider exposing more endpoints, such as one for each core service, for additional monitoring granularity. For example, a health verification check might check the database, storage, and an external geocoding service that an application uses, with each requiring a different level of uptime and response time. The application could still be healthy if the geocoding service, or some other background task, is unavailable for a few minutes. + +Whether to use the same endpoint for monitoring as is used for general access, but to a specific path designed for health verification checks, for example, /HealthCheck/{GUID}/ on the general access endpoint. This allows some functional tests in the application to be run by the monitoring tools, such as adding a new user registration, signing in, and placing a test order, while also verifying that the general access endpoint is available. + +The type of information to collect in the service in response to monitoring requests, and how to return this information. Most existing tools and frameworks look only at the HTTP status code that the endpoint returns. To return and validate additional information, you might have to create a custom monitoring utility or service. + +How much information to collect. Performing excessive processing during the check can overload the application and impact other users. The time it takes might exceed the timeout of the monitoring system so it marks the application as unavailable. 
Most applications include instrumentation such as error handlers and performance counters that log performance and detailed error information; this might be sufficient instead of returning additional information from a health verification check. + +How to configure security for the monitoring endpoints to protect them from public access, which might expose the application to malicious attacks, risk the exposure of sensitive information, or attract denial of service (DoS) attacks. Typically this should be done in the application configuration so that it can be updated easily without restarting the application. Consider using one or more of the following techniques: + +- Secure the endpoint by requiring authentication. You can do this by using an authentication security key in the request header or by passing credentials with the request, provided that the monitoring service or tool supports authentication. + + - Use an obscure or hidden endpoint. For example, expose the endpoint on a different IP address to that used by the default application URL, configure the endpoint on a nonstandard HTTP port, and/or use a complex path to the test page. You can usually specify additional endpoint addresses and ports in the application configuration, and add entries for these endpoints to the DNS server if required to avoid having to specify the IP address directly. + + - Expose a method on an endpoint that accepts a parameter such as a key value or an operation mode value. Depending on the value supplied for this parameter, when a request is received the code can perform a specific test or set of tests, or return a 404 (Not Found) error if the parameter value isn't recognized. The recognized parameter values could be set in the application configuration. + + > DoS attacks are likely to have less impact on a separate endpoint that performs basic functional tests without compromising the operation of the application. 
Ideally, avoid using a test that might expose sensitive information. If you must return information that might be useful to an attacker, consider how you'll protect the endpoint and the data from unauthorized access. In this case just relying on obscurity isn't enough. You should also consider using an HTTPS connection and encrypting any sensitive data, although this will increase the load on the server. + +- How to access an endpoint that's secured using authentication. Not all tools and frameworks can be configured to include credentials with the health verification request. For example, Microsoft Azure built-in health verification features can't provide authentication credentials. Some third-party alternatives are [Pingdom](https://www.pingdom.com/), [Panopta](http://www.panopta.com/), [NewRelic](https://newrelic.com/), and [Statuscake](https://www.statuscake.com/). + +- How to ensure that the monitoring agent is performing correctly. One approach is to expose an endpoint that simply returns a value from the application configuration or a random value that can be used to test the agent. + + > Also ensure that the monitoring system performs checks on itself, such as a self-test and built-in test, to avoid it issuing false positive results. + +## When to use this pattern + +This pattern is useful for: +- Monitoring websites and web applications to verify availability. +- Monitoring websites and web applications to check for correct operation. +- Monitoring middle-tier or shared services to detect and isolate a failure that could disrupt other applications. +- Complementing existing instrumentation in the application, such as performance counters and error handlers. Health verification checking doesn't replace the requirement for logging and auditing in the application. Instrumentation can provide valuable information for an existing framework that monitors counters and error logs to detect failures or other issues. 
However, it can't provide information if the application is unavailable. + +## Example + +The following code examples, taken from the `HealthCheckController` class (a sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/health-endpoint-monitoring)), demonstrate exposing an endpoint for performing a range of health checks. + +The `CoreServices` method, shown below in C#, performs a series of checks on services used in the application. If all of the tests run without error, the method returns a 200 (OK) status code. If any of the tests raises an exception, the method returns a 500 (Internal Server Error) status code. The method could optionally return additional information when an error occurs, if the monitoring tool or framework is able to make use of it. + +```csharp +public ActionResult CoreServices() +{ + try + { + // Run a simple check to ensure the database is available. + DataStore.Instance.CoreHealthCheck(); + + // Run a simple check on our external service. + MyExternalService.Instance.CoreHealthCheck(); + } + catch (Exception ex) + { + Trace.TraceError("Exception in basic health check: {0}", ex.Message); + + // This can optionally return different status codes based on the exception. + // Optionally it could return more details about the exception. + // The additional information could be used by administrators who access the + // endpoint with a browser, or using a ping utility that can display the + // additional information. + return new HttpStatusCodeResult((int)HttpStatusCode.InternalServerError); + } + return new HttpStatusCodeResult((int)HttpStatusCode.OK); +} +``` +The `ObscurePath` method shows how you can read a path from the application configuration and use it as the endpoint for tests. This example, in C#, also shows how you can accept an ID as a parameter and use it to check for valid requests. 
+ +```csharp +public ActionResult ObscurePath(string id) +{ + // The id could be used as a simple way to obscure or hide the endpoint. + // The id to match could be retrieved from configuration and, if matched, + // perform a specific set of tests and return the result. If not matched it + // could return a 404 (Not Found) status. + + // The obscure path can be set through configuration to hide the endpoint. + var hiddenPathKey = CloudConfigurationManager.GetSetting("Test.ObscurePath"); + + // If the value passed does not match that in configuration, return 404 (Not Found). + if (!string.Equals(id, hiddenPathKey)) + { + return new HttpStatusCodeResult((int)HttpStatusCode.NotFound); + } + + // Else continue and run the tests... + // Return results from the core services test. + return this.CoreServices(); +} +``` + +The `TestResponseFromConfig` method shows how you can expose an endpoint that performs a check for a specified configuration setting value. + +```csharp +public ActionResult TestResponseFromConfig() +{ + // Health check that returns a response code set in configuration for testing. + var returnStatusCodeSetting = CloudConfigurationManager.GetSetting( + "Test.ReturnStatusCode"); + + int returnStatusCode; + + if (!int.TryParse(returnStatusCodeSetting, out returnStatusCode)) + { + returnStatusCode = (int)HttpStatusCode.OK; + } + + return new HttpStatusCodeResult(returnStatusCode); +} +``` +## Monitoring endpoints in Azure hosted applications + +Some options for monitoring endpoints in Azure applications are: + +- Use the built-in monitoring features of Azure. + +- Use a third-party service or a framework such as Microsoft System Center Operations Manager. + +- Create a custom utility or a service that runs on your own or on a hosted server. + + > Even though Azure provides a reasonably comprehensive set of monitoring options, you can use additional services and tools to provide extra information. 
Azure Management Services provides a built-in monitoring mechanism for alert rules. The alerts section of the management services page in the Azure portal allows you to configure up to ten alert rules per subscription for your services. These rules specify a condition and a threshold value for a service such as CPU load, or the number of requests or errors per second, and the service can automatically send email notifications to addresses you define in each rule. + +The conditions you can monitor vary depending on the hosting mechanism you choose for your application (such as Web Sites, Cloud Services, Virtual Machines, or Mobile Services), but all of these include the ability to create an alert rule that uses a web endpoint you specify in the settings for your service. This endpoint should respond in a timely way so that the alert system can detect that the application is operating correctly. + +> Read more information about [creating alert notifications][portal-alerts]. + +If you host your application in Azure Cloud Services web and worker roles or Virtual Machines, you can take advantage of one of the built-in services in Azure called Traffic Manager. Traffic Manager is a routing and load-balancing service that can distribute requests to specific instances of your Cloud Services hosted application based on a range of rules and settings. + +In addition to routing requests, Traffic Manager pings a URL, port, and relative path that you specify on a regular basis to determine which instances of the application defined in its rules are active and are responding to requests. If it detects a status code 200 (OK), it marks the application as available. Any other status code causes Traffic Manager to mark the application as offline. You can view the status in the Traffic Manager console, and configure the rule to reroute requests to other instances of the application that are responding. 
+ +However, Traffic Manager will only wait ten seconds to receive a response from the monitoring URL. Therefore, you should ensure that your health verification code executes in this time, allowing for network latency for the round trip from Traffic Manager to your application and back again. + +> Read more information about using [Traffic Manager to monitor your applications](https://azure.microsoft.com/documentation/services/traffic-manager/). Traffic Manager is also discussed in [Multiple Datacenter Deployment Guidance](https://msdn.microsoft.com/library/dn589779.aspx). + +## Related guidance + +The following guidance can be useful when implementing this pattern: +- [Instrumentation and Telemetry Guidance](https://msdn.microsoft.com/library/dn589775.aspx). Checking the health of services and components is typically done by probing, but it's also useful to have information in place to monitor application performance and detect events that occur at runtime. This data can be transmitted back to monitoring tools as additional information for health monitoring. Instrumentation and Telemetry Guidance explores gathering remote diagnostics information that's collected by instrumentation in applications. +- [Receiving alert notifications][portal-alerts]. +- This pattern includes a downloadable [sample application](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/health-endpoint-monitoring). + +[portal-alerts]: https://azure.microsoft.com/documentation/articles/insights-receive-alert-notifications/ diff --git a/docs/patterns/index-table.md b/docs/patterns/index-table.md new file mode 100644 index 00000000000..582bb6b3b95 --- /dev/null +++ b/docs/patterns/index-table.md @@ -0,0 +1,122 @@ +--- +title: Index Table +description: Create indexes over the fields in data stores that are frequently referenced by queries. 
+keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Index Table + +[!INCLUDE [header](../_includes/header.md)] + +Create indexes over the fields in data stores that are frequently referenced by queries. This pattern can improve query performance by allowing applications to more quickly locate the data to retrieve from a data store. + +## Context and problem + +Many data stores organize the data for a collection of entities using the primary key. An application can use this key to locate and retrieve data. The figure shows an example of a data store holding customer information. The primary key is the Customer ID. The figure shows customer information organized by the primary key (Customer ID). + +![Figure 1 - Customer information organized by the primary key (Customer ID)](./_images/index-table-figure-1.png) + + +While the primary key is valuable for queries that fetch data based on the value of this key, an application might not be able to use the primary key if it needs to retrieve data based on some other field. In the customers example, an application can't use the Customer ID primary key to retrieve customers if it queries data solely by referencing the value of some other attribute, such as the town in which the customer is located. To perform a query such as this, the application might have to fetch and examine every customer record, which could be a slow process. + +Many relational database management systems support secondary indexes. A secondary index is a separate data structure that's organized by one or more nonprimary (secondary) key fields, and it indicates where the data for each indexed value is stored. 
The items in a secondary index are typically sorted by the value of the secondary keys to enable fast lookup of data. These indexes are usually maintained automatically by the database management system. + +You can create as many secondary indexes as you need to support the different queries that your application performs. For example, in a Customers table in a relational database where the Customer ID is the primary key, it's beneficial to add a secondary index over the town field if the application frequently looks up customers by the town where they reside. + +However, although secondary indexes are common in relational systems, most NoSQL data stores used by cloud applications don't provide an equivalent feature. + +## Solution + +If the data store doesn't support secondary indexes, you can emulate them manually by creating your own index tables. An index table organizes the data by a specified key. Three strategies are commonly used for structuring an index table, depending on the number of secondary indexes that are required and the nature of the queries that an application performs. + +The first strategy is to duplicate the data in each index table but organize it by different keys (complete denormalization). The next figure shows index tables that organize the same customer information by Town and LastName. + +![Figure 2 - Data is duplicated in each index table](./_images/index-table-figure-2.png) + + +This strategy is appropriate if the data is relatively static compared to the number of times it's queried using each key. If the data is more dynamic, the processing overhead of maintaining each index table becomes too large for this approach to be useful. Also, if the volume of data is very large, the amount of space required to store the duplicate data is significant. 
+ +The second strategy is to create normalized index tables organized by different keys and reference the original data by using the primary key rather than duplicating it, as shown in the following figure. The original data is called a fact table. + +![Figure 3 - Data is referenced by each index table](./_images/index-table-figure-3.png) + + +This technique saves space and reduces the overhead of maintaining duplicate data. The disadvantage is that an application has to perform two lookup operations to find data using a secondary key. It has to find the primary key for the data in the index table, and then use the primary key to look up the data in the fact table. + +The third strategy is to create partially normalized index tables organized by different keys that duplicate frequently retrieved fields. Reference the fact table to access less frequently accessed fields. The next figure shows how commonly accessed data is duplicated in each index table. + +![Figure 4 - Commonly accessed data is duplicated in each index table](./_images/index-table-figure-4.png) + + +With this strategy, you can strike a balance between the first two approaches. The data for common queries can be retrieved quickly by using a single lookup, while the space and maintenance overhead isn't as significant as duplicating the entire data set. + +If an application frequently queries data by specifying a combination of values (for example, “Find all customers that live in Redmond and that have a last name of Smith”), you could implement the keys to the items in the index table as a concatenation of the Town attribute and the LastName attribute. The next figure shows an index table based on composite keys. The keys are sorted by Town, and then by LastName for records that have the same value for Town. 
+ +![Figure 5 - An index table based on composite keys](./_images/index-table-figure-5.png) + + +Index tables can speed up query operations over sharded data, and are especially useful where the shard key is hashed. The next figure shows an example where the shard key is a hash of the Customer ID. The index table can organize data by the nonhashed value (Town and LastName), and provide the hashed shard key as the lookup data. This can save the application from repeatedly calculating hash keys (an expensive operation) if it needs to retrieve data that falls within a range, or it needs to fetch data in order of the nonhashed key. For example, a query such as “Find all customers that live in Redmond” can be quickly resolved by locating the matching items in the index table, where they're all stored in a contiguous block. Then, follow the references to the customer data using the shard keys stored in the index table. + +![Figure 6 - An index table providing quick lookup for sharded data](./_images/index-table-figure-6.png) + + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- The overhead of maintaining secondary indexes can be significant. You must analyze and understand the queries that your application uses. Only create index tables when they're likely to be used regularly. Don't create speculative index tables to support queries that an application doesn't perform, or performs only occasionally. +- Duplicating data in an index table can add significant overhead in storage costs and the effort required to maintain multiple copies of data. +- Implementing an index table as a normalized structure that references the original data requires an application to perform two lookup operations to find data. The first operation searches the index table to retrieve the primary key, and the second uses the primary key to fetch the data. 
+- If a system incorporates a number of index tables over very large data sets, it can be difficult to maintain consistency between index tables and the original data. It might be possible to design the application around the eventual consistency model. For example, to insert, update, or delete data, an application could post a message to a queue and let a separate task perform the operation and maintain the index tables that reference this data asynchronously. For more information about implementing eventual consistency, see the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). + + > Microsoft Azure storage tables support transactional updates for changes made to data held in the same partition (referred to as entity group transactions). If you can store the data for a fact table and one or more index tables in the same partition, you can use this feature to help ensure consistency. + +- Index tables might themselves be partitioned or sharded. + +## When to use this pattern + +Use this pattern to improve query performance when an application frequently needs to retrieve data by using a key other than the primary (or shard) key. + +This pattern might not be useful when: + +- Data is volatile. An index table can become out of date very quickly, making it ineffective or making the overhead of maintaining the index table greater than any savings made by using it. +- A field selected as the secondary key for an index table is nondiscriminating and can only have a small set of values (for example, gender). +- The balance of the data values for a field selected as the secondary key for an index table is highly skewed. For example, if 90% of the records contain the same value in a field, then creating and maintaining an index table to look up data based on this field might create more overhead than scanning sequentially through the data. However, if queries very frequently target values that lie in the remaining 10%, this index can be useful. 
You should understand the queries that your application is performing, and how frequently they're performed. + +## Example + +Azure storage tables provide a highly scalable key/value data store for applications running in the cloud. Applications store and retrieve data values by specifying a key. The data values can contain multiple fields, but the structure of a data item is opaque to table storage, which simply handles a data item as an array of bytes. + +Azure storage tables also support sharding. The sharding key includes two elements, a partition key and a row key. Items that have the same partition key are stored in the same partition (shard), and the items are stored in row key order within a shard. Table storage is optimized for performing queries that fetch data falling within a contiguous range of row key values within a partition. If you're building cloud applications that store information in Azure tables, you should structure your data with this feature in mind. + +For example, consider an application that stores information about movies. The application frequently queries movies by genre (action, documentary, historical, comedy, drama, and so on). You could create an Azure table with partitions for each genre by using the genre as the partition key, and specifying the movie name as the row key, as shown in the next figure. + +![Figure 7 - Movie data stored in an Azure table](./_images/index-table-figure-7.png) + + +This approach is less effective if the application also needs to query movies by starring actor. In this case, you can create a separate Azure table that acts as an index table. The partition key is the actor and the row key is the movie name. The data for each actor will be stored in separate partitions. If a movie stars more than one actor, the same movie will occur in multiple partitions. + +You can duplicate the movie data in the values held by each partition by adopting the first approach described in the Solution section above. 
However, it's likely that each movie will be replicated several times (once for each actor), so it might be more efficient to partially denormalize the data to support the most common queries (such as the names of the other actors) and enable an application to retrieve any remaining details by including the partition key necessary to find the complete information in the genre partitions. This approach is described by the third option in the Solution section. The next figure shows this approach. + +![Figure 8 - Actor partitions acting as index tables for movie data](./_images/index-table-figure-8.png) + + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). An index table must be maintained as the data that it indexes changes. In the cloud, it might not be possible or appropriate to perform operations that update an index as part of the same transaction that modifies the data. In that case, an eventually consistent approach is more suitable. Provides information on the issues surrounding eventual consistency. +- [Sharding pattern](https://msdn.microsoft.com/library/dn589797.aspx). The Index Table pattern is frequently used in conjunction with data partitioned by using shards. The Sharding pattern provides more information on how to divide a data store into a set of shards. +- [Materialized View pattern](materialized-view.md). Instead of indexing data to support queries that summarize data, it might be more appropriate to create a materialized view of the data. Describes how to support efficient summary queries by generating prepopulated views over data. 
diff --git a/docs/patterns/index.liquid.md b/docs/patterns/index.liquid.md new file mode 100644 index 00000000000..97cdd2f6e1b --- /dev/null +++ b/docs/patterns/index.liquid.md @@ -0,0 +1,30 @@ +--- +title: Cloud Design Patterns +description: Cloud Design Patterns for Microsoft Azure +keywords: Azure +--- +# Cloud Design Patterns + +[!INCLUDE [header](../../_includes/header.md)] + +These design patterns are useful for building reliable, scalable, secure applications in the cloud. + +Each pattern describes the problem that the pattern addresses, considerations for applying the pattern, and an example based on Microsoft Azure. Most of the patterns include code samples or snippets that show how to implement the pattern on Azure. However, most of the patterns are relevant to any distributed system, whether hosted on Azure or on other cloud platforms. + +## Problem areas in the cloud + +
      +{%- for category in categories %} +
    • + {% include 'pattern-category-card' %} +
    • +{%- endfor %} +
    + +## Catalog of patterns + +| Pattern | Summary | +| ------- | ------- | +{%- for pattern in patterns %} +| [{{ pattern.title }}](./{{ pattern.file }}) | {{ pattern.description }} | +{%- endfor %} \ No newline at end of file diff --git a/docs/patterns/index.md b/docs/patterns/index.md new file mode 100644 index 00000000000..02e43d6fff2 --- /dev/null +++ b/docs/patterns/index.md @@ -0,0 +1,142 @@ +--- +title: Cloud Design Patterns +description: Cloud Design Patterns for Microsoft Azure +keywords: Azure +--- +# Cloud Design Patterns + +[!INCLUDE [header](../_includes/header.md)] + +These design patterns are useful for building reliable, scalable, secure applications in the cloud. + +Each pattern describes the problem that the pattern addresses, considerations for applying the pattern, and an example based on Microsoft Azure. Most of the patterns include code samples or snippets that show how to implement the pattern on Azure. However, most of the patterns are relevant to any distributed system, whether hosted on Azure or on other cloud platforms. + +## Problem areas in the cloud + + + +## Catalog of patterns + +| Pattern | Summary | +| ------- | ------- | +| [Cache-Aside](./cache-aside.md) | Load data on demand into a cache from a data store | +| [Circuit Breaker](./circuit-breaker.md) | Handle faults that might take a variable amount of time to fix when connecting to a remote service or resource. | +| [CQRS](./cqrs.md) | Segregate operations that read data from operations that update data by using separate interfaces. | +| [Compensating Transaction](./compensating-transaction.md) | Undo the work performed by a series of steps, which together define an eventually consistent operation. | +| [Competing Consumers](./competing-consumers.md) | Enable multiple concurrent consumers to process messages received on the same messaging channel. 
| +| [Compute Resource Consolidation](./compute-resource-consolidation.md) | Consolidate multiple tasks or operations into a single computational unit | +| [Event Sourcing](./event-sourcing.md) | Use an append-only store to record the full series of events that describe actions taken on data in a domain. | +| [External Configuration Store](./external-configuration-store.md) | Move configuration information out of the application deployment package to a centralized location. | +| [Federated Identity](./federated-identity.md) | Delegate authentication to an external identity provider. | +| [Gatekeeper](./gatekeeper.md) | Protect applications and services by using a dedicated host instance that acts as a broker between clients and the application or service, validates and sanitizes requests, and passes requests and data between them. | +| [Health Endpoint Monitoring](./health-endpoint-monitoring.md) | Implement functional checks in an application that external tools can access through exposed endpoints at regular intervals. | +| [Index Table](./index-table.md) | Create indexes over the fields in data stores that are frequently referenced by queries. | +| [Leader Election](./leader-election.md) | Coordinate the actions performed by a collection of collaborating task instances in a distributed application by electing one instance as the leader that assumes responsibility for managing the other instances. | +| [Materialized View](./materialized-view.md) | Generate prepopulated views over the data in one or more data stores when the data isn't ideally formatted for required query operations. | +| [Pipes and Filters](./pipes-and-filters.md) | Break down a task that performs complex processing into a series of separate elements that can be reused. | +| [Priority Queue](./priority-queue.md) | Prioritize requests sent to services so that requests with a higher priority are received and processed more quickly than those with a lower priority. 
| +| [Queue-Based Load Leveling](./queue-based-load-leveling.md) | Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. | +| [Retry](./transient-faults.md) | Enable an application to handle anticipated, temporary failures when it tries to connect to a service or network resource by transparently retrying an operation that's previously failed. | +| [Runtime Reconfiguration](./runtime-reconfiguration.md) | Design an application so that it can be reconfigured without requiring redeployment or restarting the application. | +| [Scheduler Agent Supervisor](./scheduler-agent-supervisor.md) | Coordinate a set of actions across a distributed set of services and other remote resources. | +| [Sharding](./sharding.md) | Divide a data store into a set of horizontal partitions or shards. | +| [Static Content Hosting](./static-content-hosting.md) | Deploy static content to a cloud-based storage service that can deliver them directly to the client. | +| [Throttling](./throttling.md) | Control the consumption of resources used by an instance of an application, an individual tenant, or an entire service. | +| [Valet Key](./valet-key.md) | Use a token or key that provides clients with restricted direct access to a specific resource or service. | \ No newline at end of file diff --git a/docs/patterns/leader-election.md b/docs/patterns/leader-election.md new file mode 100644 index 00000000000..f2879b8a1cf --- /dev/null +++ b/docs/patterns/leader-election.md @@ -0,0 +1,205 @@ +--- +title: Leader Election +description: Coordinate the actions performed by a collection of collaborating task instances in a distributed application by electing one instance as the leader that assumes responsibility for managing the other instances. 
+keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [design-implementation, resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Leader Election + +[!INCLUDE [header](../_includes/header.md)] + +Coordinate the actions performed by a collection of collaborating instances in a distributed application by electing one instance as the leader that assumes responsibility for managing the others. This can help to ensure that instances don't conflict with each other, cause contention for shared resources, or inadvertently interfere with the work that other instances are performing. + +## Context and problem + +A typical cloud application has many tasks acting in a coordinated manner. These tasks could all be instances running the same code and requiring access to the same resources, or they might be working together in parallel to perform the individual parts of a complex calculation. + +The task instances might run separately for much of the time, but it might also be necessary to coordinate the actions of each instance to ensure that they don’t conflict, cause contention for shared resources, or accidentally interfere with the work that other task instances are performing. + +For example: + +- In a cloud-based system that implements horizontal scaling, multiple instances of the same task could be running at the same time with each instance serving a different user. If these instances write to a shared resource, it's necessary to coordinate their actions to prevent each instance from overwriting the changes made by the others. +- If the tasks are performing individual elements of a complex calculation in parallel, the results need to be aggregated when they all complete. + +The task instances are all peers, so there isn't a natural leader that can act as the coordinator or aggregator. 
+ +## Solution + +A single task instance should be elected to act as the leader, and this instance should coordinate the actions of the other subordinate task instances. If all of the task instances are running the same code, they are each capable of acting as the leader. Therefore, the election process must be managed carefully to prevent two or more instances taking over the leader role at the same time. + +The system must provide a robust mechanism for selecting the leader. This method has to cope with events such as network outages or process failures. In many solutions, the subordinate task instances monitor the leader through some type of heartbeat method, or by polling. If the designated leader terminates unexpectedly, or a network failure makes the leader unavailable to the subordinate task instances, it's necessary for them to elect a new leader. + +There are several strategies for electing a leader among a set of tasks in a distributed environment, including: +- Selecting the task instance with the lowest-ranked instance or process ID. +- Racing to acquire a shared, distributed mutex. The first task instance that acquires the mutex is the leader. However, the system must ensure that, if the leader terminates or becomes disconnected from the rest of the system, the mutex is released to allow another task instance to become the leader. +- Implementing one of the common leader election algorithms such as the [Bully Algorithm](http://www.cs.colostate.edu/~cs551/CourseNotes/Synchronization/BullyExample.html) or the [Ring Algorithm](http://www.cs.colostate.edu/~cs551/CourseNotes/Synchronization/RingElectExample.html). These algorithms assume that each candidate in the election has a unique ID, and that it can communicate with the other candidates reliably. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: +- The process of electing a leader should be resilient to transient and persistent failures. 
+- It must be possible to detect when the leader has failed or has become otherwise unavailable (such as due to a communications failure). How quickly detection is needed is system dependent. Some systems might be able to function for a short time without a leader, during which a transient fault might be fixed. In other cases, it might be necessary to detect leader failure immediately and trigger a new election. +- In a system that implements horizontal autoscaling, the leader could be terminated if the system scales back and shuts down some of the computing resources. +- Using a shared, distributed mutex introduces a dependency on the external service that provides the mutex. The service constitutes a single point of failure. If it becomes unavailable for any reason, the system won't be able to elect a leader. +- Using a single dedicated process as the leader is a straightforward approach. However, if the process fails there could be a significant delay while it's restarted. The resulting latency can affect the performance and response times of other processes if they're waiting for the leader to coordinate an operation. +- Implementing one of the leader election algorithms manually provides the greatest flexibility for tuning and optimizing the code. + +## When to use this pattern + +Use this pattern when the tasks in a distributed application, such as a cloud-hosted solution, need careful coordination and there's no natural leader. + +> Avoid making the leader a bottleneck in the system. The purpose of the leader is to coordinate the work of the subordinate tasks, and it doesn't necessarily have to participate in this work itself—although it should be able to do so if the task isn't elected as the leader. + +This pattern might not be useful if: +- There's a natural leader or dedicated process that can always act as the leader. For example, it might be possible to implement a singleton process that coordinates the task instances. 
If this process fails or becomes unhealthy, the system can shut it down and restart it. +- The coordination between tasks can be achieved using a more lightweight method. For example, if several task instances simply need coordinated access to a shared resource, a better solution is to use optimistic or pessimistic locking to control access. +- A third-party solution is more appropriate. For example, the Microsoft Azure HDInsight service (based on Apache Hadoop) uses the services provided by Apache Zookeeper to coordinate the map and reduce tasks that collect and summarize data. + +## Example + +The DistributedMutex project in the LeaderElection solution (a sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/leader-election)) shows how to use a lease on an Azure Storage blob to provide a mechanism for implementing a shared, distributed mutex. This mutex can be used to elect a leader among a group of role instances in an Azure cloud service. The first role instance to acquire the lease is elected the leader, and remains the leader until it releases the lease or isn't able to renew the lease. Other role instances can continue to monitor the blob lease in case the leader is no longer available. + +> A blob lease is an exclusive write lock over a blob. A single blob can be the subject of only one lease at any point in time. A role instance can request a lease over a specified blob, and it'll be granted the lease if no other role instance holds a lease over the same blob. Otherwise the request will throw an exception. + +> To avoid a faulted role instance retaining the lease indefinitely, specify a lifetime for the lease. When this expires, the lease becomes available. However, while a role instance holds the lease it can request that the lease is renewed, and it'll be granted the lease for a further period of time. 
The role instance can continually repeat this process if it wants to retain the lease. +For more information on how to lease a blob, see [Lease Blob (REST API)](https://msdn.microsoft.com/library/azure/ee691972.aspx). + +The `BlobDistributedMutex` class in the C# example below contains the `RunTaskWhenMutexAcquired` method that enables a role instance to attempt to acquire a lease over a specified blob. The details of the blob (the name, container, and storage account) are passed to the constructor in a `BlobSettings` object when the `BlobDistributedMutex` object is created (this object is a simple struct that is included in the sample code). The constructor also accepts a `Task` that references the code that the role instance should run if it successfully acquires the lease over the blob and is elected the leader. Note that the code that handles the low-level details of acquiring the lease is implemented in a separate helper class named `BlobLeaseManager`. + +```csharp +public class BlobDistributedMutex +{ + ... + private readonly BlobSettings blobSettings; + private readonly Func<CancellationToken, Task> taskToRunWhenLeaseAcquired; + ... + + public BlobDistributedMutex(BlobSettings blobSettings, + Func<CancellationToken, Task> taskToRunWhenLeaseAcquired) + { + this.blobSettings = blobSettings; + this.taskToRunWhenLeaseAcquired = taskToRunWhenLeaseAcquired; + } + + public async Task RunTaskWhenMutexAcquired(CancellationToken token) + { + var leaseManager = new BlobLeaseManager(blobSettings); + await this.RunTaskWhenBlobLeaseAcquired(leaseManager, token); + } + ... +``` + +The `RunTaskWhenMutexAcquired` method in the code sample above invokes the `RunTaskWhenBlobLeaseAcquired` method shown in the following code sample to actually acquire the lease. The `RunTaskWhenBlobLeaseAcquired` method runs asynchronously. If the lease is successfully acquired, the role instance has been elected the leader. The purpose of the `taskToRunWhenLeaseAcquired` delegate is to perform the work that coordinates the other role instances.
If the lease isn't acquired, another role instance has been elected as the leader and the current role instance remains a subordinate. Note that the `TryAcquireLeaseOrWait` method is a helper method that uses the `BlobLeaseManager` object to acquire the lease. + +```csharp + private async Task RunTaskWhenBlobLeaseAcquired( + BlobLeaseManager leaseManager, CancellationToken token) + { + while (!token.IsCancellationRequested) + { + // Try to acquire the blob lease. + // Otherwise wait for a short time before trying again. + string leaseId = await this.TryAcquireLeaseOrWait(leaseManager, token); + + if (!string.IsNullOrEmpty(leaseId)) + { + // Create a new linked cancellation token source so that if either the + // original token is canceled or the lease can't be renewed, the + // leader task can be canceled. + using (var leaseCts = + CancellationTokenSource.CreateLinkedTokenSource(new[] { token })) + { + // Run the leader task. + var leaderTask = this.taskToRunWhenLeaseAcquired.Invoke(leaseCts.Token); + ... + } + } + } + ... + } +``` + +The task started by the leader also runs asynchronously. While this task is running, the `RunTaskWhenBlobLeaseAcquired` method shown in the following code sample periodically attempts to renew the lease. This helps to ensure that the role instance remains the leader. In the sample solution, the delay between renewal requests is less than the time specified for the duration of the lease in order to prevent another role instance from being elected the leader. If the renewal fails for any reason, the task is canceled. + +If the lease fails to be renewed or the task is canceled (possibly as a result of the role instance shutting down), the lease is released. At this point, this or another role instance might be elected as the leader. The code extract below shows this part of the process. + +```csharp + private async Task RunTaskWhenBlobLeaseAcquired( + BlobLeaseManager leaseManager, CancellationToken token) + { + while (...) + { + ...
+ if (...) + { + ... + using (var leaseCts = ...) + { + ... + // Keep renewing the lease at regular intervals. + // If the lease can't be renewed, then the task completes. + var renewLeaseTask = + this.KeepRenewingLease(leaseManager, leaseId, leaseCts.Token); + + // When any task completes (either the leader task itself or when it + // couldn't renew the lease) then cancel the other task. + await CancelAllWhenAnyCompletes(leaderTask, renewLeaseTask, leaseCts); + } + } + } + } + ... +} +``` + +The `KeepRenewingLease` method is another helper method that uses the `BlobLeaseManager` object to renew the lease. The `CancelAllWhenAnyCompletes` method cancels the tasks specified as the first two parameters. The following diagram illustrates using the `BlobDistributedMutex` class to elect a leader and run a task that coordinates operations. + +![Figure 1 illustrates the functions of the BlobDistributedMutex class](./_images/leader-election-diagram.png) + + +The following code example shows how to use the `BlobDistributedMutex` class in a worker role. This code acquires a lease over a blob named `MyLeaderCoordinatorTask` in the `leases` container in development storage, and specifies that the code defined in the `MyLeaderCoordinatorTask` method should run if the role instance is elected the leader. + +```csharp +var settings = new BlobSettings(CloudStorageAccount.DevelopmentStorageAccount, + "leases", "MyLeaderCoordinatorTask"); +var cts = new CancellationTokenSource(); +var mutex = new BlobDistributedMutex(settings, MyLeaderCoordinatorTask); +mutex.RunTaskWhenMutexAcquired(cts.Token); +... + +// Method that runs if the role instance is elected the leader +private static async Task MyLeaderCoordinatorTask(CancellationToken token) +{ + ... +} +``` + +Note the following points about the sample solution: +- The blob is a potential single point of failure.
If the blob service becomes unavailable, or is inaccessible, the leader won't be able to renew the lease and no other role instance will be able to acquire the lease. In this case, no role instance will be able to act as the leader. However, the blob service is designed to be resilient, so complete failure of the blob service is considered to be extremely unlikely. +- If the task being performed by the leader stalls, the leader might continue to renew the lease, preventing any other role instance from acquiring the lease and taking over the leader role in order to coordinate tasks. In the real world, the health of the leader should be checked at frequent intervals. +- The election process is nondeterministic. You can't make any assumptions about which role instance will acquire the blob lease and become the leader. +- The blob used as the target of the blob lease shouldn't be used for any other purpose. If a role instance attempts to store data in this blob, this data won't be accessible unless the role instance is the leader and holds the blob lease. + +## Related patterns and guidance + +The following guidance might also be relevant when implementing this pattern: +- This pattern has a downloadable [sample application](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/leader-election). +- [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). It's possible to start and stop instances of the task hosts as the load on the application varies. Autoscaling can help to maintain throughput and performance during times of peak processing. +- [Compute Partitioning Guidance](https://msdn.microsoft.com/library/dn589773.aspx). This guidance describes how to allocate tasks to hosts in a cloud service in a way that helps to minimize running costs while maintaining the scalability, performance, availability, and security of the service. +- The [Task-based Asynchronous Pattern](https://msdn.microsoft.com/library/hh873175.aspx). 
+- An example illustrating the [Bully Algorithm](http://www.cs.colostate.edu/~cs551/CourseNotes/Synchronization/BullyExample.html). +- An example illustrating the [Ring Algorithm](http://www.cs.colostate.edu/~cs551/CourseNotes/Synchronization/RingElectExample.html). +- The article [Apache Zookeeper on Microsoft Azure](https://msopentech.com/opentech-projects/apache-zookeeper-on-windows-azure-2/) on the Microsoft Open Technologies website. +- The article [Lease Blob (REST API)](https://msdn.microsoft.com/library/azure/ee691972.aspx) on MSDN. diff --git a/docs/patterns/materialized-view.md b/docs/patterns/materialized-view.md new file mode 100644 index 00000000000..8780ac95128 --- /dev/null +++ b/docs/patterns/materialized-view.md @@ -0,0 +1,94 @@ +--- +title: Materialized View +description: Generate prepopulated views over the data in one or more data stores when the data isn't ideally formatted for required query operations. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Materialized View + +[!INCLUDE [header](../_includes/header.md)] + +Generate prepopulated views over the data in one or more data stores when the data isn't ideally formatted for required query operations. This can help support efficient querying and data extraction, and improve application performance. + +## Context and problem + +When storing data, the priority for developers and data administrators is often focused on how the data is stored, as opposed to how it's read. The chosen storage format is usually closely related to the format of the data, requirements for managing data size and data integrity, and the kind of store in use. 
For example, when using a NoSQL document store, the data is often represented as a series of aggregates, each containing all of the information for that entity. + +However, this can have a negative effect on queries. When a query only needs a subset of the data from some entities, such as a summary of orders for several customers without all of the order details, it must extract all of the data for the relevant entities in order to obtain the required information. + +## Solution + +To support efficient querying, a common solution is to generate, in advance, a view that materializes the data in a format suited to the required result set. The Materialized View pattern describes generating prepopulated views of data in environments where the source data isn't in a suitable format for querying, where generating a suitable query is difficult, or where query performance is poor due to the nature of the data or the data store. + +These materialized views, which only contain data required by a query, allow applications to quickly obtain the information they need. In addition to joining tables or combining data entities, materialized views can include the current values of calculated columns or data items, the results of combining values or executing transformations on the data items, and values specified as part of the query. A materialized view can even be optimized for just a single query. + +A key point is that a materialized view and the data it contains are completely disposable because they can be entirely rebuilt from the source data stores. A materialized view is never updated directly by an application, and so it's a specialized cache. + +When the source data for the view changes, the view must be updated to include the new information. You can schedule this to happen automatically, or when the system detects a change to the original data. In some cases it might be necessary to regenerate the view manually.
The figure shows an example of how the Materialized View pattern might be used. + +![Figure 1 shows an example of how the Materialized View pattern might be used](./_images/materialized-view-pattern-diagram.png) + + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +How and when the view will be updated. Ideally it'll regenerate in response to an event indicating a change to the source data, although this can lead to excessive overhead if the source data changes rapidly. Alternatively, consider using a scheduled task, an external trigger, or a manual action to regenerate the view. + +In some systems, such as when using the Event Sourcing pattern to maintain a store of only the events that modified the data, materialized views are necessary. Prepopulating views by examining all events to determine the current state might be the only way to obtain information from the event store. If you're not using Event Sourcing, you need to consider whether a materialized view is helpful or not. Materialized views tend to be specifically tailored to one, or a small number of queries. If many queries are used, materialized views can result in unacceptable storage capacity requirements and storage cost. + +Consider the impact on data consistency when generating the view, and when updating the view if this occurs on a schedule. If the source data is changing at the point when the view is generated, the copy of the data in the view won't be fully consistent with the original data. + +Consider where you'll store the view. The view doesn't have to be located in the same store or partition as the original data. It can be a subset from a few different partitions combined. + +A view can be rebuilt if lost. Because of that, if the view is transient and is only used to improve query performance by reflecting the current state of the data, or to improve scalability, it can be stored in a cache or in a less reliable location. 
+ +When defining a materialized view, maximize its value by adding data items or columns to it based on computation or transformation of existing data items, on values passed in the query, or on combinations of these values when appropriate. + +Where the storage mechanism supports it, consider indexing the materialized view to further increase performance. Most relational databases support indexing for views, as do big data solutions based on Apache Hadoop. + +## When to use this pattern + +This pattern is useful when: +- Creating materialized views over data that's difficult to query directly, or where queries must be very complex to extract data that's stored in a normalized, semi-structured, or unstructured way. +- Creating temporary views that can dramatically improve query performance, or can act directly as source views or data transfer objects for the UI, for reporting, or for display. +- Supporting occasionally connected or disconnected scenarios where connection to the data store isn't always available. The view can be cached locally in this case. +- Simplifying queries and exposing data for experimentation in a way that doesn't require knowledge of the source data format. For example, by joining different tables in one or more databases, or one or more domains in NoSQL stores, and then formatting the data to fit its eventual use. +- Providing access to specific subsets of the source data that, for security or privacy reasons, shouldn't be generally accessible, open to modification, or fully exposed to users. +- Bridging different data stores, to take advantage of their individual capabilities. For example, using a cloud store that's efficient for writing as the reference data store, and a relational database that offers good query and read performance to hold the materialized views. + +This pattern isn't useful in the following situations: +- The source data is simple and easy to query. 
+- The source data changes very quickly, or can be accessed without using a view. In these cases, you should avoid the processing overhead of creating views. +- Consistency is a high priority. The views might not always be fully consistent with the original data. + +## Example + +The following figure shows an example of using the Materialized View pattern to generate a summary of sales. Data in the Order, OrderItem, and Customer tables in separate partitions in an Azure storage account are combined to generate a view containing the total sales value for each product in the Electronics category, along with a count of the number of customers who made purchases of each item. + +![Figure 2: Using the Materialized View pattern to generate a summary of sales](./_images/materialized-view-summary-diagram.png) + + +Creating this materialized view requires complex queries. However, by exposing the query result as a materialized view, users can easily obtain the results and use them directly or incorporate them in another query. The view is likely to be used in a reporting system or dashboard, and can be updated on a scheduled basis such as weekly. + +> Although this example utilizes Azure table storage, many relational database management systems also provide native support for materialized views. + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). The summary information in a materialized view has to be maintained so that it reflects the underlying data values. As the data values change, it might not be practical to update the summary data in real time, and instead you'll have to adopt an eventually consistent approach. Summarizes the issues surrounding maintaining consistency over distributed data, and describes the benefits and tradeoffs of different consistency models. 
+- [Command and Query Responsibility Segregation (CQRS) pattern](cqrs.md). Use to update the information in a materialized view by responding to events that occur when the underlying data values change. +- [Event Sourcing pattern](event-sourcing.md). Use in conjunction with the CQRS pattern to maintain the information in a materialized view. When the data values a materialized view is based on are changed, the system can raise events that describe these changes and save them in an event store. +- [Index Table pattern](index-table.md). The data in a materialized view is typically organized by a primary key, but queries might need to retrieve information from this view by examining data in other fields. Use to create secondary indexes over data sets for data stores that don't support native secondary indexes. diff --git a/docs/patterns/pipes-and-filters.md b/docs/patterns/pipes-and-filters.md new file mode 100644 index 00000000000..cac8a832783 --- /dev/null +++ b/docs/patterns/pipes-and-filters.md @@ -0,0 +1,286 @@ +--- +title: Pipes and Filters +description: Break down a task that performs complex processing into a series of separate elements that can be reused. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [design-implementation, messaging] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Pipes and Filters + +[!INCLUDE [header](../_includes/header.md)] + +Decompose a task that performs complex processing into a series of separate elements that can be reused. This can improve performance, scalability, and reusability by allowing task elements that perform the processing to be deployed and scaled independently. + +## Context and problem + +An application is required to perform a variety of tasks of varying complexity on the information that it processes. 
A straightforward but inflexible approach to implementing an application is to perform this processing as a monolithic module. However, this approach is likely to reduce the opportunities for refactoring the code, optimizing it, or reusing it if parts of the same processing are required elsewhere within the application. + +The figure illustrates the issues with processing data using the monolithic approach. An application receives and processes data from two sources. The data from each source is processed by a separate module that performs a series of tasks to transform this data, before passing the result to the business logic of the application. + +![Figure 1 - A solution implemented using monolithic modules](./_images/pipes-and-filters-modules.png) + +Some of the tasks that the monolithic modules perform are functionally very similar, but the modules have been designed separately. The code that implements the tasks is closely coupled in a module, and has been developed with little or no thought given to reuse or scalability. + +However, the processing tasks performed by each module, or the deployment requirements for each task, could change as business requirements are updated. Some tasks might be compute intensive and could benefit from running on powerful hardware, while others might not require such expensive resources. Also, additional processing might be required in the future, or the order in which the tasks are performed by the processing could change. A solution is required that addresses these issues, and increases the possibilities for code reuse. + +## Solution + +Break down the processing required for each stream into a set of separate components (or filters), each performing a single task. By standardizing the format of the data that each component receives and sends, these filters can be combined together into a pipeline.
This helps to avoid duplicating code, and makes it easy to remove, replace, or integrate additional components if the processing requirements change. The next figure shows a solution implemented using pipes and filters. + +![Figure 2 - A solution implemented using pipes and filters](./_images/pipes-and-filters-solution.png) + + +The time it takes to process a single request depends on the speed of the slowest filter in the pipeline. One or more filters could be a bottleneck, especially if a large number of requests appear in a stream from a particular data source. A key advantage of the pipeline structure is that it provides opportunities for running parallel instances of slow filters, enabling the system to spread the load and improve throughput. + +The filters that make up a pipeline can run on different machines, enabling them to be scaled independently and take advantage of the elasticity that many cloud environments provide. A filter that is computationally intensive can run on high performance hardware, while other less demanding filters can be hosted on less expensive commodity hardware. The filters don't even have to be in the same data center or geographical location, which allows each element in a pipeline to run in an environment that is close to the resources it requires. The next figure shows an example applied to the pipeline for the data from Source 1. + +![Figure 3 shows an example applied to the pipeline for the data from Source 1](./_images/pipes-and-filters-load-balancing.png) + +If the input and output of a filter are structured as a stream, it's possible to perform the processing for each filter in parallel. The first filter in the pipeline can start its work and output its results, which are passed directly on to the next filter in the sequence before the first filter has completed its work. + +Another benefit is the resiliency that this model can provide. 
If a filter fails or the machine it's running on is no longer available, the pipeline can reschedule the work that the filter was performing and direct this work to another instance of the component. Failure of a single filter doesn't necessarily result in failure of the entire pipeline. + +Using the Pipes and Filters pattern in conjunction with the [Compensating Transaction pattern](compensating-transaction.md) is an alternative approach to implementing distributed transactions. A distributed transaction can be broken down into separate, compensable tasks, each of which can be implemented by using a filter that also implements the Compensating Transaction pattern. The filters in a pipeline can be implemented as separate hosted tasks running close to the data that they maintain. + +## Issues and considerations + +You should consider the following points when deciding how to implement this pattern: +- **Complexity**. The increased flexibility that this pattern provides can also introduce complexity, especially if the filters in a pipeline are distributed across different servers. + +- **Reliability**. Use an infrastructure that ensures that data flowing between filters in a pipeline won't be lost. + +- **Idempotency**. If a filter in a pipeline fails after receiving a message and the work is rescheduled to another instance of the filter, part of the work might have already been completed. If this work updates some aspect of the global state (such as information stored in a database), the same update could be repeated. A similar issue might occur if a filter fails after posting its results to the next filter in the pipeline, but before indicating that it's completed its work successfully. In these cases, the same work could be repeated by another instance of the filter, causing the same results to be posted twice. This could result in subsequent filters in the pipeline processing the same data twice. 
Therefore filters in a pipeline should be designed to be idempotent. For more information see [Idempotency Patterns](http://blog.jonathanoliver.com/idempotency-patterns/) on Jonathan Oliver’s blog. + +- **Repeated messages**. If a filter in a pipeline fails after posting a message to the next stage of the pipeline, another instance of the filter might be run, and it'll post a copy of the same message to the pipeline. This could cause two instances of the same message to be passed to the next filter. To avoid this, the pipeline should detect and eliminate duplicate messages. + + > If you're implementing the pipeline by using message queues (such as Microsoft Azure Service Bus queues), the message queuing infrastructure might provide automatic duplicate message detection and removal. + +- **Context and state**. In a pipeline, each filter essentially runs in isolation and shouldn't make any assumptions about how it was invoked. This means that each filter should be provided with sufficient context to perform its work. This context could include a large amount of state information. + +## When to use this pattern + +Use this pattern when: +- The processing required by an application can easily be broken down into a set of independent steps. + +- The processing steps performed by an application have different scalability requirements. + + > It's possible to group filters that should scale together in the same process. For more information, see the [Compute Resource Consolidation pattern](compute-resource-consolidation.md). + +- Flexibility is required to allow reordering of the processing steps performed by an application, or the capability to add and remove steps. + +- The system can benefit from distributing the processing for steps across different servers. + +- A reliable solution is required that minimizes the effects of failure in a step while data is being processed. 
+ +This pattern might not be useful when: +- The processing steps performed by an application aren't independent, or they have to be performed together as part of the same transaction. + +- The amount of context or state information required by a step makes this approach inefficient. It might be possible to persist state information to a database instead, but don't use this strategy if the additional load on the database causes excessive contention. + +## Example + +You can use a sequence of message queues to provide the infrastructure required to implement a pipeline. An initial message queue receives unprocessed messages. A component implemented as a filter task listens for a message on this queue, performs its work, and then posts the transformed message to the next queue in the sequence. Another filter task can listen for messages on this queue, process them, post the results to another queue, and so on until the fully transformed data appears in the final message in the queue. The next figure illustrates implementing a pipeline using message queues. + +![Figure 4 - Implementing a pipeline using message queues](./_images/pipes-and-filters-message-queues.png) + + +If you're building a solution on Azure you can use Service Bus queues to provide a reliable and scalable queuing mechanism. The `ServiceBusPipeFilter` class shown below in C# demonstrates how you can implement a filter that receives input messages from a queue, processes these messages, and posts the results to another queue. + +> The `ServiceBusPipeFilter` class is defined in the PipesAndFilters.Shared project available from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/pipes-and-filters). + +```csharp +public class ServiceBusPipeFilter +{ + ... + private readonly string inQueuePath; + private readonly string outQueuePath; + ... + private QueueClient inQueue; + private QueueClient outQueue; + ... 
+ + public ServiceBusPipeFilter(..., string inQueuePath, string outQueuePath = null) + { + ... + this.inQueuePath = inQueuePath; + this.outQueuePath = outQueuePath; + } + + public void Start() + { + ... + // Create the outbound filter queue if it doesn't exist. + ... + this.outQueue = QueueClient.CreateFromConnectionString(...); + + ... + // Create the inbound and outbound queue clients. + this.inQueue = QueueClient.CreateFromConnectionString(...); + } + + public void OnPipeFilterMessageAsync( + Func<BrokeredMessage, Task<BrokeredMessage>> asyncFilterTask, ...) + { + ... + + this.inQueue.OnMessageAsync( + async (msg) => + { + ... + // Process the filter and send the output to the + // next queue in the pipeline. + var outMessage = await asyncFilterTask(msg); + + // Send the message from the filter processor + // to the next queue in the pipeline. + if (outQueue != null) + { + await outQueue.SendAsync(outMessage); + } + + // Note: There's a chance that the same message could be sent twice + // or that a message gets processed by an upstream or downstream + // filter at the same time. + // This would happen in a situation where processing of a message was + // completed, it was sent to the next pipe/queue, and then failed + // to complete when using the PeekLock method. + // Idempotent message processing and concurrency should be considered + // in a real-world implementation. + }, + options); + } + + public async Task Close(TimeSpan timespan) + { + // Pause the processing threads. + this.pauseProcessingEvent.Reset(); + + // There's no clean approach for waiting for the threads to complete + // the processing. This example simply stops any new processing, waits + // for the existing thread to complete, then closes the message pump + // and finally returns. + Thread.Sleep(timespan); + + this.inQueue.Close(); + ... + } + + ... +} +``` + +The `Start` method in the `ServiceBusPipeFilter` class connects to a pair of input and output queues, and the `Close` method disconnects from the input queue.
The `OnPipeFilterMessageAsync` method performs the actual processing of messages, the `asyncFilterTask` parameter to this method specifies the processing to be performed. The `OnPipeFilterMessageAsync` method waits for incoming messages on the input queue, runs the code specified by the `asyncFilterTask` parameter over each message as it arrives, and posts the results to the output queue. The queues themselves are specified by the constructor. + +The sample solution implements filters in a set of worker roles. Each worker role can be scaled independently, depending on the complexity of the business processing that it performs or the resources required for processing. Additionally, multiple instances of each worker role can be run in parallel to improve throughput. + +The following code shows an Azure worker role named `PipeFilterARoleEntry`, defined in the PipeFilterA project in the sample solution. + +```csharp +public class PipeFilterARoleEntry : RoleEntryPoint +{ + ... + private ServiceBusPipeFilter pipeFilterA; + + public override bool OnStart() + { + ... + this.pipeFilterA = new ServiceBusPipeFilter( + ..., + Constants.QueueAPath, + Constants.QueueBPath); + + this.pipeFilterA.Start(); + ... + } + + public override void Run() + { + this.pipeFilterA.OnPipeFilterMessageAsync(async (msg) => + { + // Clone the message and update it. + // Properties set by the broker (Deliver count, enqueue time, ...) + // aren't cloned and must be copied over if required. + var newMsg = msg.Clone(); + + await Task.Delay(500); // DOING WORK + + Trace.TraceInformation("Filter A processed message:{0} at {1}", + msg.MessageId, DateTime.UtcNow); + + newMsg.Properties.Add(Constants.FilterAMessageKey, "Complete"); + + return newMsg; + }); + + ... + } + + ... +} +``` + +This role contains a `ServiceBusPipeFilter` object. 
The `OnStart` method in the role connects to the queues for receiving input messages and posting output messages (the names of the queues are defined in the `Constants` class). The `Run` method invokes the `OnPipeFilterMessageAsync` method to perform some processing on each message that's received (in this example, the processing is simulated by waiting for a short period of time). When processing is complete, a new message is constructed containing the results (in this case, the input message has a custom property added), and this message is posted to the output queue. + +The sample code contains another worker role named `PipeFilterBRoleEntry` in the PipeFilterB project. This role is similar to `PipeFilterARoleEntry` except that it performs different processing in the `Run` method. In the example solution, these two roles are combined to construct a pipeline: the output queue for the `PipeFilterARoleEntry` role is the input queue for the `PipeFilterBRoleEntry` role. + +The sample solution also provides two additional roles named `InitialSenderRoleEntry` (in the InitialSender project) and `FinalReceiverRoleEntry` (in the FinalReceiver project). The `InitialSenderRoleEntry` role provides the initial message in the pipeline. The `OnStart` method connects to a single queue and the `Run` method posts a message to this queue. This queue is the input queue used by the `PipeFilterARoleEntry` role, so posting a message to it causes the message to be received and processed by the `PipeFilterARoleEntry` role. The processed message then passes through the `PipeFilterBRoleEntry` role. + +The input queue for the `FinalReceiverRoleEntry` role is the output queue for the `PipeFilterBRoleEntry` role. The `Run` method in the `FinalReceiverRoleEntry` role, shown below, receives the message and performs some final processing. Then it writes the values of the custom properties added by the filters in the pipeline to the trace output.
+ +```csharp +public class FinalReceiverRoleEntry : RoleEntryPoint +{ + ... + // Final queue/pipe in the pipeline to process data from. + private ServiceBusPipeFilter queueFinal; + + public override bool OnStart() + { + ... + // Set up the queue. + this.queueFinal = new ServiceBusPipeFilter(...,Constants.QueueFinalPath); + this.queueFinal.Start(); + ... + } + + public override void Run() + { + this.queueFinal.OnPipeFilterMessageAsync( + async (msg) => + { + await Task.Delay(500); // DOING WORK + + // The pipeline message was received. + Trace.TraceInformation( + "Pipeline Message Complete - FilterA:{0} FilterB:{1}", + msg.Properties[Constants.FilterAMessageKey], + msg.Properties[Constants.FilterBMessageKey]); + + return null; + }); + ... + } + + ... +} +``` + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/pipes-and-filters). +- [Competing Consumers pattern](competing-consumers.md). A pipeline can contain multiple instances of one or more filters. This approach is useful for running parallel instances of slow filters, enabling the system to spread the load and improve throughput. Each instance of a filter will compete for input with the other instances; two instances of a filter shouldn't be able to process the same data. Provides an explanation of this approach. +- [Compute Resource Consolidation pattern](compute-resource-consolidation.md). It might be possible to group filters that should scale together into the same process. Provides more information about the benefits and tradeoffs of this strategy. +- [Compensating Transaction pattern](compensating-transaction.md). A filter can be implemented as an operation that can be reversed, or that has a compensating operation that restores the state to a previous version in the event of a failure.
Explains how this can be implemented to maintain or achieve eventual consistency. +- [Idempotency Patterns](http://blog.jonathanoliver.com/idempotency-patterns/) on Jonathan Oliver’s blog. diff --git a/docs/patterns/priority-queue.md b/docs/patterns/priority-queue.md new file mode 100644 index 00000000000..09c79c179ee --- /dev/null +++ b/docs/patterns/priority-queue.md @@ -0,0 +1,181 @@ +--- +title: Priority Queue +description: Prioritize requests sent to services so that requests with a higher priority are received and processed more quickly than those with a lower priority. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [messaging, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Priority Queue + +[!INCLUDE [header](../_includes/header.md)] + +Prioritize requests sent to services so that requests with a higher priority are received and processed more quickly than those with a lower priority. This pattern is useful in applications that offer different service level guarantees to individual clients. + +## Context and Problem + +Applications can delegate specific tasks to other services, for example, to perform background processing or to integrate with other applications or services. In the cloud, a message queue is typically used to delegate tasks to background processing. In many cases the order requests are received in by a service isn't important. In some cases, though, it's necessary to prioritize specific requests. These requests should be processed earlier than lower priority requests that were sent previously by the application. + +## Solution + +A queue is usually a first-in, first-out (FIFO) structure, and consumers typically receive messages in the same order that they were posted to the queue. However, some message queues support priority messaging. 
The application posting a message can assign a priority and the messages in the queue are automatically reordered so that those with a higher priority will be received before those with a lower priority. The figure illustrates a queue with priority messaging. + +![Figure 1 - Using a queuing mechanism that supports message prioritization](./_images/priority-queue-pattern.png) + +> Most message queue implementations support multiple consumers (following the [Competing Consumers pattern](https://msdn.microsoft.com/library/dn568101.aspx)), and the number of consumer processes can be scaled up or down depending on demand. + +In systems that don't support priority-based message queues, an alternative solution is to maintain a separate queue for each priority. The application is responsible for posting messages to the appropriate queue. Each queue can have a separate pool of consumers. Higher priority queues can have a larger pool of consumers running on faster hardware than lower priority queues. The next figure illustrates using separate message queues for each priority. + +![Figure 2 - Using separate message queues for each priority](./_images/priority-queue-separate.png) + + +A variation on this strategy is to have a single pool of consumers that check for messages on high priority queues first, and only then start to fetch messages from lower priority queues. There are some semantic differences between a solution that uses a single pool of consumer processes (either with a single queue that supports messages with different priorities or with multiple queues that each handle messages of a single priority), and a solution that uses multiple queues with a separate pool for each queue. + +In the single pool approach, higher priority messages are always received and processed before lower priority messages. In theory, messages that have a very low priority could be continually superseded and might never be processed. 
In the multiple pool approach, lower priority messages will always be processed, just not as quickly as those of a higher priority (depending on the relative size of the pools and the resources that they have available). + +Using a priority queuing mechanism can provide the following advantages: + +- It allows applications to meet business requirements that require prioritization of availability or performance, such as offering different levels of service to specific groups of customers. + +- It can help to minimize operational costs. In the single queue approach, you can scale back the number of consumers if necessary. High priority messages will still be processed first (although possibly more slowly), and lower priority messages might be delayed for longer. If you've implemented the multiple message queue approach with separate pools of consumers for each queue, you can reduce the pool of consumers for lower priority queues, or even suspend processing for some very low priority queues by stopping all the consumers that listen for messages on those queues. + +- The multiple message queue approach can help maximize application performance and scalability by partitioning messages based on processing requirements. For example, vital tasks can be prioritized to be handled by receivers that run immediately while less important background tasks can be handled by receivers that are scheduled to run at less busy periods. + +## Issues and Considerations + +Consider the following points when deciding how to implement this pattern: + +Define the priorities in the context of the solution. For example, high priority could mean that messages should be processed within ten seconds. Identify the requirements for handling high priority items, and the other resources that should be allocated to meet these criteria. + +Decide if all high priority items must be processed before any lower priority items. 
If the messages are being processed by a single pool of consumers, you have to provide a mechanism that can preempt and suspend a task that's handling a low priority message if a higher priority message becomes available. + +In the multiple queue approach, when using a single pool of consumer processes that listen on all queues rather than a dedicated consumer pool for each queue, the consumer must apply an algorithm that ensures it always services messages from higher priority queues before those from lower priority queues. + +Monitor the processing speed on high and low priority queues to ensure that messages in these queues are processed at the expected rates. + +If you need to guarantee that low priority messages will be processed, it's necessary to implement the multiple message queue approach with multiple pools of consumers. Alternatively, in a queue that supports message prioritization, it's possible to dynamically increase the priority of a queued message as it ages. However, this approach depends on the message queue providing this feature. + +Using a separate queue for each message priority works best for systems that have a small number of well-defined priorities. + +Message priorities can be determined logically by the system. For example, rather than having explicit high and low priority messages, they could be designated as “fee paying customer,” or “non-fee paying customer.” Depending on your business model, your system can allocate more resources to processing messages from fee paying customers than non-fee paying ones. + +There might be a financial and processing cost associated with checking a queue for a message (some commercial messaging systems charge a small fee each time a message is posted or retrieved, and each time a queue is queried for messages). This cost increases when checking multiple queues. + +It's possible to dynamically adjust the size of a pool of consumers based on the length of the queue that the pool is servicing. 
For more information, see the [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). + +## When to use this pattern + +This pattern is useful in scenarios where: + +- The system must handle multiple tasks that have different priorities. + +- Different users or tenants should be served with different priority. + +## Example + +Microsoft Azure doesn't provide a queuing mechanism that natively supports automatic prioritization of messages through sorting. However, it does provide Azure Service Bus topics and subscriptions that support a queuing mechanism that provides message filtering, together with a wide range of flexible capabilities that make it ideal for use in most priority queue implementations. + +An Azure solution can implement a Service Bus topic an application can post messages to, in the same way as a queue. Messages can contain metadata in the form of application-defined custom properties. Service Bus subscriptions can be associated with the topic, and these subscriptions can filter messages based on their properties. When an application sends a message to a topic, the message is directed to the appropriate subscription where it can be read by a consumer. Consumer processes can retrieve messages from a subscription using the same semantics as a message queue (a subscription is a logical queue). The following figure illustrates implementing a priority queue with Azure Service Bus topics and subscriptions. + +![Figure 3 - Implementing a priority queue with Azure Service Bus topics and subscriptions](./_images/priority-queue-service-bus.png) + + +In the figure above, the application creates several messages and assigns a custom property called `Priority` in each message with a value, either `High` or `Low`. The application posts these messages to a topic. The topic has two associated subscriptions that both filter messages by examining the `Priority` property. 
One subscription accepts messages where the `Priority` property is set to `High`, and the other accepts messages where the `Priority` property is set to `Low`. A pool of consumers reads messages from each subscription. The high priority subscription has a larger pool, and these consumers might be running on more powerful computers with more resources available than the consumers in the low priority pool. + +Note that there's nothing special about the designation of high and low priority messages in this example. They're simply labels specified as properties in each message, and are used to direct messages to a specific subscription. If additional priorities are required, it's relatively easy to create further subscriptions and pools of consumer processes to handle these priorities. + +The PriorityQueue solution available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/priority-queue) contains an implementation of this approach. This solution contains two worker role projects named `PriorityQueue.High` and `PriorityQueue.Low`. These worker roles inherit from the `PriorityWorkerRole` class that contains the functionality for connecting to a specified subscription in the `OnStart` method. + +The `PriorityQueue.High` and `PriorityQueue.Low` worker roles connect to different subscriptions, defined by their configuration settings. An administrator can configure different numbers of each role to be run. Typically there'll be more instances of the `PriorityQueue.High` worker role than the `PriorityQueue.Low` worker role. + +The `Run` method in the `PriorityWorkerRole` class arranges for the virtual `ProcessMessage` method (also defined in the `PriorityWorkerRole` class) to be run for each message received on the queue. The following code shows the `Run` and `ProcessMessage` methods. The `QueueManager` class, defined in the PriorityQueue.Shared project, provides helper methods for using Azure Service Bus queues. 
+ +```csharp +public class PriorityWorkerRole : RoleEntryPoint +{ + private QueueManager queueManager; + ... + + public override void Run() + { + // Start listening for messages on the subscription. + var subscriptionName = CloudConfigurationManager.GetSetting("SubscriptionName"); + this.queueManager.ReceiveMessages(subscriptionName, this.ProcessMessage); + ...; + } + ... + + protected virtual async Task ProcessMessage(BrokeredMessage message) + { + // Simulating processing. + await Task.Delay(TimeSpan.FromSeconds(2)); + } +} +``` +The `PriorityQueue.High` and `PriorityQueue.Low` worker roles both override the default functionality of the `ProcessMessage` method. The code below shows the `ProcessMessage` method for the `PriorityQueue.High` worker role. + +```csharp +protected override async Task ProcessMessage(BrokeredMessage message) +{ + // Simulate message processing for High priority messages. + await base.ProcessMessage(message); + Trace.TraceInformation("High priority message processed by " + + RoleEnvironment.CurrentRoleInstance.Id + " MessageId: " + message.MessageId); +} +``` + +When an application posts messages to the topic associated with the subscriptions used by the `PriorityQueue.High` and `PriorityQueue.Low` worker roles, it specifies the priority by using the `Priority` custom property, as shown in the following code example. This code (implemented in the `WorkerRole` class in the PriorityQueue.Sender project), uses the `SendBatchAsync` helper method of the `QueueManager` class to post messages to a topic in batches. + +```csharp +// Send a low priority batch. +var lowMessages = new List<BrokeredMessage>(); + +for (int i = 0; i < 10; i++) +{ + var message = new BrokeredMessage() { MessageId = Guid.NewGuid().ToString() }; + message.Properties["Priority"] = Priority.Low; + lowMessages.Add(message); +} + +this.queueManager.SendBatchAsync(lowMessages).Wait(); +... + +// Send a high priority batch.
+var highMessages = new List(); + +for (int i = 0; i < 10; i++) +{ + var message = new BrokeredMessage() { MessageId = Guid.NewGuid().ToString() }; + message.Properties["Priority"] = Priority.High; + highMessages.Add(message); +} + +this.queueManager.SendBatchAsync(highMessages).Wait(); +``` + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/priority-queue). + +- [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). A consumer service that processes a request might need to send a reply to the instance of the application that posted the request. Provides information on the strategies that you can use to implement request/response messaging. + +- [Competing Consumers pattern](competing-consumers.md). To increase the throughput of the queues, it’s possible to have multiple consumers that listen on the same queue, and process the tasks in parallel. These consumers will compete for messages, but only one should be able to process each message. Provides more information on the benefits and tradeoffs of implementing this approach. + +- [Throttling pattern](throttling.md). You can implement throttling by using queues. Priority messaging can be used to ensure that requests from critical applications, or applications being run by high-value customers, are given priority over requests from less important applications. + +- [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). It might be possible to scale the size of the pool of consumer processes handling a queue depending on the length of the queue. This strategy can help to improve performance, especially for pools handling high priority messages. 
+ +- [Enterprise Integration Patterns with Service Bus](http://abhishekrlal.com/2013/01/11/enterprise-integration-patterns-with-service-bus-part-2/) on Abhishek Lal’s blog. + diff --git a/docs/patterns/queue-based-load-leveling.md b/docs/patterns/queue-based-load-leveling.md new file mode 100644 index 00000000000..0c96082bc13 --- /dev/null +++ b/docs/patterns/queue-based-load-leveling.md @@ -0,0 +1,81 @@ +--- +title: Queue-Based Load Leveling +description: Use a queue that acts as a buffer between a task and a service that it invokes in order to smooth intermittent heavy loads. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [messaging, availability, performance-scalability, resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Queue-Based Load Leveling + +[!INCLUDE [header](../_includes/header.md)] + +Use a queue that acts as a buffer between a task and a service it invokes in order to smooth intermittent heavy loads that can cause the service to fail or the task to time out. This can help to minimize the impact of peaks in demand on availability and responsiveness for both the task and the service. + +## Context and problem + +Many solutions in the cloud involve running tasks that invoke services. In this environment, if a service is subjected to intermittent heavy loads, it can cause performance or reliability issues. + +A service could be part of the same solution as the tasks that use it, or it could be a third-party service providing access to frequently used resources such as a cache or a storage service. If the same service is used by a number of tasks running concurrently, it can be difficult to predict the volume of requests to the service at any time. + +A service might experience peaks in demand that cause it to overload and be unable to respond to requests in a timely manner. 
Flooding a service with a large number of concurrent requests can also result in the service failing if it's unable to handle the contention these requests cause. + +## Solution + +Refactor the solution and introduce a queue between the task and the service. The task and the service run asynchronously. The task posts a message containing the data required by the service to a queue. The queue acts as a buffer, storing the message until it's retrieved by the service. The service retrieves the messages from the queue and processes them. Requests from a number of tasks, which can be generated at a highly variable rate, can be passed to the service through the same message queue. This figure shows using a queue to level the load on a service. + +![Figure 1 - Using a queue to level the load on a service](./_images/queue-based-load-leveling-pattern.png) + +The queue decouples the tasks from the service, and the service can handle the messages at its own pace regardless of the volume of requests from concurrent tasks. Additionally, there's no delay to a task if the service isn't available at the time it posts a message to the queue. + +This pattern provides the following benefits: + +- It can help to maximize availability because delays arising in services won't have an immediate and direct impact on the application, which can continue to post messages to the queue even when the service isn't available or isn't currently processing messages. +- It can help to maximize scalability because both the number of queues and the number of services can be varied to meet demand. +- It can help to control costs because the number of service instances deployed only has to be adequate to meet average load rather than the peak load. + + > Some services implement throttling when demand reaches a threshold beyond which the system could fail. Throttling can reduce the functionality available. You can implement load leveling with these services to ensure that this threshold isn't reached. 
+ +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- It's necessary to implement application logic that controls the rate at which services handle messages to avoid overwhelming the target resource. Avoid passing spikes in demand to the next stage of the system. Test the system under load to ensure that it provides the required leveling, and adjust the number of queues and the number of service instances that handle messages to achieve this. +- Message queues are a one-way communication mechanism. If a task expects a reply from a service, it might be necessary to implement a mechanism that the service can use to send a response. For more information, see the [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). +- Be careful if you apply autoscaling to services that are listening for requests on the queue. This can result in increased contention for any resources that these services share and diminish the effectiveness of using the queue to level the load. + +## When to use this pattern + +This pattern is useful to any application that uses services that are subject to overloading. + +This pattern isn't useful if the application expects a response from the service with minimal latency. + +## Example + +A Microsoft Azure web role stores data using a separate storage service. If a large number of instances of the web role run concurrently, it's possible that the storage service will be unable to respond to requests quickly enough to prevent these requests from timing out or failing. This figure highlights a service being overwhelmed by a large number of concurrent requests from instances of a web role. 
+ +![Figure 2 - A service being overwhelmed by a large number of concurrent requests from instances of a web role](./_images/queue-based-load-leveling-overwhelmed.png) + + +To resolve this, you can use a queue to level the load between the web role instances and the storage service. However, the storage service is designed to accept synchronous requests and can't be easily modified to read messages and manage throughput. You can introduce a worker role to act as a proxy service that receives requests from the queue and forwards them to the storage service. The application logic in the worker role can control the rate at which it passes requests to the storage service to prevent the storage service from being overwhelmed. This figure illustrates using a queue and a worker role to level the load between instances of the web role and the service. + +![Figure 3 - Using a queue and a worker role to level the load between instances of the web role and the service](./_images/queue-based-load-leveling-worker-role.png) + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: + +- [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). Message queues are inherently asynchronous. It might be necessary to redesign the application logic in a task if it's adapted from communicating directly with a service to using a message queue. Similarly, it might be necessary to refactor a service to accept requests from a message queue. Alternatively, it might be possible to implement a proxy service, as described in the example. +- [Competing Consumers pattern](competing-consumers.md). It might be possible to run multiple instances of a service, each acting as a message consumer from the load-leveling queue. You can use this approach to adjust the rate at which messages are received and passed to a service. +- [Throttling pattern](throttling.md). 
A simple way to implement throttling with a service is to use queue-based load leveling and route all requests to a service through a message queue. The service can process requests at a rate that ensures that resources required by the service aren't exhausted and that reduces the amount of contention that could occur. +- [Queue Service Concepts](https://msdn.microsoft.com/library/azure/dd179353.aspx). Information about choosing a messaging and queuing mechanism in Azure applications. diff --git a/docs/patterns/retry.md b/docs/patterns/retry.md new file mode 100644 index 00000000000..359237442b1 --- /dev/null +++ b/docs/patterns/retry.md @@ -0,0 +1,180 @@ +--- +title: Retry +description: Enable an application to handle anticipated, temporary failures when it tries to connect to a service or network resource by transparently retrying an operation that's previously failed. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Retry + +[!INCLUDE [header](../_includes/header.md)] + +Enable an application to handle transient failures when it tries to connect to a service or network resource, by transparently retrying a failed operation. This can improve the stability of the application. + +## Context and problem + +An application that communicates with elements running in the cloud has to be sensitive to the transient faults that can occur in this environment. Faults include the momentary loss of network connectivity to components and services, the temporary unavailability of a service, or timeouts that occur when a service is busy. + +These faults are typically self-correcting, and if the action that triggered a fault is repeated after a suitable delay it's likely to be successful. 
For example, a database service that's processing a large number of concurrent requests can implement a throttling strategy that temporarily rejects any further requests until its workload has eased. An application trying to access the database might fail to connect, but if it tries again after a delay it might succeed. + +## Solution + +In the cloud, transient faults aren't uncommon and an application should be designed to handle them elegantly and transparently. This minimizes the effects faults can have on the business tasks the application is performing. + +If an application detects a failure when it tries to send a request to a remote service, it can handle the failure using the following strategies: + +- **Cancel**. If the fault indicates that the failure isn't transient or is unlikely to be successful if repeated, the application should cancel the operation and report an exception. For example, an authentication failure caused by providing invalid credentials is not likely to succeed no matter how many times it's attempted. + +- **Retry**. If the specific fault reported is unusual or rare, it might have been caused by unusual circumstances such as a network packet becoming corrupted while it was being transmitted. In this case, the application could retry the failing request again immediately because the same failure is unlikely to be repeated and the request will probably be successful. + +- **Retry after delay.** If the fault is caused by one of the more commonplace connectivity or busy failures, the network or service might need a short period while the connectivity issues are corrected or the backlog of work is cleared. The application should wait for a suitable time before retrying the request. + +For the more common transient failures, the period between retries should be chosen to spread requests from multiple instances of the application as evenly as possible. This reduces the chance of a busy service continuing to be overloaded. 
If many instances of an application are continually overwhelming a service with retry requests, it'll take the service longer to recover. + +If the request still fails, the application can wait and make another attempt. If necessary, this process can be repeated with increasing delays between retry attempts, until some maximum number of requests have been attempted. The delay can be increased incrementally or exponentially, depending on the type of failure and the probability that it'll be corrected during this time. + +The following diagram illustrates invoking an operation in a hosted service using this pattern. If the request is unsuccessful after a predefined number of attempts, the application should treat the fault as an exception and handle it accordingly. + +![Figure 1 - Invoking an operation in a hosted service using the Retry pattern](./_images/retry-pattern.png) + +The application should wrap all attempts to access a remote service in code that implements a retry policy matching one of the strategies listed above. Requests sent to different services can be subject to different policies. Some vendors provide libraries that implement retry policies, where the application can specify the maximum number of retries, the time between retry attempts, and other parameters. + +An application should log the details of faults and failing operations. This information is useful to operators. If a service is frequently unavailable or busy, it's often because the service has exhausted its resources. You can reduce the frequency of these faults by scaling out the service. For example, if a database service is continually overloaded, it might be beneficial to partition the database and spread the load across multiple servers. + +> Microsoft Azure provides extensive support for the Retry pattern. 
The patterns & practices [Transient Fault Handling Block](https://msdn.microsoft.com/library/hh680934.aspx) enables an application to handle transient faults in many Azure services using a range of retry strategies. The [Microsoft Entity Framework version 6](https://msdn.microsoft.com/en-us/data/dn456835.aspx) provides facilities for retrying database operations. Additionally, many of the Azure Service Bus and Azure Storage APIs implement retry logic transparently. + +## Issues and considerations + +You should consider the following points when deciding how to implement this pattern. + +The retry policy should be tuned to match the business requirements of the application and the nature of the failure. For some noncritical operations, it's better to fail fast rather than retry several times and impact the throughput of the application. For example, in an interactive web application accessing a remote service, it's better to fail after a smaller number of retries with only a short delay between retry attempts, and display a suitable message to the user (for example, “please try again later”). For a batch application, it might be more appropriate to increase the number of retry attempts with an exponentially increasing delay between attempts. + +An aggressive retry policy with minimal delay between attempts, and a large number of retries, could further degrade a busy service that's running close to or at capacity. This retry policy could also affect the responsiveness of the application if it's continually trying to perform a failing operation. + +If a request still fails after a significant number of retries, it's better for the application to prevent further requests going to the same resource for a set period and simply report a failure immediately. When the period expires, the application can tentatively allow one or more requests through to see whether they're successful. For more details of this strategy, see the [Circuit Breaker pattern](circuit-breaker.md). 
+ +Consider whether the operation is idempotent. If so, it's inherently safe to retry. Otherwise, retries could cause the operation to be executed more than once, with unintended side effects. For example, a service might receive the request, process the request successfully, but fail to send a response. At that point, the retry logic might re-send the request, assuming that the first request wasn't received. + +A request to a service can fail for a variety of reasons raising different exceptions depending on the nature of the failure. Some exceptions indicate a failure that can be resolved quickly, while others indicate that the failure is longer lasting. It's useful for the retry policy to adjust the time between retry attempts based on the type of the exception. + +Consider how retrying an operation that's part of a transaction will affect the overall transaction consistency. Fine tune the retry policy for transactional operations to maximize the chance of success and reduce the need to undo all the transaction steps. + +Ensure that all retry code is fully tested against a variety of failure conditions. Check that it doesn't severely impact the performance or reliability of the application, cause excessive load on services and resources, or generate race conditions or bottlenecks. + +Implement retry logic only where the full context of a failing operation is understood. For example, if a task that contains a retry policy invokes another task that also contains a retry policy, this extra layer of retries can add long delays to the processing. It might be better to configure the lower-level task to fail fast and report the reason for the failure back to the task that invoked it. This higher-level task can then handle the failure based on its own policy. + +It's important to log all connectivity failures that cause a retry so that underlying problems with the application, services, or resources can be identified. 
+ +Investigate the faults that are most likely to occur for a service or a resource to discover if they're likely to be long lasting or terminal. If they are, it's better to handle the fault as an exception. The application can report or log the exception, and then try to continue either by invoking an alternative service (if one is available), or by offering degraded functionality. For more information on how to detect and handle long-lasting faults, see the [Circuit Breaker pattern](circuit-breaker.md). + +## When to use this pattern + +Use this pattern when an application could experience transient faults as it interacts with a remote service or accesses a remote resource. These faults are expected to be short lived, and repeating a request that has previously failed could succeed on a subsequent attempt. + +This pattern might not be useful: + +- When a fault is likely to be long lasting, because this can affect the responsiveness of an application. The application might be wasting time and resources trying to repeat a request that's likely to fail. +- For handling failures that aren't due to transient faults, such as internal exceptions caused by errors in the business logic of an application. +- As an alternative to addressing scalability issues in a system. If an application experiences frequent busy faults, it's often a sign that the service or resource being accessed should be scaled up. + +## Example + +This example in C# illustrates an implementation of the Retry pattern. The `OperationWithBasicRetryAsync` method, shown below, invokes an external service asynchronously through the `TransientOperationAsync` method (the details of this method will be specific to the service and are omitted from the sample code). + +```csharp +private int retryCount = 3; +... + +public async Task OperationWithBasicRetryAsync() +{ + int currentRetry = 0; + + for (; ;) + { + try + { + // Calling external service. + await TransientOperationAsync(); + + // Return or break. 
+ break; + } + catch (Exception ex) + { + Trace.TraceError("Operation Exception"); + + currentRetry++; + + // Check if the exception thrown was a transient exception + // based on the logic in the error detection strategy. + // Determine whether to retry the operation, as well as how + // long to wait, based on the retry strategy. + if (currentRetry > this.retryCount || !IsTransient(ex)) + { + // If this isn't a transient error + // or we shouldn't retry, rethrow the exception. + throw; + } + } + + // Wait to retry the operation. + // Consider calculating an exponential delay here and + // using a strategy best suited for the operation and fault. + Await.Task.Delay(); + } +} + +// Async method that wraps a call to a remote service (details not shown). +private async Task TransientOperationAsync() +{ + ... +} +``` + +The statement that invokes this method is contained in a try/catch block wrapped in a for loop. The for loop exits if the call to the `TransientOperationAsync` method succeeds without throwing an exception. If the `TransientOperationAsync` method fails, the catch block examines the reason for the failure. If it's believed to be a transient error the code waits for a short delay before retrying the operation. + +The for loop also tracks the number of times that the operation has been attempted, and if the code fails three times the exception is assumed to be more long lasting. If the exception isn't transient or it's long lasting, the catch handler throws an exception. This exception exits the for loop and should be caught by the code that invokes the `OperationWithBasicRetryAsync` method. + +The `IsTransient` method, shown below, checks for a specific set of exceptions that are relevant to the environment the code is run in. The definition of a transient exception will vary according to the resources being accessed and the environment the operation is being performed in. 
+ +```csharp +private bool IsTransient(Exception ex) +{ + // Determine if the exception is transient. + // In some cases this is as simple as checking the exception type, in other + // cases it might be necessary to inspect other properties of the exception. + if (ex is OperationTransientException) + return true; + + var webException = ex as WebException; + if (webException != null) + { + // If the web exception contains one of the following status values + // it might be transient. + return new[] {WebExceptionStatus.ConnectionClosed, + WebExceptionStatus.Timeout, + WebExceptionStatus.RequestCanceled }. + Contains(webException.Status); + } + + // Additional exception checking logic goes here. + return false; +} +``` + +## Related patterns and guidance + +- [Circuit Breaker pattern](circuit-breaker.md). The Retry pattern is useful for handling transient faults. If a failure is expected to be more long lasting, it might be more appropriate to implement the Circuit Breaker pattern. The Retry pattern can also be used in conjunction with a circuit breaker to provide a comprehensive approach to handling faults. +- [Transient Fault Handling Application Block](https://msdn.microsoft.com/library/hh680934.aspx). +- [Connection Resiliency / Retry Logic](https://msdn.microsoft.com/en-us/data/dn456835.aspx) diff --git a/docs/patterns/runtime-reconfiguration.md b/docs/patterns/runtime-reconfiguration.md new file mode 100644 index 00000000000..7def84d1df6 --- /dev/null +++ b/docs/patterns/runtime-reconfiguration.md @@ -0,0 +1,199 @@ +--- +title: Runtime Reconfiguration +description: Design an application so that it can be reconfigured without requiring redeployment or restarting the application. 
+keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [design-implementation, management-monitoring] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Runtime Reconfiguration + +[!INCLUDE [header](../_includes/header.md)] + +Design an application so that it can be reconfigured without requiring redeployment or restarting the application. This helps to maintain availability and minimize downtime. + +## Context and problem + +A primary goal for applications such as commercial and business websites is to minimize downtime and interruption to customers and users. Sometimes it's necessary to reconfigure the application to change specific behavior or settings while it's deployed and in use. Therefore, it's an advantage for the application to be designed to allow these configuration changes to be applied while it's running, and for the components of the application to detect the changes and apply them as soon as possible. + +Examples of the kinds of configuration changes to be applied might be adjusting the granularity of logging to assist in debugging a problem with the application, swapping connection strings to use a different data store, or turning on or off specific sections or functionality of the application. + +## Solution + +The solution for implementing this pattern depends on the features available in the application hosting environment. Typically, the application code will respond to one or more events that are raised by the hosting infrastructure when it detects a change to the application configuration. This is usually the result of uploading a new configuration file, or in response to changes in the configuration through the administration portal or by accessing an API. + +Code that handles the configuration change events can examine the changes and apply them to the components of the application. 
These components have to detect and react to the changes, and so the values they use will usually be exposed as writable properties or methods that the code in the event handler can set to new values or execute. From this point, the components should use the new values so that the required changes to the application behavior occur. + +If it isn't possible for the components to apply the changes at runtime, it'll be necessary to restart the application so that these changes are applied when the application starts up again. In some hosting environments it's possible to detect these types of changes, and indicate to the environment that the application must be restarted. In other cases it might be necessary to implement code that analyses the setting changes and forces an application restart when necessary. + +The figure shows an overview of this pattern. + +![Figure 1 - A basic overview of this pattern](./_images/runtime-reconfiguration-pattern.png) + + +Most environments expose events raised in response to configuration changes. In those that don't, it will be necessary to have a polling mechanism that regularly checks for changes to the configuration and applies these changes. It might also be necessary to restart the application if the changes can't be applied at runtime. For example, it's possible to compare the date and time of a configuration file at preset intervals, and run code to apply the changes when a newer version is found. Another approach is to incorporate a control in the administration UI of the application, or expose a secured endpoint that can be accessed from outside the application, that executes code that reads and applies the updated configuration. + +Alternatively, the application can react to some other change in the environment. 
For example, occurrences of a specific runtime error might change the logging configuration to automatically collect additional information, or the code could use the current date to read and apply a theme that reflects the season or a special event. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +The configuration settings must be stored outside of the deployed application so they can be updated without requiring the entire package to be redeployed. Typically, the settings are stored in a configuration file, or in an external repository such as a database or online storage. Access to the runtime configuration mechanism should be strictly controlled, as well as strictly audited when used. + +If the hosting infrastructure doesn't automatically detect configuration change events, and expose these events to the application code, you must implement an alternative mechanism to detect and apply the changes. This can be through a polling mechanism, or by exposing an interactive control or endpoint that initiates the update process. + +If you need to implement a polling mechanism, consider how often checks for updates to the configuration should take place. A long polling interval means that changes might not be applied for some time. A short interval might adversely affect operation by absorbing available compute and I/O resources. + +If there's more than one instance of the application, additional factors should be considered, depending on how changes are detected. If changes are detected automatically through events raised by the hosting infrastructure, they might not be detected by all application instances at the same time. This means that some instances will be using the original configuration for a period while others will use the new settings. If the update is detected through a polling mechanism, this must communicate the change to all instances in order to maintain consistency. 
+ +Some configuration changes require the application to be restarted, or even require the hosting server to be rebooted. You must identify these types of configuration settings and perform the appropriate action for each one. For example, a change that requires the application be restarted might do this automatically, or it might be the responsibility of the administrator to initiate the restart when the application isn't under excessive load and other instances of the application can handle the load. + +Plan for a staged rollout of updates and confirm they're successful, and that the updated application instances are performing correctly, before applying the update to all instances. This can prevent a total outage of the application should an error occur. Where the update requires a restart or a reboot of the application, particularly where the application has a significant start up or warm up time, use a staged rollout to prevent multiple instances being offline at the same time. + +Consider how you'll roll back configuration changes that cause issues, or that result in failure of the application. For example, it should be possible to roll back a change immediately instead of waiting for a polling interval to detect the change. + +Consider how the location of the configuration settings might affect application performance. For example, handle any errors that might occur if the external store is unavailable when the application starts, or when configuration changes are applied. You can do this using a default configuration or by caching the settings locally on the server and reusing these values while retrying access to the remote data store. + +Caching can help to reduce delays if a component needs to repeatedly access configuration settings. However, when the configuration changes, the application code has to invalidate the cached settings, and the component must use the updated settings. 
+ +## When to use this pattern + +This pattern is useful for: + +- Applications that have to avoid all unnecessary downtime, while still being able to apply changes to the application configuration. + +- Environments that expose events raised automatically when the main configuration changes. Typically this is when a new configuration file is detected, or when changes are made to an existing configuration file. + +- Applications where the configuration changes often and the changes can be applied to components without requiring the application to be restarted, or without requiring the hosting server to be rebooted. + +This pattern might not be useful if the runtime components are designed so they can only be configured at initialization time, and the effort of updating those components can't be justified in comparison to restarting the application and enduring a short downtime. + +## Example + +Microsoft Azure Cloud Services roles detect and expose two events that are raised when the hosting environment detects a change to the ServiceConfiguration.cscfg files: + +- **RoleEnvironment.Changing**. This event is raised after a configuration change is detected, but before it's applied to the application. You can handle the event to query the changes and to cancel the runtime reconfiguration. If you cancel the change, the web or worker role will be restarted automatically so that the new configuration is used by the application. +- **RoleEnvironment.Changed**. This event is raised after the application configuration has been applied. You can handle the event to query the changes that were applied. + +When you cancel a change in the `RoleEnvironment.Changing` event you're indicating to Azure that a new setting can't be applied while the application is running, and that it must be restarted in order to use the new value. 
Effectively, you'll cancel a change only if your application or component can't react to the change at runtime, and requires a restart in order to use the new value. + +> For more information, see [RoleEnvironment.Changing Event](https://msdn.microsoft.com/library/azure/microsoft.windowsazure.serviceruntime.roleenvironment.changing.aspx). + +To handle the `RoleEnvironment.Changing` and `RoleEnvironment.Changed` events you typically add a custom handler to the event. For example, the following code from the `Global.asax.cs` class (in the Runtime Reconfiguration solution available from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/runtime-reconfiguration)) shows how to add a custom function named `RoleEnvironment_Changed` to the event handler chain. This is from the Global.asax.cs file of the example. + +> The examples for this pattern are in the RuntimeReconfiguration.Web project of the RuntimeReconfiguration solution. + +```csharp +protected void Application_Start(object sender, EventArgs e) +{ + ConfigureFromSetting(CustomSettingName); + RoleEnvironment.Changed += this.RoleEnvironment_Changed; +} +``` + +In a web or worker role you can use similar code in the `OnStart` event handler of the role to handle the `RoleEnvironment.Changing` event. This is from the WebRole.cs file of the example. + +```csharp +public override bool OnStart() +{ + // Add the trace listener. The web role process isn't configured by web.config. + Trace.Listeners.Add(new DiagnosticMonitorTraceListener()); + + RoleEnvironment.Changing += this.RoleEnvironment_Changing; + return base.OnStart(); +} +``` + +Be aware that, in the case of web roles, the `OnStart` event handler runs in a separate process from the web application process. This is why you typically handle the `RoleEnvironment.Changed` event handler in the Global.asax file so that you can update the runtime configuration of your web application, and the `RoleEnvironment.Changing` event in the role. 
In the case of a worker role, you can subscribe to both the `RoleEnvironment.Changing` and `RoleEnvironment.Changed` events in the `OnStart` event handler. + +> You can store custom configuration settings in the service configuration file, in a custom configuration file, in a database such as Azure SQL Database or SQL Server in a Virtual Machine, or in Azure blob or table storage. You'll need to create code that can access the custom configuration settings and apply these to the application—typically by setting the properties of components in the application. + +For example, the following custom function reads the value of a setting from the Azure service configuration file and then applies it to the current instance of a runtime component named `SomeRuntimeComponent`. This is from the Global.asax.cs file of the example. + +> Some configuration settings, such as those for Windows Identity Framework, can't be stored in the Azure service configuration file and must be in the App.config or Web.config file. + +```csharp +private static void ConfigureFromSetting(string settingName) +{ + var value = RoleEnvironment.GetConfigurationSettingValue(settingName); + SomeRuntimeComponent.Instance.CurrentValue = value; +} +``` + +In Azure, some configuration changes are detected and applied automatically. This includes the configuration of the Azure diagnostics system in the Diagnostics.wadcfg file that specifies the types of information to collect and how to persist the log files. Therefore, it's only necessary to write code that handles the custom settings you add to the service configuration file. Your code should either: + +- Apply the custom settings from an updated configuration to the appropriate components of your application at runtime so that their behavior reflects the new configuration. + +- Cancel the change to tell Azure that the new value can't be applied at runtime, and that the application must be restarted in order for the change to be applied. 
+ +For example, the following code shows how you can use the `RoleEnvironment.Changing` event to cancel the update for all settings except the ones that can be applied at runtime without requiring a restart. This example allows a change to the "CustomSetting" to be applied at runtime without restarting the application. The component that uses this setting will be able to read the new value and change its behavior accordingly at runtime. Any other change to the configuration will automatically cause the web or worker role to restart. + +```csharp +private void RoleEnvironment_Changing(object sender, + RoleEnvironmentChangingEventArgs e) +{ + var changedSettings = e.Changes.OfType<RoleEnvironmentConfigurationSettingChange>() + .Select(c => c.ConfigurationSettingName).ToList(); + Trace.TraceInformation("Changing notification. Settings being changed: " + + string.Join(", ", changedSettings)); + + if (changedSettings + .Any(settingName => !string.Equals(settingName, CustomSettingName, + StringComparison.Ordinal))) + { + Trace.TraceInformation("Cancelling dynamic configuration change (restarting)."); + + // Setting this to true will restart the role gracefully. If Cancel isn't + // set to true, and the change isn't handled by the application, the + // application won't use the new value until it's restarted (either + // manually or for some other reason). + e.Cancel = true; + } + else + { + Trace.TraceInformation("Handling configuration change without restarting. "); + } +} +``` + +> This approach demonstrates good practice because it ensures that a change to any setting that the application code isn't aware of (and so can't be sure it can be applied at runtime) will cause a restart. If any one of the changes is canceled, the role will be restarted. + +Updates that aren't canceled in the `RoleEnvironment.Changing` event handler can then be detected and applied to the application components after the new configuration has been accepted by the Azure framework. 
For example, the following code in the `Global.asax` file of the example solution handles the `RoleEnvironment.Changed` event. It examines each configuration setting and, when it finds the "CustomSetting", calls a function that applies the new setting to the appropriate component in the application. + +```csharp +private void RoleEnvironment_Changed(object sender, + RoleEnvironmentChangedEventArgs e) +{ + Trace.TraceInformation("Updating instance with new configuration settings."); + + foreach (var settingChange in + e.Changes.OfType<RoleEnvironmentConfigurationSettingChange>()) + { + if (string.Equals(settingChange.ConfigurationSettingName, + CustomSettingName, + StringComparison.Ordinal)) + { + // Execute a function to update the configuration of the component. + ConfigureFromSetting(CustomSettingName); + } + } +} +``` + +Note that if you fail to cancel a configuration change, but don't apply the new value to your application component, then the change won't take effect until the next time that the application is restarted. This can lead to unpredictable behavior, particularly if the hosting role instance is restarted automatically by Azure as part of its regular maintenance operations—at which point the new setting value will be applied. + +## Related patterns and guidance + +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/runtime-reconfiguration). +- Moving configuration information out of the application deployment package to a centralized location can provide easier management and control of configuration data, and allows sharing of configuration data across applications and application instances. For more information, see [External Configuration Store pattern](external-configuration-store.md). 
diff --git a/docs/patterns/scheduler-agent-supervisor.md b/docs/patterns/scheduler-agent-supervisor.md new file mode 100644 index 00000000000..048893dd304 --- /dev/null +++ b/docs/patterns/scheduler-agent-supervisor.md @@ -0,0 +1,148 @@ +--- +title: Scheduler Agent Supervisor +description: Coordinate a set of actions across a distributed set of services and other remote resources. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [messaging, resiliency] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Scheduler Agent Supervisor + +[!INCLUDE [header](../_includes/header.md)] + +Coordinate a set of distributed actions as a single operation. If any of the actions fail, try to handle the failures transparently, or else undo the work that was performed, so the entire operation succeeds or fails as a whole. This can add resiliency to a distributed system, by enabling it to recover and retry actions that fail due to transient exceptions, long-lasting faults, and process failures. + +## Context and problem + +An application performs tasks that include a number of steps, some of which might invoke remote services or access remote resources. The individual steps might be independent of each other, but they are orchestrated by the application logic that implements the task. + +Whenever possible, the application should ensure that the task runs to completion and resolve any failures that might occur when accessing remote services or resources. Failures can occur for many reasons. For example, the network might be down, communications could be interrupted, a remote service might be unresponsive or in an unstable state, or a remote resource might be temporarily inaccessible, perhaps due to resource constraints. 
In many cases the failures will be transient and can be handled by using the [Retry pattern](transient-faults.md). + +If the application detects a more permanent fault it can't easily recover from, it must be able to restore the system to a consistent state and ensure integrity of the entire operation. + +## Solution + +The Scheduler Agent Supervisor pattern defines the following actors. These actors orchestrate the steps to be performed as part of the overall task. + +- The **Scheduler** arranges for the steps that make up the task to be executed and orchestrates their operation. These steps can be combined into a pipeline or workflow. The Scheduler is responsible for ensuring that the steps in this workflow are performed in the right order. As each step is performed, the Scheduler records the state of the workflow, such as "step not yet started," "step running," or "step completed." The state information should also include an upper limit of the time allowed for the step to finish, called the complete-by time. If a step requires access to a remote service or resource, the Scheduler invokes the appropriate Agent, passing it the details of the work to be performed. The Scheduler typically communicates with an Agent using asynchronous request/response messaging. This can be implemented using queues, although other distributed messaging technologies could be used instead. + + > The Scheduler performs a similar function to the Process Manager in the [Process Manager pattern](http://www.enterpriseintegrationpatterns.com/patterns/messaging/ProcessManager.html). The actual workflow is typically defined and implemented by a workflow engine that's controlled by the Scheduler. This approach decouples the business logic in the workflow from the Scheduler. + +- The **Agent** contains logic that encapsulates a call to a remote service, or access to a remote resource referenced by a step in a task. 
Each Agent typically wraps calls to a single service or resource, implementing the appropriate error handling and retry logic (subject to a timeout constraint, described later). If the steps in the workflow being run by the Scheduler use several services and resources across different steps, each step might reference a different Agent (this is an implementation detail of the pattern). + +- The **Supervisor** monitors the status of the steps in the task being performed by the Scheduler. It runs periodically (the frequency will be system specific), and examines the status of steps maintained by the Scheduler. If it detects any that have timed out or failed, it arranges for the appropriate Agent to recover the step or execute the appropriate remedial action (this might involve modifying the status of a step). Note that the recovery or remedial actions are implemented by the Scheduler and Agents. The Supervisor should simply request that these actions be performed. + +The Scheduler, Agent, and Supervisor are logical components and their physical implementation depends on the technology being used. For example, several logical agents might be implemented as part of a single web service. + +The Scheduler maintains information about the progress of the task and the state of each step in a durable data store, called the state store. The Supervisor can use this information to help determine whether a step has failed. The figure illustrates the relationship between the Scheduler, the Agents, the Supervisor, and the state store. + +![Figure 1 - The actors in the Scheduler Agent Supervisor pattern](./_images/scheduler-agent-supervisor-pattern.png) + + +> This diagram shows a simplified version of the pattern. In a real implementation, there might be many instances of the Scheduler running concurrently, each handling a subset of tasks. Similarly, the system could run multiple instances of each Agent, or even multiple Supervisors. 
In this case, Supervisors must coordinate their work with each other carefully to ensure that they don’t compete to recover the same failed steps and tasks. The [Leader Election pattern](leader-election.md) provides one possible solution to this problem. + +When the application is ready to run a task, it submits a request to the Scheduler. The Scheduler records initial state information about the task and its steps (for example, step not yet started) in the state store and then starts performing the operations defined by the workflow. As the Scheduler starts each step, it updates the information about the state of that step in the state store (for example, step running). + +If a step references a remote service or resource, the Scheduler sends a message to the appropriate Agent. The message contains the information that the Agent needs to pass to the service or access the resource, in addition to the complete-by time for the operation. If the Agent completes its operation successfully, it returns a response to the Scheduler. The Scheduler can then update the state information in the state store (for example, step completed) and perform the next step. This process continues until the entire task is complete. + +An Agent can implement any retry logic that's necessary to perform its work. However, if the Agent doesn't complete its work before the complete-by period expires, the Scheduler will assume that the operation has failed. In this case, the Agent should stop its work and not try to return anything to the Scheduler (not even an error message), or try any form of recovery. The reason for this restriction is that, after a step has timed out or failed, another instance of the Agent might be scheduled to run the failing step (this process is described later). + +If the Agent fails, the Scheduler won't receive a response. The pattern doesn't make a distinction between a step that has timed out and one that has genuinely failed. 
+ +If a step times out or fails, the state store will contain a record that indicates that the step is running, but the complete-by time will have passed. The Supervisor looks for steps like this and tries to recover them. One possible strategy is for the Supervisor to update the complete-by value to extend the time available to complete the step, and then send a message to the Scheduler identifying the step that has timed out. The Scheduler can then try to repeat this step. However, this design requires the tasks to be idempotent. + +The Supervisor might need to prevent the same step from being retried if it continually fails or times out. To do this, the Supervisor could maintain a retry count for each step, along with the state information, in the state store. If this count exceeds a predefined threshold the Supervisor can adopt a strategy of waiting for an extended period before notifying the Scheduler that it should retry the step, in the expectation that the fault will be resolved during this period. Alternatively, the Supervisor can send a message to the Scheduler to request the entire task be undone by implementing a [Compensating Transaction pattern](compensating-transaction.md). This approach will depend on the Scheduler and Agents providing the information necessary to implement the compensating operations for each step that completed successfully. + +> It isn't the purpose of the Supervisor to monitor the Scheduler and Agents, and restart them if they fail. This aspect of the system should be handled by the infrastructure these components are running in. Similarly, the Supervisor shouldn't have knowledge of the actual business operations that the tasks being performed by the Scheduler are running (including how to compensate should these tasks fail). This is the purpose of the workflow logic implemented by the Scheduler. 
The sole responsibility of the Supervisor is to determine whether a step has failed and arrange either for it to be repeated or for the entire task containing the failed step to be undone. + +If the Scheduler is restarted after a failure, or the workflow being performed by the Scheduler terminates unexpectedly, the Scheduler should be able to determine the status of any inflight task that it was handling when it failed, and be prepared to resume this task from that point. The implementation details of this process are likely to be system specific. If the task can't be recovered, it might be necessary to undo the work already performed by the task. This might also require implementing a [compensating transaction](compensating-transaction.md). + +The key advantage of this pattern is that the system is resilient in the event of unexpected temporary or unrecoverable failures. The system can be constructed to be self healing. For example, if an Agent or the Scheduler fails, a new one can be started and the Supervisor can arrange for a task to be resumed. If the Supervisor fails, another instance can be started and can take over from where the failure occurred. If the Supervisor is scheduled to run periodically, a new instance can be automatically started after a predefined interval. The state store can be replicated to reach an even greater degree of resiliency. + +## Issues and considerations + +You should consider the following points when deciding how to implement this pattern: + +- This pattern can be difficult to implement and requires thorough testing of each possible failure mode of the system. + +- The recovery/retry logic implemented by the Scheduler is complex and dependent on state information held in the state store. It might also be necessary to record the information required to implement a compensating transaction in a durable data store. + +- How often the Supervisor runs will be important. 
It should run often enough to prevent any failed steps from blocking an application for an extended period, but it shouldn't run so often that it becomes an overhead. + +- The steps performed by an Agent could be run more than once. The logic that implements these steps should be idempotent. + +## When to use this pattern + +Use this pattern when a process that runs in a distributed environment, such as the cloud, must be resilient to communications failure and/or operational failure. + +This pattern might not be suitable for tasks that don't invoke remote services or access remote resources. + +## Example + +A web application that implements an ecommerce system has been deployed on Microsoft Azure. Users can run this application to browse the available products and to place orders. The user interface runs as a web role, and the order processing elements of the application are implemented as a set of worker roles. Part of the order processing logic involves accessing a remote service, and this aspect of the system could be prone to transient or more long-lasting faults. For this reason, the designers used the Scheduler Agent Supervisor pattern to implement the order processing elements of the system. + +When a customer places an order, the application constructs a message that describes the order and posts this message to a queue. A separate submission process, running in a worker role, retrieves the message, inserts the order details into the orders database, and creates a record for the order process in the state store. Note that the inserts into the orders database and the state store are performed as part of the same operation. The submission process is designed to ensure that both inserts complete together. + +The state information that the submission process creates for the order includes: + +- **OrderID**. The ID of the order in the orders database. + +- **LockedBy**. The instance ID of the worker role handling the order. 
There might be multiple current instances of the worker role running the Scheduler, but each order should only be handled by a single instance. + +- **CompleteBy**. The time the order should be processed by. + +- **ProcessState**. The current state of the task handling the order. The possible states are: + + - **Pending**. The order has been created but processing hasn't yet been started. + - **Processing**. The order is currently being processed. + - **Processed**. The order has been processed successfully. + - **Error**. The order processing has failed. + +- **FailureCount**. The number of times that processing has been tried for the order. + +In this state information, the `OrderID` field is copied from the order ID of the new order. The `LockedBy` and `CompleteBy` fields are set to `null`, the `ProcessState` field is set to `Pending`, and the `FailureCount` field is set to 0. + +> In this example, the order handling logic is relatively simple and only has a single step that invokes a remote service. In a more complex multistep scenario, the submission process would likely involve several steps, and so several records would be created in the state store—each one describing the state of an individual step. + +The Scheduler also runs as part of a worker role and implements the business logic that handles the order. An instance of the Scheduler polling for new orders examines the state store for records where the `LockedBy` field is null and the `ProcessState` field is pending. When the Scheduler finds a new order, it immediately populates the `LockedBy` field with its own instance ID, sets the `CompleteBy` field to an appropriate time, and sets the `ProcessState` field to processing. The code is designed to be exclusive and atomic to ensure that two concurrent instances of the Scheduler can't try to handle the same order simultaneously. 
+ +The Scheduler then runs the business workflow to process the order asynchronously, passing it the value in the `OrderID` field from the state store. The workflow handling the order retrieves the details of the order from the orders database and performs its work. When a step in the order processing workflow needs to invoke the remote service, it uses an Agent. The workflow step communicates with the Agent using a pair of Azure Service Bus message queues acting as a request/response channel. The figure shows a high level view of the solution. + +![Figure 2 - Using the Scheduler Agent Supervisor pattern to handle orders in an Azure solution](./_images/scheduler-agent-supervisor-solution.png) + +The message sent to the Agent from a workflow step describes the order and includes the complete-by time. If the Agent receives a response from the remote service before the complete-by time expires, it posts a reply message on the Service Bus queue on which the workflow is listening. When the workflow step receives the valid reply message, it completes its processing and the Scheduler sets the `ProcessState` field of the order state to processed. At this point, the order processing has completed successfully. + +If the complete-by time expires before the Agent receives a response from the remote service, the Agent simply halts its processing and terminates handling the order. Similarly, if the workflow handling the order exceeds the complete-by time, it also terminates. In both cases, the state of the order in the state store remains set to processing, but the complete-by time indicates that the time for processing the order has passed and the process is deemed to have failed. Note that if the Agent that's accessing the remote service, or the workflow that's handling the order (or both) terminate unexpectedly, the information in the state store will again remain set to processing and eventually will have an expired complete-by value. 
+ +If the Agent detects an unrecoverable, nontransient fault while it's trying to contact the remote service, it can send an error response back to the workflow. The Scheduler can set the status of the order to error and raise an event that alerts an operator. The operator can then try to resolve the reason for the failure manually and resubmit the failed processing step. + +The Supervisor periodically examines the state store looking for orders with an expired complete-by value. If the Supervisor finds a record, it increments the `FailureCount` field. If the failure count value is below a specified threshold value, the Supervisor resets the `LockedBy` field to null, updates the `CompleteBy` field with a new expiration time, and sets the `ProcessState` field to pending. An instance of the Scheduler can pick up this order and perform its processing as before. If the failure count value exceeds a specified threshold, the reason for the failure is assumed to be nontransient. The Supervisor sets the status of the order to error and raises an event that alerts an operator. + +> In this example, the Supervisor is implemented in a separate worker role. You can use a variety of strategies to arrange for the Supervisor task to be run, including using the Azure Scheduler service (not to be confused with the Scheduler component in this pattern). For more information about the Azure Scheduler service, visit the [Scheduler](https://azure.microsoft.com/services/scheduler/) page. + +Although it isn't shown in this example, the Scheduler might need to keep the application that submitted the order informed about the progress and status of the order. The application and the Scheduler are isolated from each other to eliminate any dependencies between them. The application has no knowledge of which instance of the Scheduler is handling the order, and the Scheduler is unaware of which specific application instance posted the order. 
+ +To allow the order status to be reported, the application could use its own private response queue. The details of this response queue would be included as part of the request sent to the submission process, which would include this information in the state store. The Scheduler would then post messages to this queue indicating the status of the order (request received, order completed, order failed, and so on). It should include the order ID in these messages so they can be correlated with the original request by the application. + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: +- [Retry pattern](transient-faults.md). An Agent can use this pattern to transparently retry an operation that accesses a remote service or resource that has previously failed. Use when the expectation is that the cause of the failure is transient and can be corrected. +- [Circuit Breaker pattern](circuit-breaker.md). An Agent can use this pattern to handle faults that take a variable amount of time to correct when connecting to a remote service or resource. +- [Compensating Transaction pattern](compensating-transaction.md). If the workflow being performed by a Scheduler can't be completed successfully, it might be necessary to undo any work it's previously performed. The Compensating Transaction pattern describes how this can be achieved for operations that follow the eventual consistency model. These types of operations are commonly implemented by a Scheduler that performs complex business processes and workflows. +- [Asynchronous Messaging Primer](https://msdn.microsoft.com/library/dn589781.aspx). The components in the Scheduler Agent Supervisor pattern typically run decoupled from each other and communicate asynchronously. Describes some of the approaches that can be used to implement asynchronous communication based on message queues. +- [Leader Election pattern](leader-election.md). 
It might be necessary to coordinate the actions of multiple instances of a Supervisor to prevent them from attempting to recover the same failed process. The Leader Election pattern describes how to do this. +- [Cloud Architecture: The Scheduler-Agent-Supervisor Pattern](https://blogs.msdn.microsoft.com/clemensv/2010/09/27/cloud-architecture-the-scheduler-agent-supervisor-pattern/) on Clemens Vasters' blog +- [Process Manager pattern](http://www.enterpriseintegrationpatterns.com/patterns/messaging/ProcessManager.html) +- [Reference 6: A Saga on Sagas](https://msdn.microsoft.com/library/jj591569.aspx). An example showing how the CQRS pattern uses a process manager (part of the CQRS Journey guidance). +- [Microsoft Azure Scheduler](https://azure.microsoft.com/services/scheduler/) diff --git a/docs/patterns/sharding.md b/docs/patterns/sharding.md new file mode 100644 index 00000000000..9cca378ee5a --- /dev/null +++ b/docs/patterns/sharding.md @@ -0,0 +1,226 @@ +--- +title: Sharding +description: Divide a data store into a set of horizontal partitions or shards. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Sharding + +[!INCLUDE [header](../_includes/header.md)] + +Divide a data store into a set of horizontal partitions or shards. This can improve scalability when storing and accessing large volumes of data. + +## Context and problem + +A data store hosted by a single server might be subject to the following limitations: + +- **Storage space**. A data store for a large-scale cloud application is expected to contain a huge volume of data that could increase significantly over time. 
A server typically provides only a finite amount of disk storage, but you can replace existing disks with larger ones, or add further disks to a machine as data volumes grow. However, the system will eventually reach a limit where it isn't possible to easily increase the storage capacity on a given server. + +- **Computing resources**. A cloud application is required to support a large number of concurrent users, each of whom runs queries that retrieve information from the data store. A single server hosting the data store might not be able to provide the necessary computing power to support this load, resulting in extended response times for users and frequent failures as applications attempting to store and retrieve data time out. It might be possible to add memory or upgrade processors, but the system will reach a limit when it isn't possible to increase the compute resources any further. + +- **Network bandwidth**. Ultimately, the performance of a data store running on a single server is governed by the rate at which the server can receive requests and send replies. It's possible that the volume of network traffic might exceed the capacity of the network used to connect to the server, resulting in failed requests. + +- **Geography**. It might be necessary to store data generated by specific users in the same region as those users for legal, compliance, or performance reasons, or to reduce latency of data access. If the users are dispersed across different countries or regions, it might not be possible to store the entire data for the application in a single data store. + +Scaling vertically by adding more disk capacity, processing power, memory, and network connections can postpone the effects of some of these limitations, but it's likely to only be a temporary solution. A commercial cloud application capable of supporting large numbers of users and high volumes of data must be able to scale almost indefinitely, so vertical scaling isn't necessarily the best solution.
+ +## Solution + +Divide the data store into horizontal partitions or shards. Each shard has the same schema, but holds its own distinct subset of the data. A shard is a data store in its own right (it can contain the data for many entities of different types), running on a server acting as a storage node. + +This pattern has the following benefits: + +- You can scale the system out by adding further shards running on additional storage nodes. + +- A system can use off-the-shelf hardware rather than specialized and expensive computers for each storage node. + +- You can reduce contention and improve performance by balancing the workload across shards. + +- In the cloud, shards can be located physically close to the users that'll access the data. + +When dividing a data store up into shards, decide which data should be placed in each shard. A shard typically contains items that fall within a specified range determined by one or more attributes of the data. These attributes form the shard key (sometimes referred to as the partition key). The shard key should be static. It shouldn't be based on data that might change. + +Sharding physically organizes the data. When an application stores and retrieves data, the sharding logic directs the application to the appropriate shard. This sharding logic can be implemented as part of the data access code in the application, or it could be implemented by the data storage system if it transparently supports sharding. + +Abstracting the physical location of the data in the sharding logic provides a high level of control over which shards contain which data. It also enables data to migrate between shards without reworking the business logic of an application if the data in the shards need to be redistributed later (for example, if the shards become unbalanced). The tradeoff is the additional data access overhead required in determining the location of each data item as it's retrieved. 
+ +To ensure optimal performance and scalability, it's important to split the data in a way that's appropriate for the types of queries that the application performs. In many cases, it's unlikely that the sharding scheme will exactly match the requirements of every query. For example, in a multi-tenant system an application might need to retrieve tenant data using the tenant ID, but it might also need to look up this data based on some other attribute such as the tenant’s name or location. To handle these situations, implement a sharding strategy with a shard key that supports the most commonly performed queries. + +If queries regularly retrieve data using a combination of attribute values, you can likely define a composite shard key by linking attributes together. Alternatively, use a pattern such as [Index Table](index-table.md) to provide fast lookup to data based on attributes that aren't covered by the shard key. + +## Sharding strategies + +Three strategies are commonly used when selecting the shard key and deciding how to distribute data across shards. Note that there doesn't have to be a one-to-one correspondence between shards and the servers that host them—a single server can host multiple shards. The strategies are: + +**The Lookup strategy**. In this strategy the sharding logic implements a map that routes a request for data to the shard that contains that data using the shard key. In a multi-tenant application all the data for a tenant might be stored together in a shard using the tenant ID as the shard key. Multiple tenants might share the same shard, but the data for a single tenant won't be spread across multiple shards. The figure illustrates sharding tenant data based on tenant IDs. + + ![Figure 1 - Sharding tenant data based on tenant IDs](./_images/sharding-tenant.png) + + + The mapping between the shard key and the physical storage can be based on physical shards where each shard key maps to a physical partition. 
Alternatively, a more flexible technique for rebalancing shards is virtual partitioning, where shard keys map to the same number of virtual shards, which in turn map to fewer physical partitions. In this approach, an application locates data using a shard key that refers to a virtual shard, and the system transparently maps virtual shards to physical partitions. The mapping between a virtual shard and a physical partition can change without requiring the application code to be modified to use a different set of shard keys. + +**The Range strategy**. This strategy groups related items together in the same shard, and orders them by shard key—the shard keys are sequential. It's useful for applications that frequently retrieve sets of items using range queries (queries that return a set of data items for a shard key that falls within a given range). For example, if an application regularly needs to find all orders placed in a given month, this data can be retrieved more quickly if all orders for a month are stored in date and time order in the same shard. If each order was stored in a different shard, they'd have to be fetched individually by performing a large number of point queries (queries that return a single data item). The next figure illustrates storing sequential sets (ranges) of data in shards. + + ![Figure 2 - Storing sequential sets (ranges) of data in shards](./_images/sharding-sequential-sets.png) + +In this example, the shard key is a composite key containing the order month as the most significant element, followed by the order day and the time. The data for orders is naturally sorted when new orders are created and added to a shard. Some data stores support two-part shard keys containing a partition key element that identifies the shard and a row key that uniquely identifies an item in the shard. Data is usually held in row key order in the shard.
Items that are subject to range queries and need to be grouped together can use a shard key that has the same value for the partition key but a unique value for the row key. + +**The Hash strategy**. The purpose of this strategy is to reduce the chance of hotspots (shards that receive a disproportionate amount of load). It distributes the data across the shards in a way that achieves a balance between the size of each shard and the average load that each shard will encounter. The sharding logic computes the shard to store an item in based on a hash of one or more attributes of the data. The chosen hashing function should distribute data evenly across the shards, possibly by introducing some random element into the computation. The next figure illustrates sharding tenant data based on a hash of tenant IDs. + + ![Figure 3 - Sharding tenant data based on a hash of tenant IDs](./_images/sharding-data-hash.png) + +To understand the advantage of the Hash strategy over other sharding strategies, consider how a multi-tenant application that enrolls new tenants sequentially might assign the tenants to shards in the data store. When using the Range strategy, the data for tenants 1 to n will all be stored in shard A, the data for tenants n+1 to m will all be stored in shard B, and so on. If the most recently registered tenants are also the most active, most data activity will occur in a small number of shards, which could cause hotspots. In contrast, the Hash strategy allocates tenants to shards based on a hash of their tenant ID. This means that sequential tenants are most likely to be allocated to different shards, which will distribute the load across them. The previous figure shows this for tenants 55 and 56. + +The three sharding strategies have the following advantages and considerations: + +- **Lookup**. This offers more control over the way that shards are configured and used. 
Using virtual shards reduces the impact when rebalancing data because new physical partitions can be added to even out the workload. The mapping between a virtual shard and the physical partitions that implement the shard can be modified without affecting application code that uses a shard key to store and retrieve data. Looking up shard locations can impose an additional overhead. + +- **Range**. This is easy to implement and works well with range queries because they can often fetch multiple data items from a single shard in a single operation. This strategy offers easier data management. For example, if users in the same region are in the same shard, updates can be scheduled in each time zone based on the local load and demand pattern. However, this strategy doesn't provide optimal balancing between shards. Rebalancing shards is difficult and might not resolve the problem of uneven load if the majority of activity is for adjacent shard keys. + +- **Hash**. This strategy offers a better chance of more even data and load distribution. Request routing can be accomplished directly by using the hash function. There's no need to maintain a map. Note that computing the hash might impose an additional overhead. Also, rebalancing shards is difficult. + +Most common sharding systems implement one of the approaches described above, but you should also consider the business requirements of your applications and their patterns of data usage. For example, in a multi-tenant application: + +- You can shard data based on workload. You could segregate the data for highly volatile tenants in separate shards. The speed of data access for other tenants might be improved as a result. + +- You can shard data based on the location of tenants. You can take the data for tenants in a specific geographic region offline for backup and maintenance during off-peak hours in that region, while the data for tenants in other regions remains online and accessible during their business hours. 
+ +- High-value tenants could be assigned their own private, high performing, lightly loaded shards, whereas lower-value tenants might be expected to share more densely-packed, busy shards. + +- The data for tenants that need a high degree of data isolation and privacy can be stored on a completely separate server. + +## Scaling and data movement operations + +Each of the sharding strategies implies different capabilities and levels of complexity for managing scale in, scale out, data movement, and maintaining state. + +The Lookup strategy permits scaling and data movement operations to be carried out at the user level, either online or offline. The technique is to suspend some or all user activity (perhaps during off-peak periods), move the data to the new virtual partition or physical shard, change the mappings, invalidate or refresh any caches that hold this data, and then allow user activity to resume. Often this type of operation can be centrally managed. The Lookup strategy requires state to be highly cacheable and replica friendly. + +The Range strategy imposes some limitations on scaling and data movement operations, which must typically be carried out when a part or all of the data store is offline because the data must be split and merged across the shards. Moving the data to rebalance shards might not resolve the problem of uneven load if the majority of activity is for adjacent shard keys or data identifiers that are within the same range. The Range strategy might also require some state to be maintained in order to map ranges to the physical partitions. + +The Hash strategy makes scaling and data movement operations more complex because the partition keys are hashes of the shard keys or data identifiers. The new location of each shard must be determined from the hash function, or the function modified to provide the correct mappings. However, the Hash strategy doesn't require maintenance of state. 
+ +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- Sharding is complementary to other forms of partitioning, such as vertical partitioning and functional partitioning. For example, a single shard can contain entities that have been partitioned vertically, and a functional partition can be implemented as multiple shards. For more information about partitioning, see the [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). + +- Keep shards balanced so they all handle a similar volume of I/O. As data is inserted and deleted, it's necessary to periodically rebalance the shards to guarantee an even distribution and to reduce the chance of hotspots. Rebalancing can be an expensive operation. To reduce the necessity of rebalancing, plan for growth by ensuring that each shard contains sufficient free space to handle the expected volume of changes. You should also develop strategies and scripts you can use to quickly rebalance shards if this becomes necessary. + +- Use stable data for the shard key. If the shard key changes, the corresponding data item might have to move between shards, increasing the amount of work performed by update operations. For this reason, avoid basing the shard key on potentially volatile information. Instead, look for attributes that are invariant or that naturally form a key. + +- Ensure that shard keys are unique. For example, avoid using autoincrementing fields as the shard key. In some systems, autoincremented fields can't be coordinated across shards, possibly resulting in items in different shards having the same shard key. + + > Autoincremented values in other fields that are not shard keys can also cause problems. For example, if you use autoincremented fields to generate unique IDs, then two different items located in different shards might be assigned the same ID.
+ +- It might not be possible to design a shard key that matches the requirements of every possible query against the data. Shard the data to support the most frequently performed queries, and if necessary create secondary index tables to support queries that retrieve data using criteria based on attributes that aren't part of the shard key. For more information, see the [Index Table pattern](index-table.md). + +- Queries that access only a single shard are more efficient than those that retrieve data from multiple shards, so avoid implementing a sharding system that results in applications performing large numbers of queries that join data held in different shards. Remember that a single shard can contain the data for multiple types of entities. Consider denormalizing your data to keep related entities that are commonly queried together (such as the details of customers and the orders that they have placed) in the same shard to reduce the number of separate reads that an application performs. + + > If an entity in one shard references an entity stored in another shard, include the shard key for the second entity as part of the schema for the first entity. This can help to improve the performance of queries that reference related data across shards. + +- If an application must perform queries that retrieve data from multiple shards, it might be possible to fetch this data by using parallel tasks. Examples include fan-out queries, where data from multiple shards is retrieved in parallel and then aggregated into a single result. However, this approach inevitably adds some complexity to the data access logic of a solution. + +- For many applications, creating a larger number of small shards can be more efficient than having a small number of large shards because they can offer increased opportunities for load balancing. This can also be useful if you anticipate the need to migrate shards from one physical location to another. 
Moving a small shard is quicker than moving a large one. + +- Make sure the resources available to each shard storage node are sufficient to handle the scalability requirements in terms of data size and throughput. For more information, see the section “Designing Partitions for Scalability” in the [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). + +- Consider replicating reference data to all shards. If an operation that retrieves data from a shard also references static or slow-moving data as part of the same query, add this data to the shard. The application can then fetch all of the data for the query easily, without having to make an additional round trip to a separate data store. + + > If reference data held in multiple shards changes, the system must synchronize these changes across all shards. The system can experience a degree of inconsistency while this synchronization occurs. If you do this, you should design your applications to be able to handle it. + +- It can be difficult to maintain referential integrity and consistency between shards, so you should minimize operations that affect data in multiple shards. If an application must modify data across shards, evaluate whether complete data consistency is actually required. Instead, a common approach in the cloud is to implement eventual consistency. The data in each partition is updated separately, and the application logic must take responsibility for ensuring that the updates all complete successfully, as well as handling the inconsistencies that can arise from querying data while an eventually consistent operation is running. For more information about implementing eventual consistency, see the [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). + +- Configuring and managing a large number of shards can be a challenge. 
Tasks such as monitoring, backing up, checking for consistency, and logging or auditing must be accomplished on multiple shards and servers, possibly held in multiple locations. These tasks are likely to be implemented using scripts or other automation solutions, but that might not completely eliminate the additional administrative requirements. + +- Shards can be geolocated so that the data that they contain is close to the instances of an application that use it. This approach can considerably improve performance, but requires additional consideration for tasks that must access multiple shards in different locations. + +## When to use this pattern + +Use this pattern when a data store is likely to need to scale beyond the resources available to a single storage node, or to improve performance by reducing contention in a data store. + +> The primary focus of sharding is to improve the performance and scalability of a system, but as a by-product it can also improve availability due to how the data is divided into separate partitions. A failure in one partition doesn't necessarily prevent an application from accessing data held in other partitions, and an operator can perform maintenance or recovery of one or more partitions without making the entire data for an application inaccessible. For more information, see the [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). + +## Example + +The following example in C# uses a set of SQL Server databases acting as shards. Each database holds a subset of the data used by an application. The application retrieves data that's distributed across the shards using its own sharding logic (this is an example of a fan-out query). The details of the data that's located in each shard are returned by a method called `GetShards`.
This method returns an enumerable list of `ShardInformation` objects, where the `ShardInformation` type contains an identifier for each shard and the SQL Server connection string that an application should use to connect to the shard (the connection strings aren't shown in the code example). + +```csharp +private IEnumerable<ShardInformation> GetShards() +{ + // This retrieves the connection information from a shard store + // (commonly a root database). + return new[] + { + new ShardInformation + { + Id = 1, + ConnectionString = ... + }, + new ShardInformation + { + Id = 2, + ConnectionString = ... + } + }; +} +``` + +The code below shows how the application uses the list of `ShardInformation` objects to perform a query that fetches data from each shard in parallel. The details of the query aren't shown, but in this example the data that's retrieved contains a string that could hold information such as the name of a customer if the shards contain the details of customers. The results are aggregated into a `ConcurrentBag` collection for processing by the application. + +```csharp +// Retrieve the shards as a ShardInformation[] instance. +var shards = GetShards(); + +var results = new ConcurrentBag<string>(); + +// Execute the query against each shard in the shard list. +// This list would typically be retrieved from configuration +// or from a root/master shard store. +Parallel.ForEach(shards, shard => +{ + // NOTE: Transient fault handling isn't included, + // but should be incorporated when used in a real world application. + using (var con = new SqlConnection(shard.ConnectionString)) + { + con.Open(); + var cmd = new SqlCommand("SELECT ... FROM ...", con); + + Trace.TraceInformation("Executing command against shard: {0}", shard.Id); + + var reader = cmd.ExecuteReader(); + // Read the results in to a thread-safe data structure.
+ while (reader.Read()) + { + results.Add(reader.GetString(0)); + } + } +}); + +Trace.TraceInformation("Fanout query complete - Record Count: {0}", + results.Count); +``` + +## Related patterns and guidance + +The following patterns and guidance might also be relevant when implementing this pattern: +- [Data Consistency Primer](https://msdn.microsoft.com/library/dn589800.aspx). It might be necessary to maintain consistency for data distributed across different shards. Summarizes the issues surrounding maintaining consistency over distributed data, and describes the benefits and tradeoffs of different consistency models. +- [Data Partitioning Guidance](https://msdn.microsoft.com/library/dn589795.aspx). Sharding a data store can introduce a range of additional issues. Describes these issues in relation to partitioning data stores in the cloud to improve scalability, reduce contention, and optimize performance. +- [Index Table pattern](index-table.md). Sometimes it isn't possible to completely support queries just through the design of the shard key. Enables an application to quickly retrieve data from a large data store by specifying a key other than the shard key. +- [Materialized View pattern](materialized-view.md). To maintain the performance of some query operations, it's useful to create materialized views that aggregate and summarize data, especially if this summary data is based on information that's distributed across shards. Describes how to generate and populate these views. +- [Shard Lessons](http://www.addsimplicity.com/adding_simplicity_an_engi/2008/08/shard-lessons.html) on the Adding Simplicity blog. +- [Database Sharding](http://dbshards.com/database-sharding/) on the CodeFutures web site. +- [Scalability Strategies Primer: Database Sharding](http://blog.maxindelicato.com/2008/12/scalability-strategies-primer-database-sharding.html) on Max Indelicato's blog. 
+- [Building Scalable Databases: Pros and Cons of Various Database Sharding Schemes](http://www.25hoursaday.com/weblog/2009/01/16/BuildingScalableDatabasesProsAndConsOfVariousDatabaseShardingSchemes.aspx) on Dare Obasanjo's blog. diff --git a/docs/patterns/static-content-hosting.md b/docs/patterns/static-content-hosting.md new file mode 100644 index 00000000000..8200ba1fe36 --- /dev/null +++ b/docs/patterns/static-content-hosting.md @@ -0,0 +1,173 @@ +--- +title: Static Content Hosting +description: Deploy static content to a cloud-based storage service that can deliver it directly to the client. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, design-implementation, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Static Content Hosting + +[!INCLUDE [header](../_includes/header.md)] + +Deploy static content to a cloud-based storage service that can deliver it directly to the client. This can reduce the need for potentially expensive compute instances. + +## Context and problem + +Web applications typically include some elements of static content. This static content might include HTML pages and other resources such as images and documents that are available to the client, either as part of an HTML page (such as inline images, style sheets, and client-side JavaScript files) or as separate downloads (such as PDF documents). + +Although web servers are well tuned to optimize requests through efficient dynamic page code execution and output caching, they still have to handle requests to download static content. This consumes processing cycles that could often be put to better use.
+ +## Solution + +In most cloud hosting environments it's possible to minimize the need for compute instances (for example, use a smaller instance or fewer instances), by locating some of an application’s resources and static pages in a storage service. The cost for cloud-hosted storage is typically much less than for compute instances. + +When hosting some parts of an application in a storage service, the main considerations are related to deployment of the application and to securing resources that aren't intended to be available to anonymous users. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +- The hosted storage service must expose an HTTP endpoint that users can access to download the static resources. Some storage services also support HTTPS, so it's possible to host resources in storage services that require SSL. + +- For maximum performance and availability, consider using a content delivery network (CDN) to cache the contents of the storage container in multiple datacenters around the world. However, you'll likely have to pay for using the CDN. + +- Storage accounts are often geo-replicated by default to provide resiliency against events that might affect a datacenter. This means that the IP address might change, but the URL will remain the same. + +- When some content is located in a storage account and other content is in a hosted compute instance it becomes more challenging to deploy an application and to update it. You might have to perform separate deployments, and version the application and content to manage it more easily—especially when the static content includes script files or UI components. However, if only static resources have to be updated, they can simply be uploaded to the storage account without needing to redeploy the application package. + +- Storage services might not support the use of custom domain names. 
In this case it's necessary to specify the full URL of the resources in links because they'll be in a different domain from the dynamically-generated content containing the links. + +- The storage containers must be configured for public read access, but it's vital to ensure that they aren't configured for public write access to prevent users being able to upload content. Consider using a valet key or token to control access to resources that shouldn't be available anonymously—see the [Valet Key pattern](valet-key.md) for more information. + +## When to use this pattern + +This pattern is useful for: + +- Minimizing the hosting cost for websites and applications that contain some static resources. + +- Minimizing the hosting cost for websites that consist of only static content and resources. Depending on the capabilities of the hosting provider’s storage system, it might be possible to entirely host a fully static website in a storage account. + +- Exposing static resources and content for applications running in other hosting environments or on-premises servers. + +- Locating content in more than one geographical area using a content delivery network that caches the contents of the storage account in multiple datacenters around the world. + +- Monitoring costs and bandwidth usage. Using a separate storage account for some or all of the static content allows the costs to be more easily separated from hosting and runtime costs. + +This pattern might not be useful in the following situations: + +- The application needs to perform some processing on the static content before delivering it to the client. For example, it might be necessary to add a timestamp to a document. + +- The volume of static content is very small. The overhead of retrieving this content from separate storage can outweigh the cost benefit of separating it out from the compute resource. + +## Example + +Static content located in Azure Blob storage can be accessed directly by a web browser. 
Azure provides an HTTP-based interface over storage that can be publicly exposed to clients. For example, content in an Azure Blob storage container is exposed using a URL with the following form: + +`http://[ storage-account-name ].blob.core.windows.net/[ container-name ]/[ file-name ]` + + +When uploading the content it's necessary to create one or more blob containers to hold the files and documents. Note that the default permission for a new container is Private, and you must change this to Public to allow clients to access the contents. If it's necessary to protect the content from anonymous access, you can implement the [Valet Key pattern](valet-key.md) so users must present a valid token to download the resources. + +> [Blob Service Concepts](https://msdn.microsoft.com/library/azure/dd179376.aspx) has information about blob storage, and the ways that you can access and use it. + +The links in each page will specify the URL of the resource and the client will access it directly from the storage service. The figure illustrates delivering static parts of an application directly from a storage service. + +![Figure 1 - Delivering static parts of an application directly from a storage service](./_images/static-content-hosting-pattern.png) + + +The links in the pages delivered to the client must specify the full URL of the blob container and resource. For example, a page that contains a link to an image in a public container might contain the following HTML. + +```html +My image +``` + +> If the resources are protected by using a valet key, such as an Azure shared access signature, this signature must be included in the URLs in the links. + +A solution named StaticContentHosting that demonstrates using external storage for static resources is available from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/static-content-hosting). 
The StaticContentHosting.Cloud project contains configuration files that specify the storage account and container that holds the static content. + +```xml + + +``` + +The `Settings` class in the file Settings.cs of the StaticContentHosting.Web project contains methods to extract these values and build a string value containing the cloud storage account container URL. + +```csharp +public class Settings +{ + public static string StaticContentStorageConnectionString { + get + { + return RoleEnvironment.GetConfigurationSettingValue( + "StaticContent.StorageConnectionString"); + } + } + + public static string StaticContentContainer + { + get + { + return RoleEnvironment.GetConfigurationSettingValue("StaticContent.Container"); + } + } + + public static string StaticContentBaseUrl + { + get + { + var account = CloudStorageAccount.Parse(StaticContentStorageConnectionString); + + return string.Format("{0}/{1}", account.BlobEndpoint.ToString().TrimEnd('/'), + StaticContentContainer.TrimStart('/')); + } + } +} +``` + +The `StaticContentUrlHtmlHelper` class in the file StaticContentUrlHtmlHelper.cs exposes a method named `StaticContentUrl` that generates a URL containing the path to the cloud storage account if the URL passed to it starts with the ASP.NET root path character (~). + +```csharp +public static class StaticContentUrlHtmlHelper +{ + public static string StaticContentUrl(this HtmlHelper helper, string contentPath) + { + if (contentPath.StartsWith("~")) + { + contentPath = contentPath.Substring(1); + } + + contentPath = string.Format("{0}/{1}", Settings.StaticContentBaseUrl.TrimEnd('/'), + contentPath.TrimStart('/')); + + var url = new UrlHelper(helper.ViewContext.RequestContext); + + return url.Content(contentPath); + } +} +``` + +The file Index.cshtml in the Views\Home folder contains an image element that uses the `StaticContentUrl` method to create the URL for its `src` attribute. 
+ +```html +Test Image +``` + +## Related patterns and guidance + +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/static-content-hosting). +- [Valet Key pattern](valet-key.md). If the target resources aren't supposed to be available to anonymous users it's necessary to implement security over the store that holds the static content. Describes how to use a token or key that provides clients with restricted direct access to a specific resource or service such as a cloud-hosted storage service. +- [An efficient way of deploying a static web site on Azure](http://www.infosysblogs.com/microsoft/2010/06/an_efficient_way_of_deploying.html) on the Infosys blog. +- [Blob Service Concepts](https://msdn.microsoft.com/library/azure/dd179376.aspx) diff --git a/docs/patterns/throttling.md b/docs/patterns/throttling.md new file mode 100644 index 00000000000..9b2b6f318a1 --- /dev/null +++ b/docs/patterns/throttling.md @@ -0,0 +1,107 @@ +--- +title: Throttling +description: Control the consumption of resources used by an instance of an application, an individual tenant, or an entire service. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [availability, performance-scalability] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Throttling + +[!INCLUDE [header](../_includes/header.md)] + +Control the consumption of resources used by an instance of an application, an individual tenant, or an entire service. This can allow the system to continue to function and meet service level agreements, even when an increase in demand places an extreme load on resources. + +## Context and problem + +The load on a cloud application typically varies over time based on the number of active users or the types of activities they are performing. 
For example, more users are likely to be active during business hours, or the system might be required to perform computationally expensive analytics at the end of each month. There might also be sudden and unanticipated bursts in activity. If the processing requirements of the system exceed the capacity of the resources that are available, it'll suffer from poor performance and can even fail. If the system has to meet an agreed level of service, such failure could be unacceptable. + +There are many strategies available for handling varying load in the cloud, depending on the business goals for the application. One strategy is to use autoscaling to match the provisioned resources to the user needs at any given time. This has the potential to consistently meet user demand, while optimizing running costs. However, while autoscaling can trigger the provisioning of additional resources, this provisioning isn't immediate. If demand grows quickly, there can be a window of time where there's a resource deficit. + +## Solution + +An alternative strategy to autoscaling is to allow applications to use resources only up to a limit, and then throttle them when this limit is reached. The system should monitor how it's using resources so that, when usage exceeds the threshold, it can throttle requests from one or more users. This will enable the system to continue functioning and meet any service level agreements (SLAs) that are in place. For more information on monitoring resource usage, see the [Instrumentation and Telemetry Guidance](https://msdn.microsoft.com/library/dn589775.aspx). + +The system could implement several throttling strategies, including: + +- Rejecting requests from an individual user who's already accessed system APIs more than n times per second over a given period of time. This requires the system to meter the use of resources for each tenant or user running an application.
For more information, see the [Service Metering Guidance](https://msdn.microsoft.com/library/dn589796.aspx). + +- Disabling or degrading the functionality of selected nonessential services so that essential services can run unimpeded with sufficient resources. For example, if the application is streaming video output, it could switch to a lower resolution. + +- Using load leveling to smooth the volume of activity (this approach is covered in more detail by the [Queue-based Load Leveling pattern](queue-based-load-leveling.md)). In a multi-tenant environment, this approach will reduce the performance for every tenant. If the system must support a mix of tenants with different SLAs, the work for high-value tenants might be performed immediately. Requests for other tenants can be held back, and handled when the backlog has eased. The [Priority Queue pattern][] could be used to help implement this approach. + +- Deferring operations being performed on behalf of lower priority applications or tenants. These operations can be suspended or limited, with an exception generated to inform the tenant that the system is busy and that the operation should be retried later. + +The figure shows an area graph for resource use (a combination of memory, CPU, bandwidth, and other factors) against time for applications that are making use of three features. A feature is an area of functionality, such as a component that performs a specific set of tasks, a piece of code that performs a complex calculation, or an element that provides a service such as an in-memory cache. These features are labeled A, B, and C. + +![Figure 1 - Graph showing resource use against time for applications running on behalf of three users](./_images/throttling-resource-utilization.png) + + +> The area immediately below the line for a feature indicates the resources that are used by applications when they invoke this feature. 
For example, the area below the line for Feature A shows the resources used by applications that are making use of Feature A, and the area between the lines for Feature A and Feature B indicates the resources used by applications invoking Feature B. Aggregating the areas for each feature shows the total resource use of the system. + +The previous figure illustrates the effects of deferring operations. Just prior to time T1, the total resources allocated to all applications using these features reach a threshold (the limit of resource use). At this point, the applications are in danger of exhausting the resources available. In this system, Feature B is less critical than Feature A or Feature C, so it's temporarily disabled and the resources that it was using are released. Between times T1 and T2, the applications using Feature A and Feature C continue running as normal. Eventually, the resource use of these two features diminishes to the point when, at time T2, there is sufficient capacity to enable Feature B again. + +The autoscaling and throttling approaches can also be combined to help keep the applications responsive and within SLAs. If the demand is expected to remain high, throttling provides a temporary solution while the system scales out. At this point, the full functionality of the system can be restored. + +The next figure shows an area graph of the overall resource use by all applications running in a system against time, and illustrates how throttling can be combined with autoscaling. + +![Figure 2 - Graph showing the effects of combining throttling with autoscaling](./_images/throttling-autoscaling.png) + + +At time T1, the threshold specifying the soft limit of resource use is reached. At this point, the system can start to scale out. However, if the new resources don't become available quickly enough, then the existing resources might be exhausted and the system could fail. 
To prevent this from occurring, the system is temporarily throttled, as described earlier. When autoscaling has completed and the additional resources are available, throttling can be relaxed. + +## Issues and considerations + +You should consider the following points when deciding how to implement this pattern: + +- Throttling an application, and the strategy to use, is an architectural decision that impacts the entire design of a system. Throttling should be considered early in the application design process because it isn't easy to add once a system has been implemented. + +- Throttling must be performed quickly. The system must be capable of detecting an increase in activity and reacting accordingly. The system must also be able to revert to its original state quickly after the load has eased. This requires that the appropriate performance data is continually captured and monitored. + +- If a service needs to temporarily deny a user request, it should return a specific error code so the client application understands that the refusal to perform an operation is due to throttling. The client application can wait for a period before retrying the request. + +- Throttling can be used as a temporary measure while a system autoscales. In some cases it's better to simply throttle, rather than to scale, if a burst in activity is sudden and isn't expected to be long lived because scaling can add considerably to running costs. + +- If throttling is being used as a temporary measure while a system autoscales, and if resource demands grow very quickly, the system might not be able to continue functioning—even when operating in a throttled mode. If this isn't acceptable, consider maintaining larger capacity reserves and configuring more aggressive autoscaling. + +## When to use this pattern + +Use this pattern: + +- To ensure that a system continues to meet service level agreements.
+ +- To prevent a single tenant from monopolizing the resources provided by an application. + +- To handle bursts in activity. + +- To help cost-optimize a system by limiting the maximum resource levels needed to keep it functioning. + +## Example + +The final figure illustrates how throttling can be implemented in a multi-tenant system. Users from each of the tenant organizations access a cloud-hosted application where they fill out and submit surveys. The application contains instrumentation that monitors the rate at which these users are submitting requests to the application. + +In order to prevent the users from one tenant affecting the responsiveness and availability of the application for all other users, a limit is applied to the number of requests per second the users from any one tenant can submit. The application blocks requests that exceed this limit. + +![Figure 3 - Implementing throttling in a multi-tenant application](./_images/throttling-multi-tenant.png) + + +## Related patterns and guidance + +The following patterns and guidance may also be relevant when implementing this pattern: +- [Instrumentation and Telemetry Guidance](https://msdn.microsoft.com/library/dn589775.aspx). Throttling depends on gathering information about how heavily a service is being used. Describes how to generate and capture custom monitoring information. +- [Service Metering Guidance](https://msdn.microsoft.com/library/dn589796.aspx). Describes how to meter the use of services in order to gain an understanding of how they are used. This information can be useful in determining how to throttle a service. +- [Autoscaling Guidance](https://msdn.microsoft.com/library/dn589774.aspx). Throttling can be used as an interim measure while a system autoscales, or to remove the need for a system to autoscale. Contains information on autoscaling strategies. +- [Queue-based Load Leveling pattern](queue-based-load-leveling.md). 
Queue-based load leveling is a commonly used mechanism for implementing throttling. A queue can act as a buffer that helps to even out the rate at which requests sent by an application are delivered to a service. +- [Priority Queue pattern][]. A system can use priority queuing as part of its throttling strategy to maintain performance for critical or higher value applications, while reducing the performance of less important applications. + +[Priority Queue pattern]: priority-queue.md \ No newline at end of file diff --git a/docs/patterns/toc.md b/docs/patterns/toc.md new file mode 100644 index 00000000000..79b18039dd9 --- /dev/null +++ b/docs/patterns/toc.md @@ -0,0 +1,36 @@ +# [Cloud Design Patterns](./index.md) + +## Categories +### [Availability](./category/availability.md) +### [Data management](./category/data-management.md) +### [Design and implementation](./category/design-implementation.md) +### [Messaging](./category/messaging.md) +### [Management and monitoring](./category/management-monitoring.md) +### [Performance and scalability](./category/performance-scalability.md) +### [Resiliency](./category/resiliency.md) +### [Security](./category/security.md) + +## [Cache-Aside](./cache-aside.md) +## [Circuit Breaker](./circuit-breaker.md) +## [Command and Query Responsibility Segregation (CQRS)](./cqrs.md) +## [Compensating Transaction](./compensating-transaction.md) +## [Competing Consumers](./competing-consumers.md) +## [Compute Resource Consolidation](./compute-resource-consolidation.md) +## [Event Sourcing](./event-sourcing.md) +## [External Configuration Store](./external-configuration-store.md) +## [Federated Identity](./federated-identity.md) +## [Gatekeeper](./gatekeeper.md) +## [Health Endpoint Monitoring](./health-endpoint-monitoring.md) +## [Index Table](./index-table.md) +## [Leader Election](./leader-election.md) +## [Materialized View](./materialized-view.md) +## [Pipes and Filters](./pipes-and-filters.md) +## [Priority 
Queue](./priority-queue.md) +## [Queue-Based Load Leveling](./queue-based-load-leveling.md) +## [Retry](./transient-faults.md) +## [Runtime Reconfiguration](./runtime-reconfiguration.md) +## [Scheduler Agent Supervisor](./scheduler-agent-supervisor.md) +## [Sharding](./sharding.md) +## [Static Content Hosting](./static-content-hosting.md) +## [Throttling](./throttling.md) +## [Valet Key](./valet-key.md) diff --git a/docs/patterns/valet-key.md b/docs/patterns/valet-key.md new file mode 100644 index 00000000000..85c3f734ad0 --- /dev/null +++ b/docs/patterns/valet-key.md @@ -0,0 +1,171 @@ +--- +title: Valet Key +description: Use a token or key that provides clients with restricted direct access to a specific resource or service. +keywords: design pattern +author: dragon119 +manager: bennage + +pnp.series.title: Cloud Design Patterns +pnp.pattern.categories: [data-management, security] + +ms.service: guidance +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.author: mwasson +ms.date: 12/14/2016 +--- + +# Valet Key + +[!INCLUDE [header](../_includes/header.md)] + +Use a token that provides clients with restricted direct access to a specific resource, in order to offload data transfer from the application. This is particularly useful in applications that use cloud-hosted storage systems or queues, and can minimize cost and maximize scalability and performance. + +## Context and problem + +Client programs and web browsers often need to read and write files or data streams to and from an application’s storage. Typically, the application will handle the movement of the data—either by fetching it from storage and streaming it to the client, or by reading the uploaded stream from the client and storing it in the data store. However, this approach absorbs valuable resources such as compute, memory, and bandwidth.
+ +Data stores have the ability to handle upload and download of data directly, without requiring that the application perform any processing to move this data. But, this typically requires the client to have access to the security credentials for the store. This can be a useful technique to minimize data transfer costs and the requirement to scale out the application, and to maximize performance. It means, though, that the application is no longer able to manage the security of the data. After the client has a connection to the data store for direct access, the application can't act as the gatekeeper. It's no longer in control of the process and can't prevent subsequent uploads or downloads from the data store. + +This isn't a realistic approach in distributed systems that need to serve untrusted clients. Instead, applications must be able to securely control access to data in a granular way, but still reduce the load on the server by setting up this connection and then allowing the client to communicate directly with the data store to perform the required read or write operations. + +## Solution + +You need to resolve the problem of controlling access to a data store where the store can't manage authentication and authorization of clients. One typical solution is to restrict access to the data store’s public connection and provide the client with a key or token that the data store can validate. + +This key or token is usually referred to as a valet key. It provides time-limited access to specific resources and allows only predefined operations such as reading and writing to storage or queues, or uploading and downloading in a web browser. Applications can create and issue valet keys to client devices and web browsers quickly and easily, allowing clients to perform the required operations without requiring the application to directly handle the data transfer. 
This removes the processing overhead, and the impact on performance and scalability, from the application and the server. + +The client uses this token to access a specific resource in the data store for only a specific period, and with specific restrictions on access permissions, as shown in the figure. After the specified period, the key becomes invalid and won't allow access to the resource. + +![Figure 1 - Overview of the pattern](./_images/valet-key-pattern.png) + +It's also possible to configure a key that has other dependencies, such as the scope of the data. For example, depending on the data store capabilities, the key can specify a complete table in a data store, or only specific rows in a table. In cloud storage systems the key can specify a container, or just a specific item within a container. + +The key can also be invalidated by the application. This is a useful approach if the client notifies the server that the data transfer operation is complete. The server can then invalidate that key to prevent further access. + +Using this pattern can simplify managing access to resources because there's no requirement to create and authenticate a user, grant permissions, and then remove the user again. It also makes it easy to limit the location, the permission, and the validity period—all by simply generating a key at runtime. The important factors are to limit the validity period, and especially the location of the resource, as tightly as possible so that the recipient can only use it for the intended purpose. + +## Issues and considerations + +Consider the following points when deciding how to implement this pattern: + +**Manage the validity status and period of the key**. If leaked or compromised, the key effectively unlocks the target item and makes it available for malicious use during the validity period. A key can usually be revoked or disabled, depending on how it was issued.
Server-side policies can be changed, or the server key it was signed with can be invalidated. Specify a short validity period to minimize the risk of allowing unauthorized operations to take place against the data store. However, if the validity period is too short, the client might not be able to complete the operation before the key expires. Allow authorized users to renew the key before the validity period expires if multiple accesses to the protected resource are required. + +**Control the level of access the key will provide**. Typically, the key should allow the user to only perform the actions necessary to complete the operation, such as read-only access if the client shouldn't be able to upload data to the data store. For file uploads, it's common to specify a key that provides write-only permission, as well as the location and the validity period. It's critical to accurately specify the resource or the set of resources to which the key applies. + +**Consider how to control users’ behavior**. Implementing this pattern means some loss of control over the resources users are granted access to. The level of control that can be exerted is limited by the capabilities of the policies and permissions available for the service or the target data store. For example, it's usually not possible to create a key that limits the size of the data to be written to storage, or the number of times the key can be used to access a file. This can result in huge unexpected costs for data transfer, even when used by the intended client, and might be caused by an error in the code that causes repeated upload or download. To limit the number of times a file can be uploaded, where possible, force the client to notify the application when one operation has completed. For example, some data stores raise events the application code can use to monitor operations and control user behavior.
However, it's hard to enforce quotas for individual users in a multi-tenant scenario where the same key is used by all the users from one tenant. + +**Validate, and optionally sanitize, all uploaded data**. A malicious user that gains access to the key could upload data designed to compromise the system. Alternatively, authorized users might upload data that's invalid and, when processed, could result in an error or system failure. To protect against this, ensure that all uploaded data is validated and checked for malicious content before use. + +**Audit all operations**. Many key-based mechanisms can log operations such as uploads, downloads, and failures. These logs can usually be incorporated into an audit process, and also used for billing if the user is charged based on file size or data volume. Use the logs to detect authentication failures that might be caused by issues with the key provider, or accidental removal of a stored access policy. + +**Deliver the key securely**. It can be embedded in a URL that the user activates in a web page, or it can be used in a server redirection operation so that the download occurs automatically. Always use HTTPS to deliver the key over a secure channel. + +**Protect sensitive data in transit**. Sensitive data delivered through the application will usually take place using SSL or TLS, and this should be enforced for clients accessing the data store directly. + +Other issues to be aware of when implementing this pattern are: + +- If the client doesn't, or can't, notify the server of completion of the operation, and the only limit is the expiration period of the key, the application won't be able to perform auditing operations such as counting the number of uploads or downloads, or preventing multiple uploads or downloads. + +- The flexibility of key policies that can be generated might be limited. For example, some mechanisms only allow the use of a timed expiration period. 
Others aren't able to specify a sufficient granularity of read/write permissions. + +- If the start time for the key or token validity period is specified, ensure that it's a little earlier than the current server time to allow for client clocks that might be slightly out of synchronization. The default, if not specified, is usually the current server time. + +- The URL containing the key will be recorded in server log files. While the key will typically have expired before the log files are used for analysis, ensure that you limit access to them. If log data is transmitted to a monitoring system or stored in another location, consider implementing a delay to prevent leakage of keys until after their validity period has expired. + +- If the client code runs in a web browser, the browser might need to support cross-origin resource sharing (CORS) to enable code that executes within the web browser to access data in a different domain from the one that served the page. Some older browsers and some data stores don't support CORS, and code that runs in these browsers might not be able to use a valet key to provide access to data in a different domain, such as a cloud storage account. + +## When to use this pattern + +This pattern is useful for the following situations: + +- To minimize resource loading and maximize performance and scalability. Using a valet key doesn't require the resource to be locked, no remote server call is required, there's no limit on the number of valet keys that can be issued, and it avoids a single point of failure resulting from performing the data transfer through the application code. Creating a valet key is typically a simple cryptographic operation of signing a string with a key. + +- To minimize operational cost. Enabling direct access to stores and queues is resource and cost efficient, can result in fewer network round trips, and might allow for a reduction in the number of compute resources required.
+ +- When clients regularly upload or download data, particularly where there's a large volume or when each operation involves large files. + +- When the application has limited compute resources available, either due to hosting limitations or cost considerations. In this scenario, the pattern is even more helpful if there are many concurrent data uploads or downloads because it relieves the application from handling the data transfer. + +- When the data is stored in a remote data store or a different datacenter. If the application was required to act as a gatekeeper, there might be a charge for the additional bandwidth of transferring the data between datacenters, or across public or private networks between the client and the application, and then between the application and the data store. + +This pattern might not be useful in the following situations: + +- If the application must perform some task on the data before it's stored or before it's sent to the client. For example, if the application needs to perform validation, log access success, or execute a transformation on the data. However, some data stores and clients are able to negotiate and carry out simple transformations such as compression and decompression (for example, a web browser can usually handle GZip formats). + +- If the design of an existing application makes it difficult to incorporate the pattern. Using this pattern typically requires a different architectural approach for delivering and receiving data. + +- If it's necessary to maintain audit trails or control the number of times a data transfer operation is executed, and the valet key mechanism in use doesn't support notifications that the server can use to manage these operations. + +- If it's necessary to limit the size of the data, especially during upload operations. 
The only solution to this is for the application to check the data size after the operation is complete, or check the size of uploads after a specified period or on a scheduled basis. + +## Example + +Azure supports shared access signatures on Azure Storage for granular access control to data in blobs, tables, and queues, and for Service Bus queues and topics. A shared access signature token can be configured to provide specific access rights such as read, write, update, and delete to a specific table; a key range within a table; a queue; a blob; or a blob container. The token can be valid for a specified time period or have no time limit. + +Azure shared access signatures also support server-stored access policies that can be associated with a specific resource such as a table or blob. This feature provides additional control and flexibility compared to application-generated shared access signature tokens, and should be used whenever possible. Settings defined in a server-stored policy can be changed and are reflected in the token without requiring a new token to be issued, but settings defined in the token can't be changed without issuing a new token. This approach also makes it possible to revoke a valid shared access signature token before it's expired. + +> For more information see [Introducing Table SAS (Shared Access Signature), Queue SAS and update to Blob SAS](https://blogs.msdn.microsoft.com/windowsazurestorage/2012/06/12/introducing-table-sas-shared-access-signature-queue-sas-and-update-to-blob-sas/) and [Using Shared Access Signatures](https://azure.microsoft.com/documentation/articles/storage-dotnet-shared-access-signature-part-1/) on MSDN. + +The following code shows how to create a shared access signature token that's valid for five minutes. The `GetSharedAccessReferenceForUpload` method returns a shared access signature token that can be used to upload a file to Azure Blob Storage.
+ +```csharp +public class ValuesController : ApiController +{ + private readonly CloudStorageAccount account; + private readonly string blobContainer; + ... + /// <summary> + /// Return a limited access key that allows the caller to upload a file + /// to this specific destination for a defined period of time. + /// </summary> + private StorageEntitySas GetSharedAccessReferenceForUpload(string blobName) + { + var blobClient = this.account.CreateCloudBlobClient(); + var container = blobClient.GetContainerReference(this.blobContainer); + + var blob = container.GetBlockBlobReference(blobName); + + var policy = new SharedAccessBlobPolicy + { + Permissions = SharedAccessBlobPermissions.Write, + + // Specify a start time five minutes earlier to allow for client clock skew. + SharedAccessStartTime = DateTime.UtcNow.AddMinutes(-5), + + // Specify a validity period of five minutes starting from now. + SharedAccessExpiryTime = DateTime.UtcNow.AddMinutes(5) + }; + + // Create the signature. + var sas = blob.GetSharedAccessSignature(policy); + + return new StorageEntitySas + { + BlobUri = blob.Uri, + Credentials = sas, + Name = blobName + }; + } + + public struct StorageEntitySas + { + public string Credentials; + public Uri BlobUri; + public string Name; + } +} +``` + +> The complete sample is available in the ValetKey solution available for download from [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/valet-key). The ValetKey.Web project in this solution contains a web application that includes the `ValuesController` class shown above. A sample client application that uses this web application to retrieve a shared access signature key and upload a file to blob storage is available in the ValetKey.Client project. + +## Next steps + +The following patterns and guidance might also be relevant when implementing this pattern: +- A sample that demonstrates this pattern is available on [GitHub](https://github.com/mspnp/cloud-design-patterns/tree/master/samples/valet-key).
+- [Gatekeeper pattern](gatekeeper.md). This pattern can be used in conjunction with the Valet Key pattern to protect applications and services by using a dedicated host instance that acts as a broker between clients and the application or service. The gatekeeper validates and sanitizes requests, and passes requests and data between the client and the application. It can provide an additional layer of security and reduce the attack surface of the system. +- [Static Content Hosting pattern](static-content-hosting.md). This pattern describes how to deploy static resources to a cloud-based storage service that can deliver these resources directly to the client to reduce the requirement for expensive compute instances. Where the resources aren't intended to be publicly available, the Valet Key pattern can be used to secure them. +- [Introducing Table SAS (Shared Access Signature), Queue SAS and update to Blob SAS](https://blogs.msdn.microsoft.com/windowsazurestorage/2012/06/12/introducing-table-sas-shared-access-signature-queue-sas-and-update-to-blob-sas/) +- [Using Shared Access Signatures](https://azure.microsoft.com/documentation/articles/storage-dotnet-shared-access-signature-part-1/) +- [Shared Access Signature Authentication with Service Bus](https://azure.microsoft.com/documentation/articles/service-bus-shared-access-signature-authentication/) diff --git a/docs/resiliency/disaster-recovery-azure-applications.md b/docs/resiliency/disaster-recovery-azure-applications.md new file mode 100644 index 00000000000..29ee97092b4 --- /dev/null +++ b/docs/resiliency/disaster-recovery-azure-applications.md @@ -0,0 +1,296 @@ +--- +title: Disaster recovery for Azure applications +description: Technical overview and in-depth information about designing applications for disaster recovery on Microsoft Azure. 
+services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: f9e0cbdc-ec82-46dc-bee6-558b35518252 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Disaster recovery for applications built on Microsoft Azure +Whereas high availability is about temporary failure management, disaster recovery (DR) is about the catastrophic loss of application functionality. For example, consider the scenario where a region goes down. In this case, you need to have a plan to run your application or access your data outside the Azure region. Execution of this plan involves people, processes, and supporting applications that allow the system to function. The business and technology owners who define the system's operational mode for a disaster also determine the level of functionality for the service during a disaster. The level of functionality can take a few forms: completely unavailable, partially available (degraded functionality or delayed processing), or fully available. + +## Azure disaster recovery features +As with availability considerations, Azure has [resiliency technical guidance](./index.md) that's designed to support disaster recovery. There is also a relationship between some of the availability features of Azure and disaster recovery. For example, the management of roles across fault domains increases the availability of an application. Without that management, an unhandled hardware failure would become a “disaster” scenario. So the correct application of availability features and strategies is an important part of disaster-proofing your application. However, this article goes beyond general availability issues to more serious (and rarer) disaster events. + +## Multiple datacenter regions +Azure maintains datacenters in many regions around the world. 
This infrastructure supports several disaster recovery scenarios, such as the system-provided geo-replication of Azure Storage to secondary regions. It also means that you can easily and inexpensively deploy a cloud service to multiple locations around the world. Compare this with the cost and difficulty of running your own datacenters in multiple regions. Deploying data and services to multiple regions helps protect your application from major outages in a single region. + +## Azure Traffic Manager +When a region-specific failure occurs, you must redirect traffic to services or deployments in another region. You can do this routing manually, but it's more efficient to use an automated process. Azure Traffic Manager is designed for this task. You can use it to automatically manage the failover of user traffic to another region in case the primary region fails. Because traffic management is an important part of the overall strategy, it's important to understand the basics of Traffic Manager. + +In the following diagram, users connect to a URL that's specified for Traffic Manager (`http://myATMURL.trafficmanager.net`) and that abstracts the actual site URLs (`http://app1URL.cloudapp.net` and `http://app2URL.cloudapp.net`). Based on how you configure the criteria for when to route users, users will be sent to the correct actual site when the policy dictates. The policy options are round-robin, performance, or failover. For the sake of this article, we will be concerned with only the failover option. + +![Routing via Azure Traffic Manager](./images/disaster-recovery-azure-applications/routing-using-azure-traffic-manager.png) + +When you're configuring Traffic Manager, you provide a new Traffic Manager DNS prefix. This is the URL prefix that you'll provide to your users to access your service. Traffic Manager now abstracts load balancing one level up and not at the region level. The Traffic Manager DNS maps to a CNAME for all the deployments that it manages. 
+ +Within Traffic Manager, you specify the priority of the deployments that users will be routed to when failure occurs. Traffic Manager monitors the endpoints of the deployments and notes when the primary deployment fails. At failure, Traffic Manager analyzes the prioritized list of deployments and routes users to the next one on the list. + +Although Traffic Manager decides where to go in a failover, you can decide whether your failover domain is dormant or active while you're not in failover mode. That functionality has nothing to do with Azure Traffic Manager. Traffic Manager detects a failure in the primary site and rolls over to the failover site. Traffic Manager rolls over regardless of whether that site is currently serving users or not. + +For more information on how Azure Traffic Manager works, refer to: + +* [Traffic Manager overview](/azure/traffic-manager/traffic-manager-overview/) +* [Configure failover routing method](/azure/traffic-manager/traffic-manager-configure-failover-routing-method/) + +## Azure disaster scenarios +The following sections cover several different types of disaster scenarios. Region-wide service disruptions are not the only cause of application-wide failures. Poor design or administration errors can also lead to outages. It's important to consider the possible causes of a failure during both the design and testing phases of your recovery plan. A good plan takes advantage of Azure features and augments them with application-specific strategies. The chosen response is dictated by the importance of the application, the recovery point objective (RPO), and the recovery time objective (RTO). + +### Application failure +Azure automatically handles failures that result from the underlying hardware or operating system software in the host virtual machine. Azure creates a new instance of the role on a functioning server and adds it to the load-balancer rotation. 
If the number of role instances is greater than one, Azure shifts processing to the other running role instances while replacing the failed node. + +There are serious application errors that happen independently of any hardware or operating system failures. The application might fail due to the catastrophic exceptions caused by bad logic or data integrity issues. You must incorporate enough telemetry into the code so that a monitoring system can detect failure conditions and notify an application administrator. An administrator who has full knowledge of the disaster recovery processes can make a decision to invoke a failover process. Alternatively, an administrator can simply accept an availability outage to resolve the critical errors. + +### Data corruption +Azure automatically stores your Azure SQL Database and Azure Storage data three times redundantly within different fault domains in the same region. If you use geo-replication, the data is stored three additional times in a different region. However, if your users or your application corrupts that data in the primary copy, the data quickly replicates to the other copies. Unfortunately, this results in three copies of corrupt data. + +To manage potential corruption of your data, you have two options. First, you can manage a custom backup strategy. You can store your backups in Azure or on-premises, depending on your business requirements or governance regulations. Another option is to use the new point-in-time restore option for recovering a SQL database. For more information, see the section on [data strategies for disaster recovery](#data-strategies-for-disaster-recovery). + +### Network outage +When parts of the Azure network are inaccessible, you might not be able to get to your application or data. If one or more role instances are unavailable due to network issues, Azure uses the remaining available instances of your application. 
If your application can’t access its data because of an Azure network outage, you can potentially run in degraded mode locally by using cached data. You need to architect the disaster recovery strategy for running in degraded mode in your application. For some applications, this might not be practical. + +Another option is to store data in an alternate location until connectivity is restored. If degraded mode is not an option, the remaining options are application downtime or failover to an alternate region. The design of an application running in degraded mode is as much a business decision as a technical one. This is discussed further in the section on [degraded application functionality](#degraded-application-functionality). + +### Failure of a dependent service +Azure provides many services that can experience periodic downtime. Consider [Azure Redis Cache](https://azure.microsoft.com/services/cache/) as an example. This multi-tenant service provides caching capabilities to your application. It's important to consider what happens in your application if the dependent service is unavailable. In many ways, this scenario is similar to the network outage scenario. However, considering each service independently results in potential improvements to your overall plan. + +Azure Redis Cache provides caching to your application from within your cloud service deployment, which provides disaster recovery benefits. First, the service now runs on roles that are local to your deployment. Therefore, you're better able to monitor and manage the status of the cache as part of your overall management processes for the cloud service. This type of caching also exposes new features. One of the new features is high availability for cached data. This helps to preserve cached data if a single node fails by maintaining duplicate copies on other nodes. + +Note that high availability decreases throughput and increases latency because of the updating of the secondary copy on writes. 
It also doubles the amount of memory that's used for each item, so plan for that. This specific example demonstrates that each dependent service might have capabilities that improve your overall availability and resistance to catastrophic failures. + +With each dependent service, you should understand the implications of a service disruption. In the caching example, it might be possible to access the data directly from a database until you restore your cache. This would be a degraded mode in terms of performance but would provide full functionality with regard to data. + +### Region-wide service disruption +The previous failures have primarily been failures that can be managed within the same Azure region. However, you must also prepare for the possibility that there is a service disruption of the entire region. If a region-wide service disruption occurs, the locally redundant copies of your data are not available. If you have enabled geo-replication, there are three additional copies of your blobs and tables in a different region. If Microsoft declares the region lost, Azure remaps all of the DNS entries to the geo-replicated region. + +> [!NOTE] +> Be aware that you don't have any control over this process, and it will occur only for region-wide service disruption. Because of this, you must rely on other application-specific backup strategies to achieve the highest level of availability. For more information, see the section on [data strategies for disaster recovery](#data-strategies-for-disaster-recovery). +> +> + +### Azure-wide service disruption +In disaster planning, you must consider the entire range of possible disasters. One of the most severe service disruptions would involve all Azure regions simultaneously. As with other service disruptions, you might decide that you'll take the risk of temporary downtime in that event. 
Widespread service disruptions that span regions should be much rarer than isolated service disruptions that involve dependent services or single regions. + +However, for some mission-critical applications, you might decide that there must be a backup plan for this scenario as well. The plan for this event might include failing over to services in an [alternative cloud](#alternative-cloud) or a [hybrid on-premises and cloud solution](#hybrid-on-premises-and-cloud-solution). + +### Degraded application functionality +A well-designed application typically uses a collection of modules that communicate with each other through the implementation of loosely coupled information-interchange patterns. A DR-friendly application requires separation of tasks at the module level. This is to prevent the disruption of a dependent service from bringing down the entire application. For example, consider a web commerce application for Company Y. The following modules might constitute the application: + +* **Product Catalog** allows users to browse products. +* **Shopping Cart** allows users to add/remove products in their shopping cart. +* **Order Status** shows the shipping status of user orders. +* **Order Submission** finalizes the shopping session by submitting the order with payment. +* **Order Processing** validates the order for data integrity and performs a quantity availability check. + +When a dependency of a module in this application goes down, how does the module function until that part recovers? A well-architected system implements isolation boundaries through separation of tasks both at design time and at runtime. You can categorize every failure as recoverable or non-recoverable. Non-recoverable errors will take down the module, but you can mitigate a recoverable error through alternatives. As discussed in the high-availability section, you can hide some problems from users by handling faults and taking alternate actions. 
During a more serious service disruption, the application might be completely unavailable. However, a third option is to continue servicing users in degraded mode. + +For instance, if the database for hosting orders goes down, the Order Processing module loses its ability to process sales transactions. Depending on the architecture, it might be hard or impossible for the Order Submission and Order Processing parts of the application to continue. If the application is not designed to handle this scenario, the entire application might go offline. + +However, in this same scenario, it's possible that the product data is stored in a different location. In that case, the Product Catalog module can still be used for viewing products. In degraded mode, the application continues to be available to users for available functionality like viewing the product catalog. Other parts of the application, however, are unavailable, such as ordering or inventory queries. + +Another variation of degraded mode centers on performance rather than capabilities. For example, consider a scenario where the product catalog is being cached through Azure Redis Cache. If caching becomes unavailable, the application might go directly to the server storage to retrieve product catalog information. But this access might be slower than the cached version. Because of this, the application performance is degraded until the caching service is fully restored. + +Deciding how much of an application will continue to function in degraded mode is both a business decision and a technical decision. The application must also decide how to inform the users of the temporary problems. In this example, the application might allow viewing products and even adding them to a shopping cart. However, when the user attempts to make a purchase, the application notifies the user that the sales module is temporarily inaccessible. It isn't ideal for the customer, but it does prevent an application-wide service disruption. 
+ +## Data strategies for disaster recovery +Handling data correctly is the hardest area to get right in any disaster recovery plan. Restoring data is also the part of the recovery process that typically takes the most time. Different choices in degradation modes result in difficult challenges for data recovery from failure and consistency after failure. + +One of the factors is the need to restore or maintain a copy of the application’s data. You will use this data for reference and transactional purposes at a secondary site. An on-premises setting requires an expensive and lengthy planning process to implement a multiple-region disaster recovery strategy. Conveniently, most cloud providers, including Azure, readily allow the deployment of applications to multiple regions. These regions are geographically distributed in such a way that multiple-region service disruption should be extremely rare. The strategy for handling data across regions is one of the contributing factors for the success of any disaster recovery plan. + +The following sections discuss disaster recovery techniques related to data backups, reference data, and transactional data. + +### Backup and restore +Regular backups of application data can support some disaster recovery scenarios. Different storage resources require different techniques. + +For the Basic, Standard, and Premium SQL Database tiers, you can take advantage of point-in-time restore to recover your database. For more information, see [Overview: Cloud business continuity and database disaster recovery with SQL Database](/azure/sql-database/sql-database-business-continuity/). Another option is to use Active Geo-Replication for SQL Database. This automatically replicates database changes to secondary databases in the same Azure region or even in a different Azure region. This provides a potential alternative to some of the more manual data synchronization techniques presented in this article. 
For more information, see [Overview: SQL Database Active Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/). + +You can also use a more manual approach for backup and restore. Use the DATABASE COPY command to create a copy of the database. You must use this command to get a backup with transactional consistency. You can also use the import/export service of Azure SQL Database. This supports exporting databases to BACPAC files that are stored in Azure Blob storage. + +The built-in redundancy of Azure Storage creates two replicas of the backup file in the same region. However, the frequency of running the backup process determines your RPO, which is the amount of data you might lose in disaster scenarios. For example, imagine that you perform a backup at the top of the hour, and a disaster occurs two minutes before the top of the hour. You lose 58 minutes of data that happened after the last backup was performed. Also, to protect against a region-wide service disruption, you should copy the BACPAC files to an alternate region. You then have the option of restoring those backups in the alternate region. For more details, see [Overview: Cloud business continuity and database disaster recovery with SQL Database](/azure/sql-database/sql-database-business-continuity/). + +For Azure Storage, you can develop your own custom backup process or use one of many third-party backup tools. Note that most application designs have additional complexities where storage resources reference each other. For example, consider a SQL database that has a column that links to a blob in Azure Storage. If the backups do not happen simultaneously, the database might have the pointer to a blob that was not backed up before the failure. The application or disaster recovery plan must implement processes to handle this inconsistency after a recovery. 
+ +### Reference data pattern for disaster recovery +Reference data is read-only data that supports application functionality. It typically does not change frequently. Although backup and restore is one method to handle region-wide service disruptions, the RTO is relatively long. When you deploy the application to a secondary region, some strategies can improve the RTO for reference data. + +Because reference data changes infrequently, you can improve the RTO by maintaining a permanent copy of the reference data in the secondary region. This eliminates the time required to restore backups in the event of a disaster. To meet the multiple-region disaster recovery requirements, you must deploy the application and the reference data together in multiple regions. As mentioned in [Reference data pattern for high availability](high-availability-azure-applications.md#reference-data-pattern-for-high-availability), you can deploy reference data to the role itself, to external storage, or to a combination of both. + +The reference data deployment model within compute nodes implicitly satisfies the disaster recovery requirements. Reference data deployment to SQL Database requires that you deploy a copy of the reference data to each region. The same strategy applies to Azure Storage. You must deploy a copy of any reference data that's stored in Azure Storage to the primary and secondary regions. + +![Reference data publication to both primary and secondary regions](./images/disaster-recovery-azure-applications/reference-data-publication-to-both-primary-and-secondary-regions.png) + +You must implement your own application-specific backup routines for all data, including reference data. Geo-replicated copies across regions are used only in a region-wide service disruption. To prevent extended downtime, deploy the mission-critical parts of the application’s data to the secondary region. For an example of this topology, see the [active-passive model](#active-passive). 
+ +### Transactional data pattern for disaster recovery +Implementation of a fully functional disaster mode strategy requires asynchronous replication of the transactional data to the secondary region. The practical time windows within which the replication can occur will determine the RPO characteristics of the application. You might still recover the data that was lost from the primary region during the replication window. You might also be able to merge with the secondary region later. + +The following architecture examples provide some ideas on different ways of handling transactional data in a failover scenario. It's important to note that these examples are not exhaustive. For example, intermediate storage locations such as queues might be replaced with Azure SQL Database. The queues themselves might be either Azure Storage or Azure Service Bus queues (see [Azure queues and Service Bus queues--compared and contrasted](/azure/service-bus-messaging/service-bus-azure-and-service-bus-queues-compared-contrasted/)). Server storage destinations might also vary, such as Azure tables instead of SQL Database. In addition, worker roles might be inserted as intermediaries in various steps. The important thing is not to emulate these architectures exactly, but to consider various alternatives in the recovery of transactional data and related modules. + +#### Replication of transactional data in preparation for disaster recovery +Consider an application that uses Azure Storage queues to hold transactional data. This allows worker roles to process the transactional data to the server database in a decoupled architecture. This requires the transactions to use some form of temporary caching if the front-end roles require the immediate query of that data. Depending on the level of data-loss tolerance, you might choose to replicate the queues, the database, or all of the storage resources. 
With only database replication, if the primary region goes down, you can still recover the data in the queues when the primary region comes back. + +The following diagram shows an architecture where the server database is synchronized across regions. + +![Replication of transactional data in preparation for disaster recovery](./images/disaster-recovery-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png) + +The biggest challenge to implementing this architecture is the replication strategy between regions. The Azure SQL Data Sync service enables this type of replication. However, the service is still in preview and is not recommended for production environments. For more information, see [Overview: Cloud business continuity and database disaster recovery with SQL Database](/azure/sql-database/sql-database-business-continuity/). For production applications, you must invest in a third-party solution or create your own replication logic in code. Depending on the architecture, the replication might be bidirectional, which is also more complex. + +One potential implementation might make use of the intermediate queue in the previous example. The worker role that processes the data to the final storage destination might make the change in both the primary region and the secondary region. These are not trivial tasks, and complete guidance for replication code is beyond the scope of this article. The important point is that a lot of your time and testing should focus on how you replicate your data to the secondary region. Additional processing and testing can help ensure that the failover and recovery processes correctly handle any possible data inconsistencies or duplicate transactions. + +> [!NOTE] +> Most of this paper focuses on platform as a service (PaaS). However, additional replication and availability options for hybrid applications use Azure Virtual Machines. 
These hybrid applications use infrastructure as a service (IaaS) to host SQL Server on virtual machines in Azure. This allows traditional availability approaches in SQL Server, such as AlwaysOn Availability Groups or Log Shipping. Some techniques, such as AlwaysOn, work only between on-premises SQL Server instances and Azure virtual machines. For more information, see [High availability and disaster recovery for SQL Server in Azure Virtual Machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/). +> +> + +#### Degraded application mode for transaction capture +Consider a second architecture that operates in degraded mode. The application on the secondary region deactivates all nonessential functionality, such as reporting, business intelligence (BI), or draining queues. It accepts only the most important types of transactional workflows, as defined by business requirements. The system captures the transactions and writes them to queues. The system might postpone processing the data during the initial stage of the service disruption. If the system on the primary region is reactivated within the expected time window, the worker roles in the primary region can drain the queues. This process eliminates the need for database merging. If the primary region service disruption goes beyond the tolerable window, the application can start processing the queues. + +In this scenario, the database on the secondary contains incremental transactional data that must be merged after the primary is reactivated. The following diagram shows this strategy for temporarily storing transactional data until the primary region is restored. 
+ +![Degraded application mode for transaction capture](./images/disaster-recovery-azure-applications/degraded-application-mode-for-transaction-capture.png) + +For more discussion of data management techniques for resilient Azure applications, see [Failsafe: Guidance for Resilient Cloud Architectures](https://channel9.msdn.com/Series/FailSafe). + +## Deployment topologies for disaster recovery +You must prepare mission-critical applications for the possibility of a region-wide service disruption. You do this by incorporating a multiple-region deployment strategy into the operational planning. + +Multiple-region deployments might involve IT-pro processes to publish the application and reference data to the secondary region after a disaster. If the application requires instant failover, the deployment process might involve an active/passive setup or an active/active setup. This type of deployment has existing instances of the application running in the alternate region. A routing tool such as Azure Traffic Manager provides load-balancing services at the DNS level. It can detect service disruptions and route the users to different regions when needed. + +Part of a successful Azure disaster recovery is architecting that recovery into the solution from the start. The cloud provides additional options for recovering from failures during a disaster that are not available in a traditional hosting provider. Specifically, you can dynamically and quickly allocate resources to a different region. Therefore, you won’t pay a lot for idle resources while you're waiting for a failure to occur. + +The following sections cover different deployment topologies for disaster recovery. Typically, there's a tradeoff in increased cost or complexity for additional availability. + +### Single-region deployment +A single-region deployment is not really a disaster recovery topology, but is meant to contrast with the other architectures. 
Single-region deployments are common for applications in Azure. However, this type of deployment is not a serious contender for a disaster recovery plan. + +The following diagram depicts an application running in a single Azure region. Azure Traffic Manager and the use of fault and upgrade domains increase availability of the application within the region. + +![Single-region deployment](./images/disaster-recovery-azure-applications/single-region-deployment.png) + +Here, it's apparent that the database is a single point of failure. Even though Azure replicates the data across different fault domains to internal replicas, this all occurs in the same region. The application cannot withstand a catastrophic failure. If the region goes down, all of the fault domains go down--including all service instances and storage resources. + +For all but the least critical applications, you must devise a plan to deploy your applications across multiple regions. You should also consider RTO and cost constraints in considering which deployment topology to use. + +Let’s take a look now at specific patterns to support failover across different regions. These examples all use two regions to describe the process. + +### Redeployment to a secondary Azure region +In the pattern of redeployment to a secondary region, only the primary region has applications and databases running. The secondary region is not set up for an automatic failover. So when a disaster occurs, you must spin up all the parts of the service in the new region. This includes uploading a cloud service to Azure, deploying the cloud service, restoring the data, and changing DNS to reroute the traffic. + +Although this is the most affordable of the multiple-region options, it has the worst RTO characteristics. In this model, the service package and database backups are stored either on-premises or in the Azure Blob storage instance of the secondary region. 
However, you must deploy a new service and restore the data before it resumes operation. Even if you fully automate the data transfer from backup storage, spinning up the new database environment consumes a lot of time. Moving data from the backup disk storage to the empty database on the secondary region is the most expensive part of the restore process. You must do this, however, to bring the new database to an operational state because it isn't replicated. + +The best approach is to store the service packages in Blob storage in the secondary region. This eliminates the need to upload the package to Azure, which is what happens when you deploy from an on-premises development machine. You can quickly deploy the service packages to a new cloud service from Blob storage by using PowerShell scripts. + +This option is practical only for non-critical applications that can tolerate a high RTO. For instance, this might work for an application that can be down for several hours but should be running again within 24 hours. + +![Redeployment to a secondary Azure region](./images/disaster-recovery-azure-applications/redeploy-to-a-secondary-azure-region.png) + +### Active-passive +The active-passive pattern is the choice that many companies favor. This pattern provides improvements to the RTO with a relatively small increase in cost over the redeployment pattern. +In this scenario, there is again a primary and a secondary Azure region. All of the traffic goes to the active deployment on the primary region. The secondary region is better prepared for disaster recovery because the database is running on both regions. Additionally, a synchronization mechanism is in place between them. This standby approach can involve two variations: a database-only approach or a complete deployment in the secondary region. + +#### Database only +In the first variation of the active-passive pattern, only the primary region has a deployed cloud service application. 
However, unlike the redeployment pattern, both regions are synchronized with the contents of the database. (For more information, see the section on [transactional data pattern for disaster recovery](#transactional-data-pattern-for-disaster-recovery).) When a disaster occurs, there are fewer activation requirements. You start the application in the secondary region, change connection strings to the new database, and change the DNS entries to reroute traffic. + +As in the redeployment pattern, you should have already stored the service packages in Azure Blob storage in the secondary region for faster deployment. Unlike the redeployment pattern, you don’t incur the majority of the overhead that a database restore operation requires. The database is ready and running. This saves a significant amount of time, making this an affordable DR pattern. It's also the most popular DR pattern. + +![Active-passive, database only](./images/disaster-recovery-azure-applications/active-passive-database-only.png) + +#### Full replica +In the second variation of the active-passive pattern, both the primary region and the secondary region have a full deployment. This deployment includes the cloud services and a synchronized database. However, only the primary region is actively handling network requests from the users. The secondary region becomes active only when the primary region experiences a service disruption. In that case, all new network requests route to the secondary region. Azure Traffic Manager can manage this failover automatically. + +Failover occurs faster than in the database-only variation because the services are already deployed. This pattern provides a very low RTO. The secondary failover region must be ready to go immediately after failure of the primary region. + +Along with a quicker response time, this pattern has the advantage of pre-allocating and deploying backup services.
You don’t have to worry about a region not having the space to allocate new instances in a disaster. This is important if your secondary Azure region is nearing capacity. There is no guarantee (service-level agreement) that you will instantly be able to deploy a number of new cloud services in any region. + +For the fastest response time with this model, you must have similar scale (number of role instances) in the primary and secondary regions. Despite the advantages, paying for unused compute instances is costly, and this might not be the most prudent financial choice. Because of this, it's more common to use a slightly scaled-down version of cloud services on the secondary region. Then you can quickly fail over and scale out the secondary deployment if necessary. You should automate the failover process so that after the primary region is inaccessible, you activate additional instances, depending on the load. This might involve the use of an autoscaling mechanism like [virtual machine scale sets](/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview/). + +The following diagram shows the model where the primary and secondary regions contain a fully deployed cloud service in an active-passive pattern. + +![Active-passive, full replica](./images/disaster-recovery-azure-applications/active-passive-full-replica.png) + +### Active-active +By now, you’re probably figuring out the evolution of the patterns: decreasing the RTO increases costs and complexity. The active-active solution actually breaks this tendency with regard to cost. + +In an active-active pattern, the cloud services and database are fully deployed in both regions. Unlike the active-passive model, both regions receive user traffic. This option yields the quickest recovery time. The services are already scaled to handle a portion of the load at each region. DNS is already enabled to use the secondary region. 
There's additional complexity in determining how to route users to the appropriate region. Round-robin scheduling might be possible. It's more likely that certain users would use a specific region where the primary copy of their data resides. + +In case of failover, simply disable DNS to the primary region. This routes all traffic to the secondary region. + +Even in this model, there are some variations. For example, the following diagram shows a model where the primary region owns the master copy of the database. The cloud services in both regions write to that primary database. The secondary deployment can read from the primary or replicated database. Replication in this example happens one way. + +![Active-active](./images/disaster-recovery-azure-applications/active-active.png) + +There is a downside to the active-active architecture in the preceding diagram. The second region must access the database in the first region because the master copy resides there. Performance significantly drops off when you access data from outside a region. In cross-region database calls, you should consider some type of batching strategy to improve the performance of these calls. For more information, see [How to use batching to improve SQL Database application performance](/azure/sql-database/sql-database-use-batching-to-improve-performance/). + +An alternative architecture might involve each region accessing its own database directly. In that model, some type of bidirectional replication is required to synchronize the databases in each region. + +In the active-active pattern, you might not need as many instances on the primary region as you would in the active-passive pattern. If you have 10 instances on the primary region in an active-passive architecture, you might need only 5 in each region in an active-active architecture. Both regions now share the load. 
This might be a cost savings over the active-passive pattern if you keep a warm standby on the passive region with 10 instances waiting for failover. + +Realize that until you restore the primary region, the secondary region might receive a sudden surge of new users. If there are 10,000 users on each server when the primary region experiences a service disruption, the secondary region suddenly has to handle 20,000 users. Monitoring rules on the secondary region must detect this increase and double the instances in the secondary region. For more information on this, see the section on [failure detection](#failure-detection). + +## Hybrid on-premises and cloud solution +One additional strategy for disaster recovery is to architect a hybrid application that runs on-premises and in the cloud. Depending on the application, the primary region might be either location. Consider the previous architectures and imagine the primary or secondary region as an on-premises location. + +There are some challenges in these hybrid architectures. First, most of this article has addressed PaaS architecture patterns. Typical PaaS applications in Azure rely on Azure-specific constructs such as roles, cloud services, and Traffic Manager. To create an on-premises solution for this type of PaaS application would require a significantly different architecture. This might not be feasible from a management or cost perspective. + +However, a hybrid solution for disaster recovery has fewer challenges for traditional architectures that have simply moved to the cloud. This is true of architectures that use IaaS. IaaS applications use virtual machines in the cloud that can have direct on-premises equivalents. You can also use virtual networks to connect machines in the cloud with on-premises network resources. This opens up several possibilities that are not possible with PaaS-only applications. 
For example, SQL Server can take advantage of disaster recovery solutions such as AlwaysOn Availability Groups and database mirroring. For details, see [High availability and disaster recovery for SQL Server in Azure virtual machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/). + +IaaS solutions also provide an easier path for on-premises applications to use Azure as the failover option. You might have a fully functioning application in an existing on-premises region. However, what if you lack the resources to maintain a geographically separate region for failover? You might decide to use virtual machines and virtual networks to get your application running in Azure. In that case, define processes that synchronize data to the cloud. The Azure deployment then becomes the secondary region to use for failover. The primary region remains the on-premises application. For more information about IaaS architectures and capabilities, see the [Virtual Machines documentation](https://azure.microsoft.com/documentation/services/virtual-machines/). + +## Alternative cloud +There are situations where even the robustness of the Microsoft Cloud might not meet internal compliance rules or policies that your organization requires. Even the best preparation and design to implement backup systems during a disaster fall short if there's a global service disruption of a cloud service provider. + +You’ll want to compare availability requirements with the cost and complexity of increased availability. Perform a risk analysis, and define the RTO and RPO for your solution. If your application cannot tolerate any downtime, it might make sense for you to consider using another cloud solution. Unless the entire Internet goes down, another cloud solution might still be available if Azure becomes globally inaccessible. 
+ +As with the hybrid scenario, the failover deployments in the previous disaster recovery architectures can also exist within another cloud solution. Alternative cloud DR sites should be used only for solutions whose RTO allows very little, if any, downtime. Note that a solution that uses a DR site outside Azure will require more work to configure, develop, deploy, and maintain. It's also more difficult to implement best practices in a cross-cloud architecture. Although cloud platforms have similar high-level concepts, the APIs and architectures are different. + +If you decide to split your DR among different platforms, it would make sense to architect abstraction layers in the design of the solution. If you do this, you won’t need to develop and maintain two different versions of the same application for different cloud platforms in case of disaster. As with the hybrid scenario, the use of Azure Virtual Machines or Azure Container Service might be easier in these cases than the use of cloud-specific PaaS designs. + +## Automation +Some of the patterns that we just discussed require quick activation of offline deployments as well as restoration of specific parts of a system. Automation, or scripting, supports the ability to activate resources on demand and deploy solutions rapidly. In this article, DR-related automation is equated with [Azure PowerShell](https://msdn.microsoft.com/library/azure/jj156055.aspx), but the [Service Management REST API](https://msdn.microsoft.com/library/azure/ee460799.aspx) is also an option. + +Developing scripts helps to manage the parts of DR that Azure does not transparently handle. This has the benefit of producing consistent results each time, which minimizes the chance of human error. Having predefined DR scripts also reduces the time to rebuild a system and its constituent parts in the midst of a disaster. You don’t want to try to manually figure out how to restore your site while it's down and losing money every minute. 
+ +After you create the scripts, test them repeatedly from start to finish. After you verify their basic functionality, make sure that you test them in [disaster simulation](#disaster-simulation). This helps uncover flaws in the scripts or processes. + +A best practice with automation is to create a repository of PowerShell scripts or command-line interface (CLI) scripts for Azure disaster recovery. Clearly mark and categorize them for easy lookup. Designate one person to manage the repository and versioning of the scripts. Document them well with explanations of parameters and examples of script use. Also ensure that you keep this documentation in sync with your Azure deployments. This underscores the purpose of having one person in charge of all parts of the repository. + +## Failure detection +To correctly handle problems with availability and disaster recovery, you must be able to detect and diagnose failures. You should do advanced server and deployment monitoring so you can quickly know when a system or its parts are suddenly down. Monitoring tools that look at the overall health of the cloud service and its dependencies can perform part of this work. One Microsoft tool is [System Center 2016](https://www.microsoft.com/en-us/server-cloud/products/system-center-2016/). Third-party tools can also provide monitoring capabilities. Most monitoring solutions track key performance counters and service availability. + +Although these tools are vital, they do not replace the need to plan for fault detection and reporting within a cloud service. You must plan to properly use Azure Diagnostics. Custom performance counters or event-log entries can also be part of the overall strategy. This provides more data during failures to quickly diagnose the problem and restore full capabilities. It also provides additional metrics that the monitoring tools can use to determine application health. 
For more information, see [Enabling Azure Diagnostics in Azure Cloud Services](/azure/cloud-services/cloud-services-dotnet-diagnostics/). For a discussion of how to plan for an overall “health model,” see [Failsafe: Guidance for Resilient Cloud Architectures](https://channel9.msdn.com/Series/FailSafe). + +## Disaster simulation +Simulation testing involves creating small real-life situations on the work floor to observe how the team members react. Simulations also show how effective the solutions are in the recovery plan. Carry out simulations in such a way that the created scenarios don't disrupt actual business while still feeling like real situations. + +Consider architecting a type of “switchboard” in the application to manually simulate availability issues. For instance, through a soft switch, trigger database access exceptions for an ordering module by causing it to malfunction. You can take similar lightweight approaches for other modules at the network interface level. + +The simulation highlights any issues that were inadequately addressed. The simulated scenarios must be completely controllable. This means that, even if the recovery plan seems to be failing, you can restore the situation back to normal without causing any significant damage. It’s also important that you inform higher-level management about when and how the simulation exercises will be executed. This plan should include information on the time or resources that might become unproductive while the simulation test is running. When you're subjecting your disaster recovery plan to a test, it's also important to define how success will be measured. + +There are several other techniques that you can use to test disaster recovery plans. However, most of them are simply altered versions of these basic techniques. The main motive behind this testing is to evaluate how feasible and how workable the recovery plan is. 
Disaster recovery testing focuses on the details to discover holes in the basic recovery plan. + +## Service-specific guidance + +The following topics describe disaster recovery for specific Azure services: + +| Service | Topic | +|---------|-------| +| Cloud Services | [What to do in the event of an Azure service disruption that impacts Azure Cloud Services](/azure/cloud-services/cloud-services-disaster-recovery-guidance) | +| Key Vault | [Azure Key Vault availability and redundancy](/azure/key-vault/key-vault-disaster-recovery-guidance) | +| Storage | [What to do if an Azure Storage outage occurs](/azure/storage/storage-disaster-recovery-guidance) | +| SQL Database | [Restore an Azure SQL Database or failover to a secondary](/azure/sql-database/sql-database-disaster-recovery) | +| Virtual machines | [What to do in the event that an Azure service disruption impacts Azure virtual machines](/azure/virtual-machines/virtual-machines-disaster-recovery-guidance) | +| Virtual networks | [Virtual Network – Business Continuity](/azure/virtual-network/virtual-network-disaster-recovery-guidance) | + + diff --git a/docs/resiliency/disaster-recovery-high-availability-azure-applications.md b/docs/resiliency/disaster-recovery-high-availability-azure-applications.md new file mode 100644 index 00000000000..a78aac7de36 --- /dev/null +++ b/docs/resiliency/disaster-recovery-high-availability-azure-applications.md @@ -0,0 +1,103 @@ +--- +title: Disaster Recovery and High Availability for Azure Applications +description: Technical overviews and in-depth information on designing applications for high availability and disaster recovery of applications built on Microsoft Azure.
+services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: e13d5f49-2b91-44ba-829a-1e0f1fceaae8 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Disaster recovery and high availability for applications built on Microsoft Azure +## Introduction +This article focuses on high availability for applications running in Azure. An overall strategy for high availability also includes the aspect of disaster recovery. Planning for failures and disasters in the cloud requires you to recognize the failures quickly. You then implement a strategy that matches your tolerance for the application’s downtime. Additionally, you have to consider the extent of data loss the application can tolerate without causing adverse business consequences as it is restored. + +Most companies say they are prepared for temporary and large-scale failures. However, before you answer that question for yourself, does your company rehearse these failures? Do you test the recovery of databases to ensure you have the correct processes in place? Probably not. That’s because successful disaster recovery starts with lots of planning and architecting to implement these processes. Just like many other non-functional requirements, such as security, disaster recovery rarely gets the up-front analysis and time allocation it requires. Also, most companies don’t have the budget for geographically distributed regions with redundant capacity. Consequently, even mission critical applications are frequently excluded from proper disaster recovery planning. + +Cloud platforms, such as Azure, provide geographically dispersed regions around the world. These platforms also provide capabilities that support availability and a variety of disaster recovery scenarios. 
Now, every mission critical cloud application can be given due consideration for disaster proofing of the system. Azure has resiliency and disaster recovery built in to many of its services. You must study these platform features carefully, and supplement with application strategies. + +This article outlines the necessary architecture steps you must take to disaster-proof an Azure deployment. Then you can implement the larger business continuity process. A business continuity plan is a roadmap for continuing operations under adverse conditions. This could be a failure with technology, such as a downed service, or a natural disaster, such as a storm or power outage. Application resiliency for disasters is only a subset of the larger disaster recovery process, as described in this NIST document: [Contingency Planning Guide for Information Technology Systems](https://www.fismacenter.com/sp800-34.pdf). + +The following sections define different levels of failures, techniques to deal with them, and architectures that support these techniques. This information provides input to your disaster recovery processes and procedures, to ensure your disaster recovery strategy works correctly and efficiently. + +## Characteristics of resilient cloud applications +A well architected application can withstand capability failures at a tactical level, and it can also tolerate strategic system-wide failures at the region level. The following sections define the terminology referenced throughout the document to describe various aspects of resilient cloud services. + +### High availability +A highly available cloud application implements strategies to absorb the outage of dependencies, like the managed services offered by the cloud platform. Despite possible failures of the cloud platform’s capabilities, this approach permits the application to continue to exhibit the expected functional and non-functional systemic characteristics. 
This is covered in depth in the Channel 9 video series, [Failsafe: Guidance for Resilient Cloud Architectures](https://channel9.msdn.com/Series/FailSafe). + +When you implement the application, you must consider the probability of a capability outage. Additionally, consider the impact an outage will have on the application from the business perspective, before diving deep into the implementation strategies. Without due consideration to the business impact and the probability of hitting the risk condition, the implementation can be expensive and potentially unnecessary. + +Consider an automotive analogy for high availability. Even quality parts and superior engineering do not prevent occasional failures. For example, when your car gets a flat tire, the car still runs, but it is operating with degraded functionality. If you planned for this potential occurrence, you can use one of those thin-rimmed spare tires until you reach a repair shop. Although the spare tire does not permit fast speeds, you can still operate the vehicle until you replace the tire. Similarly, a cloud service that plans for potential loss of capabilities can prevent a relatively minor problem from bringing down the entire application. This is true even if the cloud service must run with degraded functionality. + +There are a few key characteristics of highly available cloud services: availability, scalability, and fault tolerance. Although these characteristics are interrelated, it is important to understand each, and how they contribute to the overall availability of the solution. + +### Availability +An available application considers the availability of its underlying infrastructure and dependent services. Available applications remove single points of failure through redundancy and resilient design. When you broaden the scope to consider availability in Azure, it is important to understand the concept of the effective availability of the platform.
Effective availability considers the Service Level Agreements (SLA) of each dependent service, and their cumulative effect on the total system availability. + +System availability is the measure of the percentage of a time window the system will be able to operate. For example, the availability SLA of at least two instances of a web or worker role in Azure is 99.95 percent (out of 100 percent). It does not measure the performance or functionality of the services running on those roles. However, the effective availability of your cloud service is also affected by the various SLAs of the other dependent services. The more moving parts within the system, the more care you must take to ensure the application can resiliently meet the availability requirements of its end users. + +Consider the following SLAs for a cloud service that uses these Azure services: Compute, Azure SQL Database, and Azure Storage. + +| Azure service | SLA | Potential minutes downtime/month (30 days) | +|:--- |:--- |:---:| +| Compute |99.95% |21.6 minutes | +| SQL Database |99.99% |4.3 minutes | +| Storage |99.90% |43.2 minutes | + +You must plan for all services to potentially go down at different times. In this simplified example, the total number of minutes per month that the application could be down is about 69 minutes (21.6 + 4.3 + 43.2). A 30-day month has a total of 43,200 minutes. 69 minutes is about 0.16 percent of the total number of minutes in a 30-day month (43,200 minutes). This gives you an effective availability of 99.84 percent for the cloud service. + +However, using availability techniques described in this paper can improve this. For example, if you design your application to continue running when the SQL Database is unavailable, you can remove that from the equation. This might mean that the application runs with reduced capabilities, so there are also business requirements to consider. For a complete list of Azure SLAs, see [Service Level Agreements](https://azure.microsoft.com/support/legal/sla/).
+ +### Scalability +Scalability directly affects availability. An application that fails under increased load is no longer available. Scalable applications are able to meet increased demand with consistent results, in acceptable time windows. When a system is scalable, it scales horizontally or vertically to manage increases in load while maintaining consistent performance. In basic terms, horizontal scaling adds more machines of the same size (processor, memory, and bandwidth), while vertical scaling increases the size of the existing machines. For Azure, you have vertical scaling options for selecting various machine sizes for compute. However, changing the machine size requires a re-deployment. Therefore, the most flexible solutions are designed for horizontal scaling. This is especially true for compute, because you can easily increase the number of running instances of any web or worker role. These additional instances handle increased traffic through the Azure Web portal, PowerShell scripts, or code. Base this decision on increases in specific monitored metrics. In this scenario, user performance or metrics do not suffer a noticeable drop under load. Typically, the web and worker roles store any state externally. This allows for flexible load balancing and graceful handling of any changes to instance counts. Horizontal scaling also works well with services, such as Azure Storage, which do not provide tiered options for vertical scaling. + +Cloud deployments should be seen as a collection of scale-units. This allows the application to be elastic in servicing the throughput needs of end users. The scale units are easier to visualize at the web and application server level. This is because Azure already provides stateless compute nodes through web and worker roles. Adding more compute scale-units to the deployment will not cause any application state management side effects, because compute scale-units are stateless. 
A storage scale-unit is responsible for managing a partition of data (structured or unstructured). Examples of storage scale-units include Azure Table partition, Azure Blob container, and Azure SQL Database. Even the usage of multiple Azure Storage accounts has a direct impact on the application scalability. You must design a highly scalable cloud service to incorporate multiple storage scale-units. For instance, if an application uses relational data, partition the data across several SQL databases. Doing so allows the storage to keep up with the elastic compute scale-unit model. Similarly, Azure Storage allows data partitioning schemes that require deliberate designs to meet the throughput needs of the compute layer. For a list of best practices for designing scalable cloud services, see [Best Practices for the Design of Large-Scale Services on Azure Cloud Services](https://azure.microsoft.com/blog/best-practices-for-designing-large-scale-services-on-windows-azure/). + +### Fault tolerance +Applications should assume that every dependent cloud capability can and will go down at some point in time. A fault tolerant application detects and maneuvers around failed elements, to continue and return the correct results within a specific timeframe. For transient error conditions, a fault tolerant system will employ a retry policy. For more serious faults, the application can detect problems and fail over to alternative hardware or contingency plans until the failure is corrected. A reliable application can properly manage the failure of one or more parts, and continue operating properly. Fault tolerant applications can use one or more design strategies, such as redundancy, replication, or degraded functionality. + +## Disaster recovery +A cloud deployment might cease to function due to a systemic outage of the dependent services or the underlying infrastructure. Under such conditions, a business continuity plan triggers the disaster recovery process. 
This process typically involves both operations personnel and automated procedures in order to reactivate the application in an available region. This requires the transfer of application users, data, and services to the new region. It also involves the use of backup media or ongoing replication. + +Consider the previous analogy that compared high availability to the ability to recover from a flat tire through the use of a spare. In contrast, disaster recovery involves the steps taken after a car crash, where the car is no longer operational. In that case, the best solution is to find an efficient way to change cars, by calling a travel service or a friend. In this scenario, there is likely going to be a longer delay in getting back on the road. There is also more complexity in repairing and returning to the original vehicle. In the same way, disaster recovery to another region is a complex task that typically involves some downtime and potential loss of data. To better understand and evaluate disaster recovery strategies, it is important to define two terms: recovery time objective (RTO) and recovery point objective (RPO). + +### Recovery time objective +The RTO is the maximum amount of time allocated for restoring application functionality. This is based on business requirements, and it is related to the importance of the application. Critical business applications require a low RTO. + +### Recovery point objective +The RPO is the acceptable time window of lost data due to the recovery process. For example, if the RPO is one hour, you must completely back up or replicate the data at least every hour. Once you bring up the application in an alternate region, the backup data may be missing up to an hour of data. Like RTO, critical applications target a much smaller RPO. 
+ +## Checklist +Let’s summarize the key points that have been covered in this article (and its related articles on [high availability](high-availability-azure-applications.md) and [disaster recovery](disaster-recovery-azure-applications.md) for Azure applications). This summary will act as a checklist of items you should consider for your own availability and disaster recovery planning. These are best practices that have been useful for customers seeking to get serious about implementing a successful solution. + +1. Conduct a risk assessment for each application, because each can have different requirements. Some applications are more critical than others and would justify the extra cost to architect them for disaster recovery. +2. Use this information to define the RTO and RPO for each application. +3. Design for failure, starting with the application architecture. +4. Implement best practices for high availability, while balancing cost, complexity, and risk. +5. Implement disaster recovery plans and processes. + * Consider failures that span the module level all the way to a complete cloud outage. + * Establish backup strategies for all reference and transactional data. + * Choose a multi-site disaster recovery architecture. +6. Define a specific owner for disaster recovery processes, automation, and testing. The owner should manage and own the entire process. +7. Document the processes so they are easily repeatable. Although there is one owner, multiple people should be able to understand and follow the processes in an emergency. +8. Train the staff to implement the process. +9. Use regular disaster simulations for both training and validation of the process. + +## Summary +When hardware or applications fail within Azure, the techniques and strategies for managing them are different than when failure occurs on on-premises systems. 
The main reason for this is that cloud solutions typically have more dependencies on infrastructure that is distributed across an Azure region, and managed as separate services. You must deal with partial failures using high availability techniques. To manage more severe failures, possibly due to a disaster event, use disaster recovery strategies. + +Azure detects and handles many failures, but there are many types of failures that require application-specific strategies. You must actively prepare for and manage the failures of applications, services, and data. + +When creating your application’s availability and disaster recovery plan, consider the business consequences of the application’s failure. Defining the processes, policies, and procedures to restore critical systems after a catastrophic event takes time, planning, and commitment. And once you establish the plans, you cannot stop there. You must regularly analyze, test, and continually improve the plans based on your application portfolio, business needs, and the technologies available to you. Azure provides new capabilities and raises new challenges to creating robust applications that withstand failures. + diff --git a/docs/resiliency/failure-mode-analysis.md b/docs/resiliency/failure-mode-analysis.md new file mode 100644 index 00000000000..cdcdc24aa9b --- /dev/null +++ b/docs/resiliency/failure-mode-analysis.md @@ -0,0 +1,504 @@ +--- +title: Failure mode analysis +description: Guidelines for performing failure mode analysis for cloud solutions based on Azure. 
+services: '' +documentationcenter: na +author: MikeWasson +manager: christb +editor: '' +tags: '' + +pnp.series.title: Design for Resiliency + +ms.assetid: 561e9bbc-26bc-4de5-8329-e3b9e06f2e7e +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 10/24/2016 +ms.author: mwasson + +--- +# Failure mode analysis +[!INCLUDE [header](../_includes/header.md)] + +Failure mode analysis (FMA) is a process for building resiliency into a system, by identifying possible failure points in the system. The FMA should be part of the architecture and design phases, so that you can build failure recovery into the system from the beginning. + +Here is the general process to conduct an FMA: + +1. Identify all of the components in the system. Include external dependencies, such as identity providers, third-party services, and so on. +2. For each component, identify potential failures that could occur. A single component may have more than one failure mode. For example, you should consider read failures and write failures separately, because the impact and possible mitigations will be different. +3. Rate each failure mode according to its overall risk. Consider these factors: + + * What is the likelihood of the failure? Is it relatively common? Extremely rare? You don't need exact numbers; the purpose is to help rank the priority. + * What is the impact on the application, in terms of availability, data loss, monetary cost, and business disruption? +4. For each failure mode, determine how the application will respond and recover. Consider tradeoffs in cost and application complexity. + +As a starting point for your FMA process, this article contains a catalog of potential failure modes and their mitigations. The catalog is organized by technology or Azure service, plus a general category for application-level design. The catalog is not exhaustive, but covers many of the core Azure services.
+ +## App Service +### App Service app shuts down. +**Detection**. Possible causes: + +* Expected shutdown + + * An operator shuts down the application; for example, using the Azure portal. + * The app was unloaded because it was idle. (Only if the `Always On` setting is disabled.) +* Unexpected shutdown + + * The app crashes. + * An App Service VM instance becomes unavailable. + +Application_End logging will catch the app domain shutdown (soft process crash) and is the only way to catch the application domain shutdowns. + +**Recovery** + +* If the shutdown was expected, use the application's shutdown event to shut down gracefully. For example, in ASP.NET, use the `Application_End` method. +* If the application was unloaded while idle, it is automatically restarted on the next request. However, you will incur the "cold start" cost. +* To prevent the application from being unloaded while idle, enable the `Always On` setting in the web app. See [Configure web apps in Azure App Service][app-service-configure]. +* To prevent an operator from shutting down the app, set a resource lock with `ReadOnly` level. See [Lock resources with Azure Resource Manager][rm-locks]. +* If the app crashes or an App Service VM becomes unavailable, App Service automatically restarts the app. + +**Diagnostics**. Application logs and web server logs. See [Enable diagnostics logging for web apps in Azure App Service][app-service-logging]. + +### A particular user repeatedly makes bad requests or overloads the system. +**Detection**. Authenticate users and include user ID in application logs. + +**Recovery** + +* Use [Azure API Management][api-management] to throttle requests from the user. See [Advanced request throttling with Azure API Management][api-management-throttling] +* Block the user. + +**Diagnostics**. Log all authentication requests. + +### A bad update was deployed. +**Detection**. 
Monitor the application health through the Azure Portal (see [Monitor Azure web app performance][app-insights-web-apps]) or implement the [health endpoint monitoring pattern][health-endpoint-monitoring-pattern]. + +**Recovery**. Use multiple [deployment slots][app-service-slots] and roll back to the last-known-good deployment. For more information, see [Basic web application][ra-web-apps-basic]. + +## Azure Active Directory +### OpenID Connect (OIDC) authentication fails. +**Detection**. Possible failure modes include: + +1. Azure AD is not available, or cannot be reached due to a network problem. Redirection to the authentication endpoint fails, and the OIDC middleware throws an exception. +2. Azure AD tenant does not exist. Redirection to the authentication endpoint returns an HTTP error code, and the OIDC middleware throws an exception. +3. User cannot authenticate. No detection strategy is necessary; Azure AD handles login failures. + +**Recovery** + +1. Catch unhandled exceptions from the middleware. +2. Handle `AuthenticationFailed` events. +3. Redirect the user to an error page. +4. User retries. + +## Azure Search +### Writing data to Azure Search fails. +**Detection**. Catch `Microsoft.Rest.Azure.CloudException` errors. + +**Recovery** + +The [Search .NET SDK][search-sdk] automatically retries after transient failures. Any exceptions thrown by the client SDK should be treated as non-transient errors. + +The default retry policy uses exponential back-off. To use a different retry policy, call `SetRetryPolicy` on the `SearchIndexClient` or `SearchServiceClient` class. For more information, see [Automatic Retries][auto-rest-client-retry]. + +**Diagnostics**. Use [Search Traffic Analytics][search-analytics]. + +### Reading data from Azure Search fails. +**Detection**. Catch `Microsoft.Rest.Azure.CloudException` errors. + +**Recovery** + +The [Search .NET SDK][search-sdk] automatically retries after transient failures. 
Any exceptions thrown by the client SDK should be treated as non-transient errors. + +The default retry policy uses exponential back-off. To use a different retry policy, call `SetRetryPolicy` on the `SearchIndexClient` or `SearchServiceClient` class. For more information, see [Automatic Retries][auto-rest-client-retry]. + +**Diagnostics**. Use [Search Traffic Analytics][search-analytics]. + +## Cassandra +### Reading or writing to a node fails. +**Detection**. Catch the exception. For .NET clients, this will typically be `System.Web.HttpException`. Other clients may have other exception types. For more information, see [Cassandra error handling done right](http://www.datastax.com/dev/blog/cassandra-error-handling-done-right). + +**Recovery** + +* Each [Cassandra client](https://wiki.apache.org/cassandra/ClientOptions) has its own retry policies and capabilities. For more information, see [Cassandra error handling done right][cassandra-error-handling]. +* Use a rack-aware deployment, with data nodes distributed across the fault domains. +* Deploy to multiple regions with local quorum consistency. If a non-transient failure occurs, fail over to another region. + +**Diagnostics**. Application logs. + +## Cloud Service +### Web or worker roles are unexpectedly being shut down. +**Detection**. The [RoleEnvironment.Stopping][RoleEnvironment.Stopping] event is fired. + +**Recovery**. Override the [RoleEntryPoint.OnStop][RoleEntryPoint.OnStop] method to gracefully clean up. For more information, see [The Right Way to Handle Azure OnStop Events][onstop-events] (blog). + +## DocumentDB +### Reading data from DocumentDB fails. +**Detection**. Catch `System.Net.Http.HttpRequestException` or `Microsoft.Azure.Documents.DocumentClientException`. + +**Recovery** + +* The SDK automatically retries failed attempts. To set the number of retries and the maximum wait time, configure `ConnectionPolicy.RetryOptions`.
Exceptions that the client raises are either beyond the retry policy or are not transient errors. +* If DocumentDB throttles the client, it returns an HTTP 429 error. Check the status code in the `DocumentClientException`. If you are getting error 429 consistently, consider increasing the throughput value of the DocumentDB collection. +* Replicate the DocumentDB database across two or more regions. All replicas are readable. Using the client SDKs, specify the `PreferredLocations` parameter. This is an ordered list of Azure regions. All reads will be sent to the first available region in the list. If the request fails, the client will try the other regions in the list, in order. For more information, see [Developing with multi-region DocumentDB accounts][docdb-multi-region]. + +**Diagnostics**. Log all errors on the client side. + +### Writing data to DocumentDB fails. +**Detection**. Catch `System.Net.Http.HttpRequestException` or `Microsoft.Azure.Documents.DocumentClientException`. + +**Recovery** + +* The SDK automatically retries failed attempts. To set the number of retries and the maximum wait time, configure `ConnectionPolicy.RetryOptions`. Exceptions that the client raises are either beyond the retry policy or are not transient errors. +* If DocumentDB throttles the client, it returns an HTTP 429 error. Check the status code in the `DocumentClientException`. If you are getting error 429 consistently, consider increasing the throughput value of the DocumentDB collection. +* Replicate the DocumentDB database across two or more regions. If the primary region fails, another region will be promoted to write. You can also trigger a failover manually. The SDK does automatic discovery and routing, so application code continues to work after a failover. During the failover period (typically minutes), write operations will have higher latency, as the SDK finds the new write region. 
+ For more information, see [Developing with multi-region DocumentDB accounts][docdb-multi-region]. +* As a fallback, persist the document to a backup queue, and process the queue later. + +**Diagnostics**. Log all errors on the client side. + +## Elasticsearch +### Reading data from Elasticsearch fails. +**Detection**. Catch the appropriate exception for the particular [Elasticsearch client][elasticsearch-client] being used. + +**Recovery** + +* Use a retry mechanism. Each client has its own retry policies. +* Deploy multiple Elasticsearch nodes and use replication for high availability. + +For more information, see [Running Elasticsearch on Azure][elasticsearch-azure]. + +**Diagnostics**. You can use monitoring tools for Elasticsearch, or log all errors on the client side with the payload. See the 'Monitoring' section in [Running Elasticsearch on Azure][elasticsearch-azure]. + +### Writing data to Elasticsearch fails. +**Detection**. Catch the appropriate exception for the particular [Elasticsearch client][elasticsearch-client] being used. + +**Recovery** + +* Use a retry mechanism. Each client has its own retry policies. +* If the application can tolerate a reduced consistency level, consider writing with `write_consistency` setting of `quorum`. + +For more information, see [Running Elasticsearch on Azure][elasticsearch-azure]. + +**Diagnostics**. You can use monitoring tools for Elasticsearch, or log all errors on the client side with the payload. See the 'Monitoring' section in [Running Elasticsearch on Azure][elasticsearch-azure]. + +## Queue storage +### Writing a message to Azure Queue storage fails consistently. +**Detection**. After *N* retry attempts, the write operation still fails. + +**Recovery** + +* Store the data in a local cache, and forward the writes to storage later, when the service becomes available. +* Create a secondary queue, and write to that queue if the primary queue is unavailable. + +**Diagnostics**. 
Use [storage metrics][storage-metrics]. + +### The application cannot process a particular message from the queue. +**Detection**. Application specific. For example, the message contains invalid data, or the business logic fails for some reason. + +**Recovery** + +Move the message to a separate queue. Run a separate process to examine the messages in that queue. + +Consider using Azure Service Bus Messaging queues, which provide [dead-letter queue][sb-dead-letter-queue] functionality for this purpose. + +> [!NOTE] +> If you are using Storage queues with WebJobs, the WebJobs SDK provides built-in poison message handling. See [How to use Azure queue storage with the WebJobs SDK][sb-poison-message]. + +**Diagnostics**. Use application logging. + +## Redis Cache +### Reading from the cache fails. +**Detection**. Catch `StackExchange.Redis.RedisConnectionException`. + +**Recovery** + +1. Retry on transient failures. Azure Redis Cache supports built-in retry. See [Redis Cache retry guidelines][redis-retry]. +2. Treat non-transient failures as a cache miss, and fall back to the original data source. + +**Diagnostics**. Use [Redis Cache diagnostics][redis-monitor]. + +### Writing to the cache fails. +**Detection**. Catch `StackExchange.Redis.RedisConnectionException`. + +**Recovery** + +1. Retry on transient failures. Azure Redis Cache supports built-in retry. See [Redis Cache retry guidelines][redis-retry]. +2. If the error is non-transient, ignore it and let other transactions write to the cache later. + +**Diagnostics**. Use [Redis Cache diagnostics][redis-monitor]. + +## SQL Database +### Cannot connect to the database in the primary region. +**Detection**. Connection fails. + +**Recovery** + +Prerequisite: The database must be configured for active geo-replication. See [SQL Database Active Geo-Replication][sql-db-replication]. + +* For queries, read from a secondary replica. +* For inserts and updates, manually fail over to a secondary replica.
See [Initiate a planned or unplanned failover for Azure SQL Database][sql-db-failover]. + +The replica uses a different connection string, so you will need to update the connection string in your application. + +### Client runs out of connections in the connection pool. +**Detection**. Catch `System.InvalidOperationException` errors. + +**Recovery** + +* Retry the operation. +* As a mitigation plan, isolate the connection pools for each use case, so that one use case can't dominate all the connections. +* Increase the maximum connection pool size. + +**Diagnostics**. Application logs. + +### Database connection limit is reached. +**Detection**. Azure SQL Database limits the number of concurrent workers, logins, and sessions. The limits depend on the service tier. For more information, see [Azure SQL Database resource limits][sql-db-limits]. + +To detect these errors, catch `System.Data.SqlClient.SqlException` and check the value of `SqlException.Number` for the SQL error code. For a list of relevant error codes, see [SQL error codes for SQL Database client applications: Database connection error and other issues][sql-db-errors]. + +**Recovery**. These errors are considered transient, so retrying may resolve the issue. If you consistently hit these errors, consider scaling the database. + +**Diagnostics** + +* The [sys.event_log][sys.event_log] query returns successful database connections, connection failures, and deadlocks. +* Create an [alert rule][azure-alerts] for failed connections. +* Enable [SQL Database auditing][sql-db-audit] and check for failed logins. + +## Service Bus Messaging +### Reading a message from a Service Bus queue fails. +**Detection**. Catch exceptions from the client SDK. The base class for Service Bus exceptions is [MessagingException][sb-messagingexception-class]. If the error is transient, the `IsTransient` property is true. + +For more information, see [Service Bus messaging exceptions][sb-messaging-exceptions]. + +**Recovery** + +1.
Retry on transient failures. See [Service Bus retry guidelines][sb-retry]. +2. Messages that cannot be delivered to any receiver are placed in a *dead-letter queue*. Use this queue to see which messages could not be received. There is no automatic cleanup of the dead-letter queue. Messages remain there until you explicitly retrieve them. See [Overview of Service Bus dead-letter queues][sb-dead-letter-queue]. + +### Writing a message to a Service Bus queue fails. +**Detection**. Catch exceptions from the client SDK. The base class for Service Bus exceptions is [MessagingException][sb-messagingexception-class]. If the error is transient, the `IsTransient` property is true. + +For more information, see [Service Bus messaging exceptions][sb-messaging-exceptions]. + +**Recovery** + +1. The Service Bus client automatically retries after transient errors. By default, it uses exponential back-off. After the maximum retry count or maximum timeout period, the client throws an exception. For more information, see [Service Bus retry guidelines][sb-retry]. +2. If the queue quota is exceeded, the client throws [QuotaExceededException][QuotaExceededException]. The exception message gives more details. Drain some messages from the queue before retrying, and consider using the Circuit Breaker pattern to avoid continued retries while the quota is exceeded. Also, make sure the [BrokeredMessage.TimeToLive] property is not set too high. +3. Within a region, resiliency can be improved by using [partitioned queues or topics][sb-partition]. A non-partitioned queue or topic is assigned to one messaging store. If this messaging store is unavailable, all operations on that queue or topic will fail. A partitioned queue or topic is partitioned across multiple messaging stores. +4. For additional resiliency, create two Service Bus namespaces in different regions, and replicate the messages. You can use either active replication or passive replication. 
+ + * Active replication: The client sends every message to both queues. The receiver listens on both queues. Tag messages with a unique identifier, so the client can discard duplicate messages. + * Passive replication: The client sends the message to one queue. If there is an error, the client falls back to the other queue. The receiver listens on both queues. This approach reduces the number of duplicate messages that are sent. However, the receiver must still handle duplicate messages. + + For more information, see [GeoReplication sample][sb-georeplication-sample] and [Best practices for insulating applications against Service Bus outages and disasters](/azure/service-bus-messaging/service-bus-outages-disasters/). + +### Duplicate message. +**Detection**. Examine the `MessageId` and `DeliveryCount` properties of the message. + +**Recovery** + +* If possible, design your message processing operations to be idempotent. Otherwise, store message IDs of messages that are already processed, and check the ID before processing a message. +* Enable duplicate detection, by creating the queue with `RequiresDuplicateDetection` set to true. With this setting, Service Bus automatically deletes any message that is sent with the same `MessageId` as a previous message. Note the following: + + * This setting prevents duplicate messages from being put into the queue. It doesn't prevent a receiver from processing the same message more than once. + * Duplicate detection has a time window. If a duplicate is sent beyond this window, it won't be detected. + +**Diagnostics**. Log duplicated messages. + +### The application cannot process a particular message from the queue. +**Detection**. Application specific. For example, the message contains invalid data, or the business logic fails for some reason. + +**Recovery** + +There are two failure modes to consider. + +* The receiver detects the failure. In this case, move the message to the dead-letter queue. 
Later, run a separate process to examine the messages in the dead-letter queue. +* The receiver fails in the middle of processing the message — for example, due to an unhandled exception. To handle this case, use `PeekLock` mode. In this mode, if the lock expires, the message becomes available to other receivers. If the message exceeds the maximum delivery count or the time-to-live, the message is automatically moved to the dead-letter queue. + +For more information, see [Overview of Service Bus dead-letter queues][sb-dead-letter-queue]. + +**Diagnostics**. Whenever the application moves a message to the dead-letter queue, write an event to the application logs. + +## Service Fabric +### A request to a service fails. +**Detection**. The service returns an error. + +**Recovery** + +* Locate a proxy again (`ServiceProxy` or `ActorProxy`) and call the service/actor method again. +* **Stateful service**. Wrap operations on reliable collections in a transaction. If there is an error, the transaction will be rolled back. The request, if pulled from a queue, will be processed again. +* **Stateless service**. If the service persists data to an external store, all operations need to be idempotent. + +**Diagnostics**. Application log + +### Service Fabric node is shut down. +**Detection**. A cancellation token is passed to the service's `RunAsync` method. Service Fabric cancels the task before shutting down the node. + +**Recovery**. Use the cancellation token to detect shutdown. When Service Fabric requests cancellation, finish any work and exit `RunAsync` as quickly as possible. + +**Diagnostics**. Application logs + +## Storage +### Writing data to Azure Storage fails +**Detection**. The client receives errors when writing. + +**Recovery** + +1. Retry the operation, to recover from transient failures. The [retry policy][Storage.RetryPolicies] in the client SDK handles this automatically. +2. Implement the Circuit Breaker pattern to avoid overwhelming storage. +3. 
If N retry attempts fail, perform a graceful fallback. For example: + + * Store the data in a local cache, and forward the writes to storage later, when the service becomes available. + * If the write action was in a transactional scope, compensate the transaction. + +**Diagnostics**. Use [storage metrics][storage-metrics]. + +### Reading data from Azure Storage fails. +**Detection**. The client receives errors when reading. + +**Recovery** + +1. Retry the operation, to recover from transient failures. The [retry policy][Storage.RetryPolicies] in the client SDK handles this automatically. +2. For RA-GRS storage, if reading from the primary endpoint fails, try reading from the secondary endpoint. The client SDK can handle this automatically. See [Azure Storage replication][storage-replication]. +3. If *N* retry attempts fail, take a fallback action to degrade gracefully. For example, if a product image can't be retrieved from storage, show a generic placeholder image. + +**Diagnostics**. Use [storage metrics][storage-metrics]. + +## Virtual Machine +### Connection to a backend VM fails. +**Detection**. Network connection errors. + +**Recovery** + +* Deploy at least two backend VMs in an availability set, behind a load balancer. +* If the connection error is transient, sometimes TCP will successfully retry sending the message. +* Implement a retry policy in the application. +* For persistent or non-transient errors, implement the [Circuit Breaker][circuit-breaker] pattern. +* If the calling VM exceeds its network egress limit, the outbound queue will fill up. If the outbound queue is consistently full, consider scaling out. + +**Diagnostics**. Log events at service boundaries. + +### VM instance becomes unavailable or unhealthy. +**Detection**. Configure a Load Balancer [health probe][lb-probe] that signals whether the VM instance is healthy. The probe should check whether critical functions are responding correctly. + +**Recovery**. 
For each application tier, put multiple VM instances into the same availability set, and place a load balancer in front of the VMs. If the health probe fails, the Load Balancer stops sending new connections to the unhealthy instance. + +**Diagnostics** + +* Use Load Balancer [log analytics][lb-monitor]. +* Configure your monitoring system to monitor all of the health monitoring endpoints. + +### Operator accidentally shuts down a VM. +**Detection**. N/A + +**Recovery**. Set a resource lock with `ReadOnly` level. See [Lock resources with Azure Resource Manager][rm-locks]. + +**Diagnostics**. Use [Azure Activity Logs][azure-activity-logs]. + +## WebJobs +### Continuous job stops running when the SCM host is idle. +**Detection**. Pass a cancellation token to the WebJob function. For more information, see [Graceful shutdown][web-jobs-shutdown]. + +**Recovery**. Enable the `Always On` setting in the web app. For more information, see [Run Background tasks with WebJobs][web-jobs]. + +## Application design +### Application can't handle a spike in incoming requests. +**Detection**. Depends on the application. Typical symptoms: + +* The website starts returning HTTP 5xx error codes. +* Dependent services, such as database or storage, start to throttle requests. Look for HTTP errors such as HTTP 429 (Too Many Requests), depending on the service. +* HTTP queue length grows. + +**Recovery** + +* Scale out to handle increased load. +* Mitigate failures to avoid having cascading failures disrupt the entire application. Mitigation strategies include: + + * Implement the [Throttling Pattern][throttling-pattern] to avoid overwhelming backend systems. + * Use [queue-based load leveling][queue-based-load-leveling] to buffer requests and process them at an appropriate pace. + * Prioritize certain clients. For example, if the application has free and paid tiers, throttle customers on the free tier, but not paid customers. See [Priority queue pattern][priority-queue-pattern].
+ +**Diagnostics**. Use [App Service diagnostic logging][app-service-logging]. Use a service such as [Azure Log Analytics][azure-log-analytics], [Application Insights][app-insights], or [New Relic][new-relic] to help understand the diagnostic logs. + +### One of the operations in a workflow or distributed transaction fails. +**Detection**. After *N* retry attempts, it still fails. + +**Recovery** + +* As a mitigation plan, implement the [Scheduler Agent Supervisor][scheduler-agent-supervisor] pattern to manage the entire workflow. +* Don't retry on timeouts. There is a low success rate for this error. +* Queue work, in order to retry later. + +**Diagnostics**. Log all operations (successful and failed), including compensating actions. Use correlation IDs, so that you can track all operations within the same transaction. + +### A call to a remote service fails. +**Detection**. HTTP error code. + +**Recovery** + +1. Retry on transient failures. +2. If the call fails after *N* attempts, take a fallback action. (Application specific.) +3. Implement the [Circuit Breaker pattern][circuit-breaker] to avoid cascading failures. + +**Diagnostics**. Log all remote call failures. + +## Next steps +For more information about the FMA process, see [Resilience by design for cloud services][resilience-by-design-pdf] (PDF download). 
+ + + +[api-management]: https://azure.microsoft.com/documentation/services/api-management/ +[api-management-throttling]: /azure/api-management/api-management-sample-flexible-throttling/ +[app-insights]: /azure/application-insights/app-insights-overview/ +[app-insights-web-apps]: /azure/application-insights/app-insights-azure-web-apps/ +[app-service-configure]: /azure/app-service-web/web-sites-configure/ +[app-service-logging]: /azure/app-service-web/web-sites-enable-diagnostic-log/ +[app-service-slots]: /azure/app-service-web/web-sites-staged-publishing/ +[auto-rest-client-retry]: https://github.com/Azure/autorest/tree/master/docs +[azure-activity-logs]: /azure/monitoring-and-diagnostics/monitoring-overview-activity-logs/ +[azure-alerts]: /azure/monitoring-and-diagnostics/insights-alerts-portal/ +[azure-log-analytics]: /azure/log-analytics/log-analytics-overview/ +[BrokeredMessage.TimeToLive]: https://msdn.microsoft.com/library/microsoft.servicebus.messaging.brokeredmessage.timetolive.aspx +[cassandra-error-handling]: http://www.datastax.com/dev/blog/cassandra-error-handling-done-right +[circuit-breaker]: https://msdn.microsoft.com/library/dn589784.aspx +[docdb-multi-region]: /azure/documentdb/documentdb-developing-with-multiple-regions/ +[elasticsearch-azure]: ../elasticsearch/index.md +[elasticsearch-client]: https://www.elastic.co/guide/en/elasticsearch/client/index.html +[health-endpoint-monitoring-pattern]: https://msdn.microsoft.com/library/dn589789.aspx +[onstop-events]: https://azure.microsoft.com/blog/the-right-way-to-handle-azure-onstop-events/ +[lb-monitor]: /azure/load-balancer/load-balancer-monitor-log/ +[lb-probe]: /azure/load-balancer/load-balancer-custom-probe-overview/#learn-about-the-types-of-probes +[new-relic]: https://newrelic.com/ +[priority-queue-pattern]: https://msdn.microsoft.com/library/dn589794.aspx +[queue-based-load-leveling]: https://msdn.microsoft.com/library/dn589783.aspx +[QuotaExceededException]: 
https://msdn.microsoft.com/library/azure/microsoft.servicebus.messaging.quotaexceededexception.aspx +[ra-web-apps-basic]: ../blueprints/managed-web-app/basic-web-app.md +[redis-monitor]: /azure/redis-cache/cache-how-to-monitor/ +[redis-retry]: ../best-practices/retry-service-specific.md#azure-redis-cache-retry-guidelines +[resilience-by-design-pdf]: http://download.microsoft.com/download/D/8/C/D8C599A4-4E8A-49BF-80EE-FE35F49B914D/Resilience_by_Design_for_Cloud_Services_White_Paper.pdf +[RoleEntryPoint.OnStop]: https://msdn.microsoft.com/library/azure/microsoft.windowsazure.serviceruntime.roleentrypoint.onstop.aspx +[RoleEnvironment.Stopping]: https://msdn.microsoft.com/library/azure/microsoft.windowsazure.serviceruntime.roleenvironment.stopping.aspx +[rm-locks]: /azure/azure-resource-manager/resource-group-lock-resources/ +[sb-dead-letter-queue]: /azure/service-bus-messaging/service-bus-dead-letter-queues/ +[sb-georeplication-sample]: https://github.com/Azure-Samples/azure-servicebus-messaging-samples/tree/master/GeoReplication +[sb-messagingexception-class]: https://msdn.microsoft.com/library/azure/microsoft.servicebus.messaging.messagingexception.aspx +[sb-messaging-exceptions]: /azure/service-bus-messaging/service-bus-messaging-exceptions/ +[sb-outages]: /azure/service-bus-messaging/service-bus-outages-disasters/#protecting-queues-and-topics-against-datacenter-outages-or-disasters +[sb-partition]: /azure/service-bus-messaging/service-bus-partitioning/ +[sb-poison-message]: /azure/app-service-web/websites-dotnet-webjobs-sdk-storage-queues-how-to/#poison +[sb-retry]: ../best-practices/retry-service-specific.md#service-bus-retry-guidelines +[search-sdk]: https://msdn.microsoft.com/library/dn951165.aspx +[scheduler-agent-supervisor]: https://msdn.microsoft.com/library/dn589780.aspx +[search-analytics]: /azure/search/search-traffic-analytics/ +[sql-db-audit]: /azure/sql-database/sql-database-auditing-get-started/ +[sql-db-errors]: 
/azure/sql-database/sql-database-develop-error-messages/#resource-governance-errors +[sql-db-failover]: /azure/sql-database/sql-database-geo-replication-failover-portal/ +[sql-db-limits]: /azure/sql-database/sql-database-resource-limits/ +[sql-db-replication]: /azure/sql-database/sql-database-geo-replication-overview/ +[storage-metrics]: https://msdn.microsoft.com/library/dn782843.aspx +[storage-replication]: /azure/storage/storage-redundancy/ +[Storage.RetryPolicies]: https://msdn.microsoft.com/library/microsoft.windowsazure.storage.retrypolicies.aspx +[sys.event_log]: https://msdn.microsoft.com/library/dn270018.aspx +[throttling-pattern]: https://msdn.microsoft.com/library/dn589798.aspx +[web-jobs]: /azure/app-service-web/web-sites-create-web-jobs/ +[web-jobs-shutdown]: /azure/app-service-web/websites-dotnet-webjobs-sdk-storage-queues-how-to/#graceful diff --git a/docs/resiliency/high-availability-azure-applications.md b/docs/resiliency/high-availability-azure-applications.md new file mode 100644 index 00000000000..d547620e847 --- /dev/null +++ b/docs/resiliency/high-availability-azure-applications.md @@ -0,0 +1,138 @@ +--- +title: High availability for Azure applications +description: Technical overview and in-depth information about designing and building applications for high availability on Microsoft Azure. +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: 8be4f323-ca00-4025-9356-c9bbc998eef3 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# High availability for applications built on Microsoft Azure +A highly available application absorbs fluctuations in availability, load, and temporary failures in the dependent services and hardware. 
The application continues to operate at an acceptable user and systemic response level, as defined by business requirements or application service-level agreements (SLAs). + +## Azure high-availability features +Azure has many built-in platform features that support highly available applications. This section describes some of those key features. + +### Fabric controller +The Azure fabric controller provisions and monitors the condition of the Azure compute instances. The fabric controller checks the status of the hardware and software of the host and guest machine instances. When it detects a failure, it enforces SLAs by automatically relocating the VM instances. The concept of fault and upgrade domains further supports the compute SLA. + +When multiple Cloud Service role instances are deployed, Azure deploys these instances to different fault domains. A fault domain boundary is basically a different hardware rack in the same region. Fault domains reduce the probability that a localized hardware failure will interrupt the service of an application. You cannot manage the number of fault domains that are allocated to your worker or web roles. The fabric controller uses dedicated resources that are separate from Azure-hosted applications. It has 100 percent uptime because it serves as the nucleus of the Azure system. It monitors and manages role instances across fault domains. + +The following diagram shows Azure shared resources that the fabric controller deploys and manages across different fault domains. + +![Simplified view of fault domain isolation](./images/high-availability-azure-applications/fault-domain-isolation.png) + +Upgrade domains are similar to fault domains in function, but they support upgrades rather than failures. An upgrade domain is a logical unit of instance separation that determines which instances in a particular service will be upgraded at a point in time. By default, for your hosted service deployment, five upgrade domains are defined. 
However, you can change that value in the service definition file. For example, assume that you have eight instances of your web role. There will be two instances in three upgrade domains and one instance in each of the remaining two upgrade domains. Azure defines the update sequence, but it's based on the number of upgrade domains. For more information on upgrade domains, see [Update a cloud service](/azure/cloud-services/cloud-services-update-azure-service/). + +### Features in other services +In addition to the platform features that support high compute availability, Azure embeds high-availability features into its other services. For example, Azure Storage maintains three replicas of all blob, table, and queue data. It also allows the option of geo-replication to store backups of blobs and tables in a secondary region. The Azure Content Delivery Network allows blobs to be cached around the world for both redundancy and scalability. Azure SQL Database maintains multiple replicas as well. + +In addition to the [Resiliency technical guidance](https://aka.ms/bctechguide) series of articles, see the [Best Practices for the Design of Large-Scale Services on Azure Cloud Services](https://azure.microsoft.com/blog/best-practices-for-designing-large-scale-services-on-windows-azure/) paper. These papers provide a deeper discussion of the Azure platform availability features. + +Although Azure provides multiple features that support high availability, it's important to understand their limitations: + +* For compute, Azure guarantees that your roles are available and running, but it does not know if your application is running or overloaded. +* For Azure SQL Database, data is replicated synchronously within the region. You can choose active geo-replication, which allows up to four additional database copies in the same region (or different regions). These database replicas are not point-in-time backups. SQL databases do provide point-in-time backup capabilities. 
To learn more about SQL Database point-in-time capabilities, read [Azure SQL Database Point in Time Restore](https://azure.microsoft.com/blog/azure-sql-database-point-in-time-restore/). +* For Azure Storage, table and blob data is replicated by default to an alternate region. However, you cannot access the replicas until Microsoft chooses to fail over to the alternate site. A region failover occurs only in the case of a prolonged region-wide service disruption, and there is no SLA for geo-failover time. It's also important to note that any data corruption quickly spreads to the replicas. + +For these reasons, you must supplement the platform availability features with application-specific availability features. Application-specific availability features include the blob snapshot feature to create point-in-time backups of blob data. + +### Availability sets for Azure Virtual Machines +The majority of this article focuses on cloud services, which use a platform as a service (PaaS) model. However, there are also specific availability features for Azure Virtual Machines, which uses an infrastructure as a service (IaaS) model. To achieve high availability with Virtual Machines, you must use availability sets. An availability set serves a similar function to fault and upgrade domains. Within an availability set, Azure positions the virtual machines in a way that prevents localized hardware faults and maintenance activities from bringing down all of the machines in that group. Availability sets are required to achieve the Azure SLA for the availability of Virtual Machines. + +The following diagram provides a representation of two availability sets that group web and SQL Server virtual machines, respectively. + +![Availability sets for Azure Virtual Machines](./images/high-availability-azure-applications/availability-set-for-azure-virtual-machines.png) + +> [!NOTE] +> In the preceding diagram, SQL Server is installed and running on virtual machines. 
This is different from the previous discussion of Azure SQL Database, which provides a database as a managed service. +> +> + +## Application strategies for high availability +Most application strategies for high availability involve either redundancy or the removal of hard dependencies between application components. Application design should support fault tolerance during sporadic downtime of Azure or third-party services. The following sections describe application patterns for increasing the availability of your cloud services. + +### Asynchronous communication and durable queues +Consider asynchronous communication between loosely coupled services to increase availability in Azure applications. In this pattern, write messages to either storage queues or Azure Service Bus queues for later processing. When you write the message to the queue, control immediately returns to the sender of the message. Another tier of the application handles the message processing, typically implemented as a worker role. If the worker role goes down, the messages accumulate in the queue until the processing service is restored. As long as the queue is available, there is no direct dependency between the front-end sender and the message processor. This eliminates the requirement for synchronous service calls that can be a throughput bottleneck in distributed applications. + +A variation of this uses Azure Storage (blobs, tables, queues) or Service Bus queues as a failover location for failed database calls. For example, a synchronous call within an application to another service (such as Azure SQL Database) fails repeatedly. You might be able to serialize that data into durable storage. At some later point when the service or database is back online, the application can re-submit the request from storage. The difference in this model is that the intermediate location is not a constant part of the application workflow. It is used only in failure scenarios. 
+ +In both scenarios, asynchronous communication and intermediate storage prevent a downed back-end service from bringing down the entire application. Queues serve as a logical intermediary. For more guidance on choosing the correct queuing service, see [Azure queues and Azure Service Bus queues--compared and contrasted](/azure/service-bus-messaging/service-bus-azure-and-service-bus-queues-compared-contrasted/). + +### Fault detection and retry logic +A key point in highly available application design is to use retry logic within code to gracefully handle a service that is temporarily down. The [Transient Fault Handling Application Block](https://msdn.microsoft.com/library/hh680934.aspx), developed by the Microsoft Patterns and Practices team, assists application developers in this process. The word “transient” means a temporary condition that lasts only for a relatively short time. In the context of this article, handling transient failures is part of developing a highly available application. Examples of transient conditions include intermittent network errors and lost database connections. + +The Transient Fault Handling Application Block is a simplified way for you to handle failures within your code in a graceful manner. You can use it to improve the availability of your applications by adding robust transient fault-handling logic. In most cases, retry logic handles the brief interruption and reconnects the sender and receiver after one or more failed attempts. A successful retry attempt typically goes unnoticed to application users. + +Developers have three options for managing their retry logic: incremental, fixed interval, and exponential. Incremental waits longer before each retry in an increasing linear fashion (for example, 1, 2, 3, and 4 seconds). Fixed interval waits the same amount of time between each retry (for example, 2 seconds). For a more random option, the exponential back-off waits longer between retries. 
However, it uses exponential behavior (for example, 2, 4, 8, and 16 seconds). + +The high-level strategy within your code is: + +1. Define your retry strategy and policy. +2. Try the operation that might result in a transient fault. +3. If transient fault occurs, invoke the retry policy. +4. If all retries fail, catch a final exception. + +Test your retry logic in simulated failures to ensure that retries on successive operations do not result in an unanticipated lengthy delay. Do this before deciding to fail the overall task. + +### Reference data pattern for high availability +Reference data is the read-only data of an application. This data provides the business context within which the application generates transactional data during the course of a business operation. Transactional data is a point-in-time function of the reference data. Therefore, its integrity depends on the snapshot of the reference data at the time of the transaction. This is a somewhat loose definition, but it should suffice for our purpose here. + +Reference data in the context of an application is necessary for the functioning of the application. The respective applications create and maintain reference data; master data management (MDM) systems often perform this function. These systems are responsible for the life cycle of the reference data. Examples of reference data include product catalog, employee master, parts master, and equipment master. Reference data can also originate from outside the organization, for example, postal codes or tax rates. Strategies for increasing the availability of reference data are typically less difficult than those for transactional data. Reference data has the advantage of being mostly immutable. + +You can make Azure web and worker roles that consume reference data autonomous at runtime by deploying the reference data along with the application. If the size of the local storage allows such a deployment, this is an ideal state. 
Embedded databases (SQL, NoSQL) or XML files deployed to a local file system will help with the autonomy of Azure compute scale units. However, you should have a mechanism to update the data in each role without requiring redeployment. To do this, place any updates to the reference data to a cloud storage endpoint (for example, Azure Blob storage or SQL Database). Add code to each role that downloads the data updates into the compute nodes at role startup. Alternatively, add code that allows an administrator to perform a forced download into the role instances. + +To increase availability, the roles should also contain a set of reference data in case storage is down. This enables the roles to start with a basic set of reference data until the storage resource becomes available for the updates. + +![Application high availability through autonomous compute nodes](./images/high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png) + +One consideration for this pattern is the deployment and startup speed for your roles. If you are deploying or downloading large amounts of reference data on startup, this can increase the amount of time it takes to spin up new deployments or role instances. This might be an acceptable tradeoff for the autonomy of having the reference data immediately available on each role rather than depending on external storage services. + +### Transactional data pattern for high availability +Transactional data is the data that the application generates in a business context. Transactional data is a combination of the set of business processes that the application implements and the reference data that supports these processes. Examples of transactional data can include orders, advanced shipping notices, invoices, and customer relationship management (CRM) opportunities. The transactional data thus generated will be fed to external systems for record keeping or for further processing. 
+ +Keep in mind that reference data can change within the systems that are responsible for this data. For this reason, transactional data must save the point-in-time reference data context so that it has minimal external dependencies for its semantic consistency. For example, consider the removal of a product from the catalog a few months after an order is fulfilled. The best practice is to embed as much reference data context as feasible into the transaction. This preserves the semantics associated with the transaction, even if the reference data changes after the transaction is captured. + +As mentioned previously, architectures that use loose coupling and asynchronous communication lend themselves to higher levels of availability. This holds true for transactional data as well, but the implementation is more complex. Traditional transactional notions typically rely on the database for guaranteeing the transaction. When you introduce intermediate layers, the application code must correctly handle the data at various layers to ensure sufficient consistency and durability. + +The following sequence describes a workflow that separates the capture of transactional data from its processing: + +1. Web compute node: Present reference data. +2. External storage: Save intermediate transactional data. +3. Web compute node: Complete the end-user transaction. +4. Web compute node: Send the completed transactional data, along with the reference data context, to temporary durable storage that is guaranteed to give a predictable response. +5. Web compute node: Signal the end-user completion of the transaction. +6. Background compute node: Extract the transactional data, post-process it if necessary, and send it to its final storage location in the current system. + +The following diagram shows one possible implementation of this design in an Azure-hosted cloud service. 
+ +![High availability through loose coupling](./images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-loose-coupling.png) + +The dashed arrows in the preceding diagram indicate asynchronous processing. The front-end web role is not aware of this asynchronous processing. This leads to the storage of the transaction at its final destination with reference to the current system. Due to the latency that this asynchronous model introduces, the transactional data is not immediately available for query. Therefore, each unit of the transactional data needs to be saved in a cache or a user session to meet the immediate UI needs. + +The web role is autonomous from the rest of the infrastructure. Its availability profile is a combination of the web role and the Azure queue and not the entire infrastructure. In addition to high availability, this approach allows the web role to scale horizontally, independently of the back-end storage. This high-availability model can have an impact on the economics of operations. Additional components like Azure queues and worker roles can affect monthly usage costs. + +Note that the previous diagram shows one implementation of this decoupled approach to transactional data. There are many other possible implementations. The following list provides some alternatives: + +* A worker role might be placed between the web role and the storage queue. +* A Service Bus queue can be used instead of an Azure Storage queue. +* The final destination might be Azure Storage or a different database provider. +* Azure Cache can be used at the web layer to provide the immediate caching requirements after the transaction. + +### Scalability patterns +It's important to note that the scalability of the cloud service directly affects availability. If increased load causes your service to be unresponsive, the user impression is that the application is down. 
Follow best practices for scalability based on your expected application load and future expectations. The highest scale involves many considerations, such as the use of single versus multiple storage accounts, sharding across multiple databases, and caching strategies. For an in-depth look at these patterns, see [Best Practices for the Design of Large-Scale Services on Azure Cloud Services](https://azure.microsoft.com/blog/best-practices-for-designing-large-scale-services-on-windows-azure/). + +## Next steps +This article is part of a series of articles focused on [disaster recovery and high availability for applications built on Microsoft Azure](disaster-recovery-high-availability-azure-applications.md). The next article in this series is [Disaster recovery for applications built on Microsoft Azure](disaster-recovery-azure-applications.md). + diff --git a/docs/resiliency/high-availability-checklist.md b/docs/resiliency/high-availability-checklist.md new file mode 100644 index 00000000000..7e38328a684 --- /dev/null +++ b/docs/resiliency/high-availability-checklist.md @@ -0,0 +1,86 @@ +--- +title: High availability checklist +description: A quick checklist of settings and actions that you can take to ensure you are improving your application's availability with Azure. +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: 7e19ab40-2b63-4037-8546-e62342e6fd7f +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# High availability checklist +One of the great benefits of using Azure is the ability to increase the availability (and scalability) of your applications with the help of the cloud. To make sure you are making the most of those options, the checklist below is meant to help you with some of the key infrastructure basics for ensuring that your applications are resilient. 
+ +> [!NOTE] +> Most of the suggestions below are things that can be implemented at any time in your application and thus are great for "quick fixes". The best long-term solution often involves an application design that is built for the cloud. For a checklist on these more design-oriented areas, please read our [Availability checklist](../checklist/availability.md). +> +> + +### Are you using Traffic Manager in front of your resources? +Using Traffic Manager helps you route internet traffic across Azure regions, or Azure and on-premises locations. You can do this for a number of reasons including latency and availability. + +**What happens if you don't use Traffic Manager?** If you aren't using Traffic Manager in front of your application, you are limited to a single region for your resources. This limits your scale, increases latency to users that are not close to your chosen region, and lowers your protection in the case of a region-wide service disruption. + +### Have you avoided using a single virtual machine for any role? +Good design avoids any single point of failure. This is important in all service design (on-premises or in the cloud) but is especially useful in the cloud as you can increase scalability and resiliency through scaling out (adding virtual machines) instead of scaling up (using a more powerful virtual machine). If you would like to find out more information on scalable application design, read [High availability for applications built on Microsoft Azure](high-availability-azure-applications.md). + +**What happens if you have a single virtual machine for a role?** A single machine is a single point of failure and is not eligible for the [Azure Virtual Machine Service Level Agreement](https://azure.microsoft.com/support/legal/sla/virtual-machines/v1_0/). 
In the best cases, your application will run just fine but this is not a resilient design, is not covered by the Azure Virtual Machine SLA, and any single point of failure increases the chance of downtime if something fails. + +### Are you using a load balancer in front of your application's internet-facing VMs? +Load balancers allow you to spread the incoming traffic to your application across an arbitrary number of machines. You can add/remove machines from your load balancer at any time, which works well with Virtual Machines (and also with auto-scaling with Virtual Machine Scale Sets) to allow you to easily handle increases in traffic or VM failures. If you want to know more about load balancers, please read the [Azure Load Balancer overview](/azure/load-balancer/load-balancer-overview/). + +**What happens if you are not using a load balancer in front of your internet-facing VMs?** Without a load balancer you will not be able to scale out (add more virtual machines) and your only option will be to scale up (increase the size of your web-facing virtual machine). You will also face a single point of failure with that virtual machine. You will also need to write DNS code to notice if you have lost an internet-facing machine and re-map your DNS entry to the new machine you start to take its place. + +### Are you using availability sets for your stateless application and web servers? +Putting your machines in the same application tier in an availability set makes your VMs eligible for the Azure VM SLA. Being part of an availability set also ensures that your machines are put into different update domains (i.e. different host machines that are patched at different times) and fault domains (i.e. host machines that share a common power source and network switch). Without being in an availability set, your VMs could be located on the same host machine and thus there might be a single point of failure that is not visible to you. 
If you would like to find out more information about increasing the availability of your VMs using availability sets, please read [Manage the availability of virtual machines](/azure/virtual-machines/virtual-machines-windows-manage-availability/?toc=%2fazure%2fvirtual-machines%2fwindows%2ftoc.json). + +**What happens if you don't use an availability set with your stateless applications and web servers?** Not using an availability set means that you aren't able to take advantage of the Azure VM SLA. It also means that machines in that layer of your application could all go offline if there is an update on the host machine (the machine that hosts the VMs you are using), or a common hardware failure. + +### Are you using Virtual Machine Scale Sets (VMSS) for your stateless application or web servers? +A good scalable and resilient design uses VMSS to make sure that you can grow/shrink the number of machines in a tier of your application (such as your web tier). VMSS allows you to define how your application tier scales (adding or removing servers based on criteria you choose). If you would like to find out more information on how to use Azure Virtual Machine Scale Sets to increase your resiliency to traffic spikes, read [Virtual Machine Scale Sets Overview](/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview/). + +**What happens if you don't use a Virtual Machine Scale Set with your stateless application or web server?** Without a VMSS, you restrict your ability to scale freely and to optimize your use of resources. A design that lacks VMSS has an upper scaling limit that will have to be handled with additional code (or manually). This lack of a VMSS also means that your application cannot easily add and remove machines (regardless of scale) to help you handle large spikes of traffic (such as during a promotion or if your site/app/product goes viral). 
+ +### Are you using premium storage and separate storage accounts for each of your virtual machines? +It is a best practice to use premium storage for your production virtual machines. In addition, you should make sure that you use a separate storage account for each virtual machine (this is true for small-scale deployments; for larger deployments you can re-use storage accounts for multiple machines, but you need to take care that the accounts are balanced across update domains and across tiers of your application). If you would like to find out more information on Azure Storage performance and scalability, read [Microsoft Azure Storage Performance and Scalability Checklist](/azure/storage/storage-performance-checklist/). + +**What happens if you don't use separate storage accounts for each virtual machine?** A storage account, like many other resources, is a single point of failure. Although there are many protections and resiliency features of Azure Storage, a single point of failure is never a good design. For instance, if access rights to that account get corrupted, a storage limit is hit, or an [IOPS limit](/azure/azure-subscription-service-limits/#virtual-machine-disk-limits) is reached, all virtual machines using that storage account are impacted. Additionally, if there is a service disruption that impacts a storage stamp that includes that particular storage account, you could have multiple virtual machines impacted. + +### Are you using a load balancer or a queue between each tier of your application? +Using load balancers or queues between each tier of your application enables you to scale each tier of your application easily and independently. You should choose between these technologies based on your latency, complexity, and distribution (i.e. how widely you are distributing your app) needs. 
In general, queues tend to have higher latency and add complexity, but they benefit you by being more resilient and allowing you to distribute your application over larger areas (such as across regions). If you would like to find out more information on how to use internal load balancers or queues, please read [Internal Load balancer Overview](/azure/load-balancer/load-balancer-internal-overview/) and [Azure Queues and Service Bus queues - compared and contrasted](/azure/service-bus-messaging/service-bus-azure-and-service-bus-queues-compared-contrasted/). + +**What happens if you don't use a load balancer or queue between each tier of your application?** Without a load balancer or queue between each tier of your application, it is difficult to scale your application up or down and distribute its load across multiple machines. Not doing this can lead to over- or under-provisioning your resources and a risk of downtime, or poor user experience, if you have unexpected changes in traffic or system failures. + +### Are your SQL Databases using active geo-replication? +Active Geo-Replication enables you to configure up to 4 readable secondary databases in the same, or different, regions. Secondary databases are available in the case of a service disruption or the inability to connect to the primary database. If you want to know more about SQL Database active geo-replication, please read [Overview: SQL Database Active Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/). + + **What happens if you don't use active geo-replication with your SQL databases?** Without active geo-replication, if your primary database ever goes offline (planned maintenance, service disruption, hardware failure, etc.) your application database will be offline until you can bring your primary database back online in a healthy state. + +### Are you using a cache (Azure Redis Cache) in front of your databases? 
+If your application has a high database load where most of the database calls are reads, you can increase the speed of your application and decrease the load on your database (thus increasing the scale it can handle) by implementing a caching layer in front of your database to offload these read operations. If you would like to learn more about Azure Redis Cache, please read [Caching guidance](../best-practices/caching.md). + + **What happens if you don't use a cache in front of your database?** If your database machine is powerful enough to handle the traffic load you put on it, then your application will respond as normal, though this may mean that at lower load you will be paying for a database machine that is more expensive than necessary. If your database machine is not powerful enough to handle your load, then you will start to see a poor user experience with your application (latency, timeouts, and possibly service downtime). + +### Have you contacted Microsoft Azure Support if you are expecting a high scale event? +Azure support can help you increase your service limits to deal with planned high traffic events (like new product launches or special holidays). Azure Support may also be able to help you connect with experts who can help you review your design with your account team and help you find the best solution to meet your high scale event needs. If you would like to find out more information on how to contact Azure support, please read the [Azure Support FAQs](https://azure.microsoft.com/support/faq/). + +**What happens if you don't contact Azure Support for a high-scale event?** If you don’t communicate or plan for a high-traffic event, you risk hitting certain [Azure services limits](/azure/azure-subscription-service-limits/) and thus creating a poor user experience (or worse, downtime) during your event. 
Architectural reviews and communicating ahead of surges can help mitigate these risks. + +### Are you using a Content Delivery Network (Azure CDN) in front of your web-facing storage blobs and static assets? +Using a CDN helps you take load off your servers by caching your content in the CDN POP/edge locations that are located around the world. You can do this to decrease latency, increase scalability, decrease server load, and as part of a strategy for protection from denial-of-service (DoS) attacks. If you would like to find out more information on how to use Azure CDN to increase your resiliency and decrease your customer latency, read [Overview of the Azure Content Delivery Network (CDN)](/azure/cdn/cdn-overview/). + +**What happens if you don't use a CDN?** If you aren't using a CDN, then all of your customer traffic comes directly to your resources. This means that you will see higher loads on your servers, which decreases their scalability. Additionally, your customers may experience higher latencies as CDNs offer locations around the world that are likely closer to your customers. + +## Next steps: +If you would like to read more about how to design your applications for high availability, please read [High availability for applications built on Microsoft Azure](high-availability-azure-applications.md). 
+ diff --git a/docs/resiliency/images/bulkhead.png b/docs/resiliency/images/bulkhead.png new file mode 100644 index 00000000000..e37705cdc4a Binary files /dev/null and b/docs/resiliency/images/bulkhead.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/active-active.png b/docs/resiliency/images/disaster-recovery-azure-applications/active-active.png new file mode 100644 index 00000000000..674369c1ba5 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/active-active.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-database-only.png b/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-database-only.png new file mode 100644 index 00000000000..c115b3f4f5a Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-database-only.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-full-replica.png b/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-full-replica.png new file mode 100644 index 00000000000..c07578e9186 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/active-passive-full-replica.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/application-high-availability-through-loose-coupling.png b/docs/resiliency/images/disaster-recovery-azure-applications/application-high-availability-through-loose-coupling.png new file mode 100644 index 00000000000..19e90276df5 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/application-high-availability-through-loose-coupling.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/degraded-application-mode-for-transaction-capture.png 
b/docs/resiliency/images/disaster-recovery-azure-applications/degraded-application-mode-for-transaction-capture.png new file mode 100644 index 00000000000..4fb773e50de Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/degraded-application-mode-for-transaction-capture.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/redeploy-to-a-secondary-azure-region.png b/docs/resiliency/images/disaster-recovery-azure-applications/redeploy-to-a-secondary-azure-region.png new file mode 100644 index 00000000000..b0ff8eb7ee9 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/redeploy-to-a-secondary-azure-region.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/reference-data-publication-to-both-primary-and-secondary-regions.png b/docs/resiliency/images/disaster-recovery-azure-applications/reference-data-publication-to-both-primary-and-secondary-regions.png new file mode 100644 index 00000000000..5133900bf43 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/reference-data-publication-to-both-primary-and-secondary-regions.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png b/docs/resiliency/images/disaster-recovery-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png new file mode 100644 index 00000000000..e5bd285c803 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/routing-using-azure-traffic-manager.png b/docs/resiliency/images/disaster-recovery-azure-applications/routing-using-azure-traffic-manager.png new file mode 100644 index 00000000000..ca6abc21b42 Binary files /dev/null 
and b/docs/resiliency/images/disaster-recovery-azure-applications/routing-using-azure-traffic-manager.png differ diff --git a/docs/resiliency/images/disaster-recovery-azure-applications/single-region-deployment.png b/docs/resiliency/images/disaster-recovery-azure-applications/single-region-deployment.png new file mode 100644 index 00000000000..15729a63f65 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-azure-applications/single-region-deployment.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-active.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-active.png new file mode 100644 index 00000000000..01c73bd6b5e Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-active.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-database-only.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-database-only.png new file mode 100644 index 00000000000..2f818be38a0 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-database-only.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-full-replica.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-full-replica.png new file mode 100644 index 00000000000..0af849a417a Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/active-passive-full-replica.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png 
b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png new file mode 100644 index 00000000000..e28ae3977c7 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-loose-coupling.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-loose-coupling.png new file mode 100644 index 00000000000..a97cf883598 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/application-high-availability-through-loose-coupling.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/availability-set-for-azure-virtual-machines.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/availability-set-for-azure-virtual-machines.png new file mode 100644 index 00000000000..41bb3ab2a8c Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/availability-set-for-azure-virtual-machines.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/degraded-application-mode-for-transaction-capture.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/degraded-application-mode-for-transaction-capture.png new file mode 100644 index 00000000000..f054a5c9021 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/degraded-application-mode-for-transaction-capture.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/fault-domain-isolation.png 
b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/fault-domain-isolation.png new file mode 100644 index 00000000000..4197bd2858c Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/fault-domain-isolation.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/redeploy-to-a-secondary-azure-region.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/redeploy-to-a-secondary-azure-region.png new file mode 100644 index 00000000000..0de3e43e58d Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/redeploy-to-a-secondary-azure-region.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/reference-data-publication-to-both-primary-and-secondary-databases.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/reference-data-publication-to-both-primary-and-secondary-databases.png new file mode 100644 index 00000000000..f71d9edeffd Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/reference-data-publication-to-both-primary-and-secondary-databases.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png new file mode 100644 index 00000000000..fab14eaa9be Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/replicate-transactional-data-in-preparation-for-disaster-recovery.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/routing-using-azure-traffic-manager.png 
b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/routing-using-azure-traffic-manager.png new file mode 100644 index 00000000000..ca6abc21b42 Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/routing-using-azure-traffic-manager.png differ diff --git a/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/single-region-deployment.png b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/single-region-deployment.png new file mode 100644 index 00000000000..d6c022a770a Binary files /dev/null and b/docs/resiliency/images/disaster-recovery-high-availability-azure-applications/single-region-deployment.png differ diff --git a/docs/resiliency/images/high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png b/docs/resiliency/images/high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png new file mode 100644 index 00000000000..6f9861cd55a Binary files /dev/null and b/docs/resiliency/images/high-availability-azure-applications/application-high-availability-through-autonomous-compute-nodes.png differ diff --git a/docs/resiliency/images/high-availability-azure-applications/availability-set-for-azure-virtual-machines.png b/docs/resiliency/images/high-availability-azure-applications/availability-set-for-azure-virtual-machines.png new file mode 100644 index 00000000000..41bb3ab2a8c Binary files /dev/null and b/docs/resiliency/images/high-availability-azure-applications/availability-set-for-azure-virtual-machines.png differ diff --git a/docs/resiliency/images/high-availability-azure-applications/fault-domain-isolation.png b/docs/resiliency/images/high-availability-azure-applications/fault-domain-isolation.png new file mode 100644 index 00000000000..4197bd2858c Binary files /dev/null and 
b/docs/resiliency/images/high-availability-azure-applications/fault-domain-isolation.png differ diff --git a/docs/resiliency/images/monitoring.png b/docs/resiliency/images/monitoring.png new file mode 100644 index 00000000000..28c2c8bdd76 Binary files /dev/null and b/docs/resiliency/images/monitoring.png differ diff --git a/docs/resiliency/images/retry.png b/docs/resiliency/images/retry.png new file mode 100644 index 00000000000..d0457c461ef Binary files /dev/null and b/docs/resiliency/images/retry.png differ diff --git a/docs/resiliency/images/sla1.png b/docs/resiliency/images/sla1.png new file mode 100644 index 00000000000..7a65a99636e Binary files /dev/null and b/docs/resiliency/images/sla1.png differ diff --git a/docs/resiliency/images/sla2.png b/docs/resiliency/images/sla2.png new file mode 100644 index 00000000000..1729fcccaf1 Binary files /dev/null and b/docs/resiliency/images/sla2.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-1.png b/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-1.png new file mode 100644 index 00000000000..fb00249e885 Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-1.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-2.png b/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-2.png new file mode 100644 index 00000000000..6a648f319f1 Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-local-failures/high_availability_solutions-2.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-1.png b/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-1.png new file mode 100644 index 00000000000..4197bd2858c Binary files /dev/null and 
b/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-1.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-2.png b/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-2.png new file mode 100644 index 00000000000..41bb3ab2a8c Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-local-failures/partitioning-2.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-1.png b/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-1.png new file mode 100644 index 00000000000..10623a0a17e Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-1.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-2.png b/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-2.png new file mode 100644 index 00000000000..897e62184a7 Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-loss-azure-region/SQL_Server_Disaster_Recovery-2.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-3.png b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-3.png new file mode 100644 index 00000000000..fe363d1f4f4 Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-3.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-4.png b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-4.png new file mode 100644 index 00000000000..07eb8dcd987 Binary files /dev/null and 
b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-4.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-5.png b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-5.png new file mode 100644 index 00000000000..1f777f8333f Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-5.png differ diff --git a/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-6.png b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-6.png new file mode 100644 index 00000000000..ce1215af2b5 Binary files /dev/null and b/docs/resiliency/images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-6.png differ diff --git a/docs/resiliency/index.md b/docs/resiliency/index.md new file mode 100644 index 00000000000..10beb71ff3c --- /dev/null +++ b/docs/resiliency/index.md @@ -0,0 +1,366 @@ +--- +title: Design resilient applications +description: How to build resilient applications in Azure, for high availability and disaster recovery. +services: '' +documentationcenter: na +author: MikeWasson +manager: christb +editor: '' +tags: '' + +pnp.series.title: Design for Resiliency + +ms.assetid: 8be7ed24-47aa-4b41-b3e5-3f6342265443 +ms.service: guidance +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: mwasson + +--- +# Designing resilient applications for Azure + +In a distributed system, failures will happen. Hardware can fail. The network can have transient failures. Rarely, an entire service or region may experience a disruption, but even those must be planned for. + +Building a reliable application in the cloud is different than building a reliable application in an enterprise setting. 
While historically you may have purchased higher-end hardware to scale up, in a cloud environment you must scale out instead of up. Costs for cloud environments are kept low through the use of commodity hardware. Instead of focusing on preventing failures and optimizing "mean time between failures," in this new environment the focus shifts to "mean time to restore." The goal is to minimize the impact from a failure. + +This article gives an overview of how to build resilient applications in Microsoft Azure. It starts with a definition of the term *resiliency* and related concepts. Then it describes a process for achieving resiliency, using a structured approach over the life of an application, from design and implementation, to deployment and operations. + +## What is resiliency? +**Resiliency** is the ability to recover from failures and continue to function. It's not about *avoiding* failures, but *responding* to failures in a way that avoids downtime or data loss. The goal of resiliency is to return the application to a fully functioning state after a failure. + +Two important aspects of resiliency are high availability and disaster recovery. + +* **High availability** (HA) is the ability of the application to keep running in a healthy state, without significant downtime. By "healthy state," we mean the application is responsive, and users can connect to the application and interact with it. +* **Disaster recovery** (DR) is the ability to recover from rare but major incidents: Non-transient, wide-scale failures, such as service disruption that affects an entire region. Disaster recovery includes data backup and archiving, and may include manual interventions, such as restoring a database from backup. + +One way to think about HA versus DR is that DR starts when the impact of a fault exceeds the ability of the HA design to handle it. 
For example, putting several VMs behind a load balancer will provide availability if one VM fails, but not if they all fail at the same time. + +When you design an application to be resilient, you have to understand your availability requirements. How much downtime is acceptable? This is partly a function of cost. How much will potential downtime cost your business? How much should you invest in making the application highly available? You also have to define what it means for the application to be available. For example, is the application "down" if a customer can submit an order but the system cannot process it in the normal timeframe? + +Another common term is **business continuity** (BC), which is the ability to perform essential business functions during and after a disaster. BC covers the entire operation of the business, including physical facilities, people, communications, transportation, and IT. In this article, we are just focused on cloud applications, but resilience planning must be done in the context of overall BC requirements. + +## Process to achieve resiliency +Resiliency is not an add-on. It must be designed into the system and put into operational practice. Here is a general model to follow: + +1. **Define** your availability requirements, based on business needs +2. **Design** the application for resiliency. Start with an architecture that follows proven practices, and then identify the possible failure points in that architecture. +3. **Implement** strategies to detect and recover from failures. +4. **Test** the implementation by simulating faults and triggering forced failovers. +5. **Deploy** the application into production using a reliable, repeatable process. +6. **Monitor** the application to detect failures. By monitoring the system, you can gauge the health of the application and respond to incidents if necessary. +7. **Respond** if there are incidents that require manual interventions. 
+ +In the remainder of this article, we discuss each of these steps in more detail. + +## Defining your resiliency requirements +Resiliency planning starts with business requirements. Here are some approaches for thinking about resiliency in those terms. + +### Decompose by workload +Many cloud solutions consist of multiple application workloads. The term "workload" in this context means a discrete capability or computing task, which can be logically separated from other tasks, in terms of business logic and data storage requirements. For example, an e-commerce app might include the following workloads: + +* Browse and search a product catalog. +* Create and track orders. +* View recommendations. + +These workloads might have different requirements for availability, scalability, data consistency, disaster recovery, and so forth. Again, these are business decisions. + +Also think about usage patterns. Are there certain critical periods when the system must be available? For example, a tax-filing service can't go down right before the filing deadline; a video streaming service must stay up during a big sports event; and so on. During the critical periods, you might have redundant deployments across several regions, so the application could fail over if one region failed. However, a multi-region deployment is more expensive, so during less critical times, you might run the application in a single region. + +### RTO and RPO +Two important metrics to consider are the recovery time objective and recovery point objective: + +* **Recovery time objective** (RTO) is the maximum acceptable time that an application can be unavailable after an incident. If your RTO is 90 minutes, you must be able to restore the application to a running state within 90 minutes from the start of a disaster. If you have a very low RTO, you might keep a second deployment continually running on standby, to protect against a regional outage. 
+* **Recovery point objective** (RPO) is the maximum duration of data loss that is acceptable during a disaster. For example, if you store data in a single database, with no replication to other databases, and perform hourly backups, you could lose up to an hour of data. + +RTO and RPO are business requirements. Another common metric is **mean time to recover** (MTTR), which is the average time that it takes to restore the application after a failure. MTTR is an empirical fact about a system. If MTTR exceeds the RTO, then a failure in the system will cause an unacceptable business disruption, because it won't be possible to restore the system within the defined RTO. + +### SLAs +In Azure, the [Service Level Agreement][sla] (SLA) describes Microsoft’s commitments for uptime and connectivity. If the SLA for a particular service is 99.9%, it means you should expect the service to be available 99.9% of the time. + +> [!NOTE] +> The Azure SLA also includes provisions for obtaining a service credit if the SLA is not met, along with specific definitions of "availability" for each service. That aspect of the SLA acts as an enforcement policy. +> +> + +You should define your own target SLAs for each workload in your solution. An SLA makes it possible to reason about the architecture, and whether the architecture meets the business requirements. For example, if a workload requires 99.99% uptime, but depends on a service with a 99.9% SLA, that service cannot be a single point of failure in the system. One remedy is to have a fallback path in case the service fails, or take other measures to recover from a failure in that service. + +The following table shows the potential cumulative downtime for various SLA levels. 
+ +| SLA | Downtime per week | Downtime per month | Downtime per year | +| --- | --- | --- | --- | +| 99% |1.68 hours |7.2 hours |3.65 days | +| 99.9% |10.1 minutes |43.2 minutes |8.76 hours | +| 99.95% |5 minutes |21.6 minutes |4.38 hours | +| 99.99% |1.01 minutes |4.32 minutes |52.56 minutes | +| 99.999% |6 seconds |25.9 seconds |5.26 minutes | + +Of course, higher availability is better, everything else being equal. But as you strive for more 9s, the cost and complexity to achieve that level of availability grows. An uptime of 99.99% translates to about 5 minutes of total downtime per month. Is it worth the additional complexity and cost to reach five 9s? The answer depends on the business requirements. + +Here are some other considerations when defining an SLA: + +* To achieve four 9's (99.99%), you probably can't rely on manual intervention to recover from failures. The application must be self-diagnosing and self-healing. +* Beyond four 9's, it is challenging to detect outages quickly enough to meet the SLA. +* Think about the time window that your SLA is measured against. The smaller the window, the tighter the tolerances. It probably doesn't make sense to define your SLA in terms of hourly or daily uptime. + +### Composite SLAs +Consider an App Service web app that writes to Azure SQL Database. At the time of this writing, these Azure services have the following SLAs: + +* App Service Web Apps = 99.95% +* SQL Database = 99.99% + +![Composite SLA](./images/sla1.png) + +What is the maximum downtime you would expect for this application? If either service fails, the whole application fails. In general, the probability of each service failing is independent, so the composite SLA for this application is 99.95% x 99.99% = 99.94%. That's lower than the individual SLAs, which isn't surprising, because an application that relies on multiple services has more potential failure points. 
+ +On the other hand, you can improve the composite SLA by creating independent fallback paths. For example, if SQL Database is unavailable, put transactions into a queue, to be processed later. + +![Composite SLA](./images/sla2.png) + +With this design, the application is still available even if it can't connect to the database. However, it fails if the database and the queue both fail at the same time. Assuming a 99.9% SLA for the queue, the expected percentage of time for a simultaneous failure is 0.0001 × 0.001, so the composite SLA for this combined path is + +* Database OR queue = 1.0 − (0.0001 × 0.001) = 99.99999% + +The total composite SLA is: + +* Web app AND (database OR queue) = 99.95% × 99.99999% = ~99.95% + +But there are tradeoffs to this approach. The application logic is more complex, you are paying for the queue, and there may be data consistency issues to consider. + +**SLA for multi-region deployments**. Another HA technique is to deploy the application in more than one region, and use Azure Traffic Manager to fail over if the application fails in one region. For a two-region deployment, the composite SLA is calculated as follows. + +Let *N* be the composite SLA for the application deployed in one region. The expected chance that the application will fail in both regions at the same time is (1 − N) × (1 − N). Therefore, + +* Combined SLA for both regions = 1 − (1 − N)(1 − N) = N + (1 − N)N + +Finally, you must factor in the [SLA for Traffic Manager][tm-sla]. At the time of this writing, the SLA for Traffic Manager is 99.99%. + +* Composite SLA = 99.99% × (combined SLA for both regions) + +A further detail is that failing over is not instantaneous, which can result in some downtime during a failover. See [Traffic Manager endpoint monitoring and failover][tm-failover]. + +The calculated SLA number is a useful baseline, but it doesn't tell the whole story about availability. Often, an application can degrade gracefully when a non-critical path fails. 
Consider an application that shows a catalog of books. If the application can't retrieve the thumbnail image for the cover, it might show a placeholder image. In that case, failing to get the image does not reduce the application's uptime, although it affects the user experience. + +## Designing for resiliency +During the design phase, you should perform a failure mode analysis (FMA). The goal of an FMA is to identify possible points of failure, and define how the application will respond to those failures. + +* How will the application detect this type of failure? +* How will the application respond to this type of failure? +* How will you log and monitor this type of failure? + +For more information about the FMA process, with specific recommendations for Azure, see [Azure resiliency guidance: Failure mode analysis][fma]. + +### Example of identifying failure modes and detection strategy +**Failure point:** Call to an external web service / API. + +| Failure mode | Detection strategy | +| --- | --- | +| Service is unavailable |HTTP 5xx | +| Throttling |HTTP 429 (Too Many Requests) | +| Authentication |HTTP 401 (Unauthorized) | +| Slow response |Request times out | + +## Resiliency strategies +This section provides a survey of some common resiliency strategies. Most of these are not limited to a particular technology. The descriptions in this section are meant to summarize the general idea behind each technique, with links to further reading. + +### Retry transient failures +Transient failures can be caused by momentary loss of network connectivity, a dropped database connection, or a timeout when a service is busy. Often, a transient failure can be resolved simply by retrying the request. For many Azure services, the client SDK implements automatic retries, in a way that is transparent to the caller; see [Retry service specific guidance][retry-service-specific guidance]. + +Each retry attempt adds to the total latency. 
Also, too many failed requests can cause a bottleneck, as pending requests accumulate in the queue. These blocked requests might hold critical system resources such as memory, threads, database connections, and so on, which can cause cascading failures. To avoid this, increase the delay between each retry attempt, and limit the total number of failed requests. + +![Composite SLA](./images/retry.png) + +For more information, see [Retry Pattern][retry-pattern]. + +### Load balance across instances +For scalability, a cloud application should be able to scale out by adding more instances. This approach also improves resiliency, because unhealthy instances can be taken out of rotation. + +For example: + +* Put two or more VMs behind a load balancer. The load balancer distributes traffic to all the VMs. See [Running multiple VMs on Azure for scalability and availability][ra-multi-vm]. +* Scale out an Azure App Service app to multiple instances. App Service automatically load balances across instances. See [Basic web application][ra-basic-web]. +* Use [Azure Traffic Manager][tm] to distribute traffic across a set of endpoints. + +### Replicate data +Replicating data is a general strategy for handling non-transient failures in a data store. Many storage technologies provide built-in replication, including Azure SQL Database, DocumentDB, and Apache Cassandra. + +It's important to consider both the read and write paths. Depending on the storage technology, you might have multiple writable replicas, or a single writable replica and multiple read-only replicas. + +For highest availability, replicas can be placed in multiple regions. However, this increases the latency to replicate the data. Typically, replicating across regions is done asynchronously, which implies an eventual consistency model and potential data loss if a replica fails.
+ +### Degrade gracefully +If a service fails and there is no failover path, the application may be able to degrade gracefully, in a way that still provides an acceptable user experience. For example: + +* Put a work item on a queue, to be executed later. +* Return an estimated value +* Use locally cached data. +* Show the user an error message. (This option is better than having the application stop responding to requests.) + +### Throttle high-volume users +Sometimes a small number of users create excessive load. That can have an impact on other users, reducing the overall availability of your application. + +When a single client makes an excessive number of requests, the application might throttle the client for a certain period of time. During the throttling period, the application refuses some or all of the requests from that client (depending on the exact throttling strategy). The threshold for throttling might depend on the customer's service tier. + +Throttling does not imply the client was necessarily acting maliciously. It just means the client exceeded their service quota. In some cases, a consumer might consistently exceed their quota or otherwise behave badly. In that case, you might go further and block the user. Typically, this is done by blocking an API key or an IP address range. + +For more information, see [Throttling Pattern][throttling-pattern]. + +### Use a circuit breaker +The Circuit Breaker pattern can prevent an application from repeatedly trying an operation that is likely to fail. The analogy is to a physical circuit breaker, a switch that interrupts the flow of current when a circuit is overloaded. + +The circuit breaker wraps calls to a service. It has three states: + +* **Closed**. This is the normal state. The circuit breaker sends requests to the service, and a counter tracks the number of recent failures. If the failure count exceeds a threshold within a given time period, the circuit breaker switches to the Open state. 
+* **Open**. In this state, the circuit breaker immediately fails all requests, without calling the service. The application should use a mitigation path, such as reading data from a replica or simply returning an error to the user. When the circuit breaker switches to Open, it starts a timer. When the timer expires, the circuit breaker switches to the Half-open state. +* **Half-open**. In this state, the circuit breaker lets a limited number of requests go through to the service. If they succeed, the service is assumed to be recovered, and the circuit breaker switches back to the Closed state. Otherwise, it reverts to the Open state. The Half-Open state prevents a recovering service from suddenly being inundated with requests. + +For more information, see [Circuit Breaker Pattern][circuit-breaker-pattern]. + +### Use load leveling to smooth out spikes in traffic +Applications may experience sudden spikes in traffic, which can overwhelm services on the backend. If a backend service cannot respond to requests quickly enough, it may cause requests to queue (back up), or cause the service to throttle the application. + +To avoid this, you can use a queue as a buffer. When there is a new work item, instead of calling the backend service immediately, the application queues a work item to run asynchronously. The queue acts as a buffer that smooths out peaks in the load. + +For more information, see [Queue-Based Load Leveling Pattern][load-leveling-pattern]. + +### Isolate critical resources +Failures in one subsystem can sometimes cascade, causing failures in other parts of the application. This can happen if a failure causes some resources, such as threads or sockets, not to get freed in a timely manner, leading to resource exhaustion. + +To avoid this, you can partition a system into isolated groups, so that a failure in one partition does not bring down the entire system. This technique is sometimes called the Bulkhead pattern. 
+ +Examples: + +* Partition a database -- for example, by tenant -- and assign a separate pool of web server instances for each partition. +* Use separate thread pools to isolate calls to different services. This helps to prevent cascading failures if one of the services fails. For an example, see the Netflix [Hystrix library][hystrix]. +* Use [containers][containers] to limit the resources available to a particular subsystem. + +![Composite SLA](./images/bulkhead.png) + +### Apply compensating transactions +A compensating transaction is a transaction that undoes the effects of another completed transaction. + +In a distributed system, it can be very difficult to achieve strong transactional consistency. Compensating transactions are a way to achieve consistency by using a series of smaller, individual transactions that can be undone at each step. + +For example, to book a trip, a customer might reserve a car, a hotel room, and a flight. If any of these steps fails, the entire operation fails. Instead of trying to use a single distributed transaction for the entire operation, you can define a compensating transaction for each step. For example, to undo a car reservation, you cancel the reservation. In order to complete the whole operation, a coordinator executes each step. If any step fails, the coordinator applies compensating transactions to undo any steps that were completed. + +For more information, see [Compensating Transaction Pattern][compensating-transaction-pattern]. + +## Testing for resiliency +Generally, you can't test resiliency in the same way that you test application functionality (by running unit tests and so on). Instead, you must test how the end-to-end workload performs under failure conditions, which by definition don't happen all of the time. + +Testing is part of an iterative process. Test the application, measure the outcome, analyze and fix any failures that result, and repeat the process. + +**Fault injection testing**. 
Test the resiliency of the system to failures, either by triggering actual failures or by simulating them. Here are some common failure scenarios to test: + +* Shut down VM instances. +* Crash processes. +* Expire certificates. +* Change access keys. +* Shut down the DNS service on domain controllers. +* Limit available system resources, such as RAM or number of threads. +* Unmount disks. +* Redeploy a VM. + +Measure the recovery times and verify they meet your business requirements. Test combinations of failure modes, as well. Make sure that failures don't cascade, and are handled in an isolated way. + +This is another reason why it's important to analyze possible failure points during the design phase. The results of that analysis should be inputs into your test plan. + +**Load testing**. Load test the application using a tool such as [Visual Studio Team Services][vsts] or [Apache JMeter][jmeter]. Load testing is crucial for identifying failures that only happen under load, such as the backend database being overwhelmed or service throttling. Test for peak load, using production data, or synthetic data that is as close to production data as possible. The goal is to see how the application behaves under real-world conditions. + +## Resilient deployment +Once an application is deployed to production, updates are a possible source of errors. In the worst case, a bad update can cause downtime. To avoid this, the deployment process must be predictable and repeatable. Deployment includes provisioning Azure resources, deploying application code, and applying configuration settings. An update may involve all three, or a subset. + +The crucial point is that manual deployments are prone to error. Therefore, it's recommended to have an automated, idempotent process that you can run on demand, and re-run if something fails. + +* Use Resource Manager templates to automate provisioning of Azure resources.
+* Use [Azure Automation Desired State Configuration][dsc] (DSC) to configure VMs. +* Use an automated deployment process for application code. + +Two concepts related to resilient deployment are *infrastructure as code* and *immutable infrastructure*. + +* **Infrastructure as code** is the practice of using code to provision and configure infrastructure. Infrastructure as code may use a declarative approach or an imperative approach (or a combination of both). Resource Manager templates are an example of a declarative approach. PowerShell scripts are an example of an imperative approach. +* **Immutable infrastructure** is the principle that you shouldn’t modify infrastructure after it’s deployed to production. Otherwise, you can get into a state where ad hoc changes have been applied, so it’s hard to know exactly what changed, and hard to reason about the system. + +Another question is how to roll out an application update. We recommend techniques such as blue-green deployment or canary releases, which push updates in a highly controlled way to minimize possible impacts from a bad deployment. + +* [Blue-green deployment][blue-green] is a technique where you deploy an update into a separate production environment from the live application. After you validate the deployment, switch the traffic routing to the updated version. For example, Azure App Service Web Apps enables this with [staging slots][staging-slots]. +* [Canary releases][canary-release] are similar to blue-green deployment. Instead of switching all traffic to the updated version, you roll out the update to a small percentage of users, by routing a portion of the traffic to the new deployment. If there is a problem, back off and revert to the old deployment. Otherwise, route more traffic to the new version, until it gets 100% of traffic. + +Whatever approach you take, make sure that you can roll back to the last-known good deployment, in case the new version is not functioning.
Also, if errors occur, it must be possible to tell from the application logs which version caused the error. + +## Monitoring and diagnostics +Monitoring and diagnostics are crucial for resiliency. If something fails, you need to know that it failed, and you need insights into the cause of the failure. + +Monitoring a large-scale distributed system poses a significant challenge. Think about an application that runs on a few dozen VMs -- it's not practical to log into each VM, one at a time, and look through log files, trying to troubleshoot a problem. Moreover, the number of VM instances is probably not static. VMs get added and removed as the application scales in and out, and occasionally an instance may fail and need to be reprovisioned. In addition, a typical cloud application might use multiple data stores (Azure storage, SQL Database, DocumentDB, Redis cache), and a single user action may span multiple subsystems. + +You can think of the monitoring and diagnostics process as a pipeline with several distinct stages: + +![Composite SLA](./images/monitoring.png) + +* **Instrumentation**. The raw data for monitoring and diagnostics comes from a variety of sources, including application logs, web server logs, OS performance counters, database logs, and diagnostics built into the Azure platform. Most Azure services have a diagnostics feature that you can use to figure out the cause of problems. +* **Collection and storage**. The raw instrumentation data can be held in a variety of locations and with varying formats (application trace logs, performance counters, IIS logs). These disparate sources are collected, consolidated, and put into reliable storage. +* **Analysis and diagnosis**. After the data is consolidated, it can be analyzed, in order to troubleshoot issues and provide an overall view of the health of the application. +* **Visualization and alerts**.
In this stage, telemetry data is presented in such a way that an operator can quickly spot trends or problems. Examples include dashboards or email alerts. + +Monitoring is different than failure detection. For example, your application might detect a transient error and retry, resulting in no downtime. But it should also log the retry operation, so that you can monitor the error rate, in order to get an overall picture of the application health. + +Application logs are an important source of diagnostics data. Here are some best practices for application logging: + +* Log in production. Otherwise, you lose insight at the very times when you need it the most. +* Log events at service boundaries. Include a correlation ID that flows across service boundaries. If transaction X flows through multiple services and one of them fails, the correlation ID will help you pinpoint why the transaction failed. +* Use semantic logging, also called structured logging. Unstructured logs make it hard to automate the consumption and analysis of the log data, which is needed at cloud scale. +* Use asynchronous logging. Otherwise, the logging system itself can cause the application to fail, by causing requests to back up, as they block waiting to write a logging event. +* Application logging is not the same as auditing. Auditing may be done for compliance or regulatory reasons. As such, audit records must be complete, and it's not acceptable to drop any while processing transactions. If an application requires auditing, this should be kept separate from diagnostics logging. + +For more information about monitoring and diagnostics, see [Monitoring and diagnostics guidance][monitoring-guidance]. + +## Manual failure responses +Previous sections have focused on automated recovery strategies, which are critical for high availability. However, sometimes manual intervention is needed. + +* **Alerts**. Monitor your application for warning signs that may require pro-active intervention.
For example, if you see that SQL Database or DocumentDB consistently throttles your application, you might need to increase your database capacity or optimize your queries. In this example, even though the application might handle the throttling errors transparently, your telemetry should still raise an alert, so that you can follow up. +* **Manual failover**. Some systems cannot fail over automatically, and require a manual failover. +* **Operational readiness testing**. If your application fails over to a secondary region, you should perform an operational readiness test before you fail back to the primary region. The test should verify that the primary region is healthy and ready to receive traffic again. +* **Data consistency check**. If a failure happens in a data store, there may be data inconsistencies when the store becomes available again, especially if the data was replicated. +* **Restoring from backup**. For example, if SQL Database experiences a regional outage, you can geo-restore the database from the latest backup. + +Document and test your disaster recovery plan. Include written procedures for any manual steps, such as manual failover, restoring data from backups, and so forth. + +## Summary +This article looked at resiliency from a holistic perspective, emphasizing some of the unique challenges of the cloud. These include the distributed nature of cloud computing, the use of commodity hardware, and the presence of transient network faults. + +Here are the major points to take away from this article: + +* Resiliency leads to higher availability, and lower mean time to recover from failures. +* Achieving resiliency in the cloud requires a different set of techniques from traditional on-premises solutions. +* Resiliency does not happen by accident. It must be designed and built in from the start. +* Resiliency touches every part of the application lifecycle, from planning and coding to operations. +* Test and monitor!
+ + + + +[blue-green]: http://martinfowler.com/bliki/BlueGreenDeployment.html +[canary-release]: http://martinfowler.com/bliki/CanaryRelease.html +[circuit-breaker-pattern]: https://msdn.microsoft.com/library/dn589784.aspx +[compensating-transaction-pattern]: https://msdn.microsoft.com/library/dn589804.aspx +[containers]: https://en.wikipedia.org/wiki/Operating-system-level_virtualization +[dsc]: https://azure.microsoft.com/documentation/articles/automation-dsc-overview/ +[fma]: failure-mode-analysis.md +[hystrix]: http://techblog.netflix.com/2012/11/hystrix.html +[jmeter]: http://jmeter.apache.org/ +[load-leveling-pattern]: https://msdn.microsoft.com/library/dn589783.aspx +[monitoring-guidance]: ../best-practices/monitoring.md +[ra-basic-web]: https://azure.microsoft.com/documentation/articles/web-apps-basic/ +[ra-multi-vm]: https://azure.microsoft.com/documentation/articles/compute-multi-vm/ +[checklist]: ../checklist/resiliency.md +[retry-pattern]: https://msdn.microsoft.com/library/dn589788.aspx +[retry-service-specific guidance]: ../best-practices/retry-service-specific.md +[sla]: https://azure.microsoft.com/support/legal/sla/ +[staging-slots]: https://azure.microsoft.com/documentation/articles/web-apps-basic/ +[throttling-pattern]: https://msdn.microsoft.com/library/dn589798.aspx +[tm]: https://azure.microsoft.com/services/traffic-manager/ +[tm-failover]: https://azure.microsoft.com/documentation/articles/traffic-manager-monitoring/ +[tm-sla]: https://azure.microsoft.com/support/legal/sla/traffic-manager/v1_0/ +[vsts]: https://www.visualstudio.com/features/vso-cloud-load-testing-vs.aspx diff --git a/docs/resiliency/recovery-data-corruption.md b/docs/resiliency/recovery-data-corruption.md new file mode 100644 index 00000000000..17c7cf2cf00 --- /dev/null +++ b/docs/resiliency/recovery-data-corruption.md @@ -0,0 +1,85 @@ +--- +title: Recover from data corruption or accidental deletion +description: Article on understanding how to recover from data corruption of 
data or accidental data deletion, and on designing resilient, highly available, fault-tolerant applications as well as planning for disaster recovery +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: ecdfa1a8-d93f-43f2-b4a3-f5cfd13201e7 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Azure resiliency technical guidance: recovery from data corruption or accidental deletion +Part of a robust business continuity plan is having a plan if your data gets corrupted or accidentally deleted. The following is information about recovery after data has been corrupted or accidentally deleted, due to application errors or operator error. + +## Virtual Machines +To protect your Azure Virtual Machines (sometimes called infrastructure-as-a-service VMs) from application errors or accidental deletion, use [Azure Backup](https://azure.microsoft.com/services/backup/). Azure Backup enables the creation of backups that are consistent across multiple VM disks. In addition, the Backup Vault can be replicated across regions to provide recovery from region loss. + +## Storage +Note that while Azure Storage provides data resiliency through automated replicas, this does not prevent your application code (or developers/users) from corrupting data through accidental or unintended deletion, update, and so on. Maintaining data fidelity in the face of application or user error requires more advanced techniques, such as copying the data to a secondary storage location with an audit log. Developers can take advantage of the blob [snapshot capability](https://msdn.microsoft.com/library/azure/ee691971.aspx), which can create read-only point-in-time snapshots of blob contents. This can be used as the basis of a data-fidelity solution for Azure Storage blobs.
+ +### Blob and Table Storage Backup +While blobs and tables are highly durable, they always represent the current state of the data. Recovery from unwanted modification or deletion of data may require restoring data to a previous state. This can be achieved by taking advantage of the capabilities provided by Azure to store and retain point-in-time copies. + +For Azure Blobs, you can perform point-in-time backups using the [blob snapshot feature](https://msdn.microsoft.com/library/ee691971.aspx). For each snapshot, you are only charged for the storage required to store the differences within the blob since the last snapshot state. The snapshots are dependent on the existence of the original blob they are based on, so a copy operation to another blob or even another storage account is advisable. This ensures that backup data is properly protected against accidental deletion. For Azure Tables, you can make point-in-time copies to a different table or to Azure Blobs. More detailed guidance and examples of performing application-level backups of tables and blobs can be found here: + +* [Protecting Your Tables Against Application Errors](https://blogs.msdn.microsoft.com/windowsazurestorage/2010/05/03/protecting-your-tables-against-application-errors/) +* [Protecting Your Blobs Against Application Errors](https://blogs.msdn.microsoft.com/windowsazurestorage/2010/04/29/protecting-your-blobs-against-application-errors/) + +## Database +There are several [business continuity](/azure/sql-database/sql-database-business-continuity/) (backup, restore) options available for Azure SQL Database. Databases can be copied by using the [Database Copy](/azure/sql-database/sql-database-copy/) functionality, or by [exporting](/azure/sql-database/sql-database-export/) and [importing](https://msdn.microsoft.com/library/hh710052.aspx) a SQL Server bacpac file. Database Copy provides transactionally consistent results, while a bacpac (through the import/export service) does not. 
Both of these options run as queue-based services within the data center, and they do not currently provide a time-to-completion SLA. + +> [!NOTE] +> The database copy and import/export options place a significant degree of load on the source database. They can trigger resource contention or throttling events. +> +> + +### SQL Database Backup +Point-in-time backups for Microsoft Azure SQL Database are achieved by [copying your Azure SQL database](/azure/sql-database/sql-database-copy/). You can use this command to create a transactionally consistent copy of a database on the same logical database server or to a different server. In either case, the database copy is fully functional and completely independent of the source database. Each copy you create represents a point-in-time recovery option. You can recover the database state completely by renaming the new database with the source database name. Alternatively, you can recover a specific subset of data from the new database by using Transact-SQL queries. For additional details about SQL Database, see [Overview of business continuity with Azure SQL Database](/azure/sql-database/sql-database-business-continuity/). + +### SQL Server on Virtual Machines Backup +For SQL Server used with Azure infrastructure as a service virtual machines (often called IaaS or IaaS VMs), there are two options: traditional backups and log shipping. Using traditional backups enables you to restore to a specific point in time, but the recovery process is slow. Restoring traditional backups requires starting with an initial full backup, and then applying any backups taken after that. The second option is to configure a log shipping session to delay the restore of log backups (for example, by two hours). This provides a window to recover from errors made on the primary. + +## Other Azure platform services +Some Azure platform services store information in a user-controlled storage account or Azure SQL Database. 
If the account or storage resource is deleted or corrupted, this could cause serious errors with the service. In these cases, it is important to maintain backups that would enable you to re-create these resources if they were deleted or corrupted. + +For Azure Web Sites and Azure Mobile Services, you must back up and maintain the associated databases. For Azure Media Service and Virtual Machines, you must maintain the associated Azure Storage account and all resources in that account. For example, for Virtual Machines, you must back up and manage the VM disks in Azure blob storage. + +## Checklists for data corruption or accidental deletion +## Virtual Machines checklist +1. Review the Virtual Machines section of this document. +2. Back up and maintain the VM disks with Azure Backup (or your own backup system by using Azure blob storage and VHD snapshots). + +## Storage checklist +1. Review the Storage section of this document. +2. Regularly back up critical storage resources. +3. Consider using the snapshot feature for blobs. + +## Database checklist +1. Review the Database section of this document. +2. Create point-in-time backups by using the Database Copy command. + +## SQL Server on Virtual Machines Backup checklist +1. Review the SQL Server on Virtual Machines Backup section of this document. +2. Use traditional backup and restore techniques. +3. Create a delayed log shipping session. + +## Web Apps checklist +1. Back up and maintain the associated database, if any. + +## Media Services checklist +1. Back up and maintain the associated storage resources. + +## More information +For more information about backup and restore features in Azure, see [Storage, backup and recovery scenarios](https://azure.microsoft.com/documentation/scenarios/storage-backup-recovery/).
+ + diff --git a/docs/resiliency/recovery-local-failures.md b/docs/resiliency/recovery-local-failures.md new file mode 100644 index 00000000000..e1a2dd3b130 --- /dev/null +++ b/docs/resiliency/recovery-local-failures.md @@ -0,0 +1,191 @@ +--- +title: 'Technical guidance: Recovery from local failures in Azure' +description: Article on understanding and designing resilient, highly available, fault-tolerant applications, as well as planning for disaster recovery focused on local failures within Azure. +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: 2e50f6c1-fa61-4c7d-ac26-566a142fbfc2 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Azure resiliency technical guidance: Recovery from local failures in Azure +There are two primary threats to application availability: + +* The failure of devices, such as drives and servers +* The exhaustion of critical resources, such as compute under peak load conditions + +Azure provides a combination of resource management, elasticity, load balancing, and partitioning to enable high availability under these circumstances. Some of these features are performed automatically for all Azure services. However, in some cases, the application developer must do some additional work to benefit from them. + +## Cloud Services +Azure Cloud Services consists of collections of one or more web or worker roles. One or more instances of a role can run concurrently. The configuration determines the number of instances. Role instances are monitored and managed through a component called the fabric controller. The fabric controller detects and responds to both software and hardware failures automatically. + +Every role instance runs in its own virtual machine (VM) and communicates with its fabric controller through a guest agent. 
The guest agent collects resource and node metrics, including VM usage, status, logs, resource usage, exceptions, and failure conditions. The fabric controller queries the guest agent at configurable intervals, and it restarts the VM if the guest agent fails to respond. In the event of hardware failure, the associated fabric controller moves all affected role instances to a new hardware node and reconfigures the network to route traffic there. + +To benefit from these features, developers should ensure that all service roles avoid storing state on the role instances. Instead, all persistent data should be accessed from durable storage, such as Azure Storage or Azure SQL Database. This allows any roles to handle requests. It also means that role instances can go down at any time without creating inconsistencies in the transient or persistent state of the service. + +The requirement to store state externally to the roles has several implications. It implies, for example, that all related changes to an Azure Storage table should be changed in a single entity-group transaction, if possible. Of course, it isn't always possible to make all changes in a single transaction. You must take special care to ensure that role instance failures do not cause problems when they interrupt long-running operations that span two or more updates to the persistent state of the service. If another role attempts to retry such an operation, it should anticipate and handle the case where the work was partially completed. + +For example, consider a service that partitions data across multiple stores. If a worker role goes down while it's relocating a shard, the relocation of the shard might not finish. Or the relocation might be repeated from its inception by a different worker role, potentially causing orphaned data or data corruption. To prevent problems, long-running operations must be one or both of the following: + +* *Idempotent*: Repeatable without side effects. 
To be idempotent, a long-running operation should have the same effect no matter how many times it's executed, even when it's interrupted during execution. +* *Incrementally restartable*: Able to continue from the most recent point of failure. To be incrementally restartable, a long-running operation should consist of a sequence of smaller atomic operations. It should also record its progress in durable storage, so that each subsequent invocation picks up where its predecessor stopped. + +Finally, all long-running operations should be invoked repeatedly until they succeed. For example, a provisioning operation might be placed in an Azure queue, and then removed from the queue by a worker role only when it succeeds. Garbage collection might be necessary to clean up data that interrupted operations create. + +### Elasticity +The initial number of instances running for each role is determined in each role’s configuration. Administrators should initially configure each role to run with two or more instances based on expected load. But you can easily scale role instances up or down as usage patterns change. You can do this manually in the Azure portal, or you can automate the process by using Windows PowerShell, the Service Management API, or third-party tools. For more information, see [How to autoscale an application](/azure/cloud-services/cloud-services-how-to-scale/). + +### Partitioning +The Azure fabric controller uses two types of partitions: + +* An *update domain* is used to upgrade a service’s role instances in groups. Azure deploys service instances into multiple update domains. For an in-place update, the fabric controller brings down all the instances in one update domain, updates them, and then restarts them before moving to the next update domain. This approach prevents the entire service from being unavailable during the update process. +* A *fault domain* defines potential points of hardware or network failure. 
For any role that has more than one instance, the fabric controller ensures that the instances are distributed across multiple fault domains, to prevent isolated hardware failures from disrupting service. Fault domains govern all exposure to server and cluster failures. + +The [Azure service-level agreement (SLA)](https://azure.microsoft.com/support/legal/sla/) guarantees that when two or more web role instances are deployed to different fault and upgrade domains, they'll have external connectivity at least 99.95 percent of the time. Unlike update domains, there's no way to control the number of fault domains. Azure automatically allocates fault domains and distributes role instances across them. At least the first two instances of every role are placed in different fault and upgrade domains to ensure that any role with at least two instances will satisfy the SLA. This is represented in the following diagram. + +![Simplified view of fault domain isolation](./images/technical-guidance-recovery-local-failures/partitioning-1.png) + +### Load balancing +All inbound traffic to a web role passes through a stateless load balancer, which distributes client requests among the role instances. Individual role instances do not have public IP addresses, and they are not directly addressable from the Internet. Web roles are stateless so that any client request can be routed to any role instance. A [StatusCheck](https://msdn.microsoft.com/library/microsoft.windowsazure.serviceruntime.roleenvironment.statuscheck.aspx) event is raised every 15 seconds. You can use this to indicate whether the role is ready to receive traffic, or whether it's busy and should be taken out of the load-balancer rotation. + +## Virtual Machines +Azure Virtual Machines differs from platform as a service (PaaS) compute roles in several respects in relation to high availability. In some instances, you must do additional work to ensure high availability. 
+ +### Disk durability +Unlike PaaS role instances, data stored on virtual machine drives is persistent even when the virtual machine is relocated. Azure virtual machines use VM disks that exist as blobs in Azure Storage. Because of the availability characteristics of Azure Storage, the data stored on a virtual machine’s drives is also highly available. + +Note that drive D (in Windows VMs) is the exception to this rule. Drive D is actually physical storage on the rack server that hosts the VM, and its data will be lost if the VM is recycled. Drive D is intended for temporary storage only. In Linux, Azure “usually” (but not always) exposes the local temporary disk as /dev/sdb block device. It is often mounted by the Azure Linux Agent as /mnt/resource or /mnt mount points (configurable via /etc/waagent.conf). + +### Partitioning +Azure natively understands the tiers in a PaaS application (web role and worker role) and thus can properly distribute them across fault and update domains. In contrast, the tiers in an infrastructure as a service (IaaS) application must be manually defined through availability sets. Availability sets are required for an SLA under IaaS. + +![Availability sets for Azure virtual machines](./images/technical-guidance-recovery-local-failures/partitioning-2.png) + +In the preceding diagram, the Internet Information Services (IIS) tier (which works as a web app tier) and the SQL tier (which works as a data tier) are assigned to different availability sets. This ensures that all instances of each tier have hardware redundancy by distributing virtual machines across fault domains, and that entire tiers are not taken down during an update. + +### Load balancing +If the VMs should have traffic distributed across them, you must group the VMs in an application and load balance across a specific TCP or UDP endpoint. 
For more information, see [Load balancing virtual machines](/azure/virtual-machines/virtual-machines-linux-load-balance/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). If the VMs receive input from another source (for example, a queuing mechanism), a load balancer is not required. The load balancer uses a basic health check to determine whether traffic should be sent to the node. It's also possible to create your own probes to implement application-specific health metrics that determine whether the VM should receive traffic. + +## Storage +Azure Storage is the baseline durable data service for Azure. It provides blob, table, queue, and VM disk storage. It uses a combination of replication and resource management to provide high availability within a single datacenter. The Azure Storage availability SLA guarantees that at least 99.9 percent of the time: + +* Correctly formatted requests to add, update, read, and delete data will be successfully and correctly processed. +* Storage accounts will have connectivity to the Internet gateway. + +### Replication +Azure Storage facilitates data durability by maintaining multiple copies of all data on different drives across fully independent physical storage subsystems within the region. Data is replicated synchronously, and all copies are committed before the write is acknowledged. Azure Storage is strongly consistent, meaning that reads are guaranteed to reflect the most recent writes. In addition, copies of data are continually scanned to detect and repair bit rot, an often overlooked threat to the integrity of stored data. + +Services benefit from replication just by using Azure Storage. The service developer doesn't need to do additional work to recover from a local failure. + +### Resource management +Storage accounts created after May 2014 can grow up to 500 TB (the previous maximum was 200 TB). If additional space is required, applications must be designed to use multiple storage accounts. 
+ +### Virtual machine disks +A virtual machine’s disk is stored as a page blob in Azure Storage, giving it all the same durability and scalability properties as Blob storage. This design makes the data on a virtual machine’s disk persistent, even if the server running the VM fails and the VM must be restarted on another server. + +## Database +### SQL Database +Azure SQL Database provides database as a service. It allows applications to quickly provision, insert data into, and query relational databases. It provides many of the familiar SQL Server features and functionality, while abstracting the burden of hardware, configuration, patching, and resiliency. + +> [!NOTE] +> Azure SQL Database does not provide one-to-one feature parity with SQL Server. It's intended to fulfill a different set of requirements--one that's uniquely suited to cloud applications (elastic scale, database as a service to reduce maintenance costs, and so on). For more information, see [Choose a cloud SQL Server option: Azure SQL (PaaS) Database or SQL Server on Azure VMs (IaaS)](/azure/sql-database/sql-database-paas-vs-sql-server-iaas/). +> +> + +#### Replication +Azure SQL Database provides built-in resiliency to node-level failure. All writes into a database are automatically replicated to two or more background nodes through a quorum commit technique. (The primary and at least one secondary must confirm that the activity is written to the transaction log before the transaction is deemed successful and returns.) In the case of node failure, the database automatically fails over to one of the secondary replicas. This causes a transient connection interruption for client applications. For this reason, all Azure SQL Database clients must implement some form of transient connection handling. For more information, see [Retry service specific guidance](/azure/best-practices-retry-service-specific/). + +#### Resource management +Each database, when created, is configured with an upper size limit. 
The currently available maximum size is 1 TB (size limits vary based on your service tier; see [service tiers and performance levels of Azure SQL Databases](/azure/sql-database/sql-database-resource-limits/#service-tiers-and-performance-levels)). When a database hits its upper size limit, it rejects additional INSERT or UPDATE commands. (Querying and deleting data is still possible.) + +Within a database, Azure SQL Database uses a fabric to manage resources. However, instead of a fabric controller, it uses a ring topology to detect failures. Every replica in a cluster has two neighbors and is responsible for detecting when they go down. When a replica goes down, its neighbors trigger a reconfiguration agent to re-create it on another machine. Engine throttling is provided to ensure that a logical server doesn't use too many resources on a machine or exceed the machine’s physical limits. + +### Elasticity +If the application requires more than the 1 TB database limit, it must implement a scale-out approach. You scale out with Azure SQL Database by manually partitioning, also known as sharding, data across multiple SQL databases. This scale-out approach provides the opportunity to achieve nearly linear cost growth with scale. Elastic growth or capacity on demand can grow with incremental costs as needed because databases are billed based on the average actual size used per day, not based on maximum possible size. + +## SQL Server on Virtual Machines +By installing SQL Server (version 2014 or later) on Azure Virtual Machines, you can take advantage of the traditional availability features of SQL Server. These features include AlwaysOn Availability Groups and database mirroring. Note that Azure VMs, storage, and networking have different operational characteristics than an on-premises, non-virtualized IT infrastructure. 
A successful implementation of a high availability/disaster recovery (HA/DR) SQL Server solution in Azure requires that you understand these differences and design your solution to accommodate them. + +### High-availability nodes in an availability set +When you implement a high-availability solution in Azure, you can use the availability set in Azure to place the high-availability nodes into separate fault domains and upgrade domains. To be clear, the availability set is an Azure concept. It's a best practice that you should follow to make sure that your databases are indeed highly available, whether you're using AlwaysOn Availability Groups, database mirroring, or something else. If you don't follow this best practice, you might be under the false assumption that your system is highly available. But in reality, your nodes can all fail simultaneously because they happen to be placed in the same fault domain in the Azure region. + +This recommendation is not as applicable with log shipping. As a disaster recovery feature, you should ensure that the servers are running in separate Azure regions. By definition, these regions are separate fault domains. + +For Azure Cloud Services VMs deployed through the classic portal to be in the same availability set, you must deploy them in the same Cloud Service. VMs deployed through Azure Resource Manager (the current portal) do not have this limitation. For classic portal deployed VMs in Azure Cloud Service, only nodes in the same Cloud Service can participate in the same availability set. In addition, the Cloud Services VMs should be in the same virtual network to ensure that they maintain their IPs even after service healing. This avoids DNS update disruptions. + +### Azure-only: High-availability solutions +You can have a high-availability solution for your SQL Server databases in Azure by using AlwaysOn Availability Groups or database mirroring. 
+ +The following diagram demonstrates the architecture of AlwaysOn Availability Groups running on Azure Virtual Machines. This diagram was taken from the in-depth article on this subject, [High availability and disaster recovery for SQL Server on Azure Virtual Machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/). + +![AlwaysOn Availability Groups in Microsoft Azure](./images/technical-guidance-recovery-local-failures/high_availability_solutions-1.png) + +You can also automatically provision an AlwaysOn Availability Groups deployment end-to-end on Azure VMs by using the AlwaysOn template in the Azure portal. For more information, see [SQL Server AlwaysOn Offering in Microsoft Azure Portal Gallery](https://blogs.technet.microsoft.com/dataplatforminsider/2014/08/25/sql-server-alwayson-offering-in-microsoft-azure-portal-gallery/). + +The following diagram demonstrates the use of database mirroring on Azure Virtual Machines. It was also taken from the in-depth topic [High availability and disaster recovery for SQL Server on Azure Virtual Machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/). + +![Database mirroring in Microsoft Azure](./images/technical-guidance-recovery-local-failures/high_availability_solutions-2.png) + +> [!NOTE] +> Both architectures require a domain controller. However, with database mirroring, it's possible to use server certificates to eliminate the need for a domain controller. +> +> + +## Other Azure platform services +Applications that are built on Azure benefit from platform capabilities to recover from local failures. In some cases, you can take specific actions to increase availability for your specific scenario. + +### Service Bus +To mitigate against a temporary outage of Azure Service Bus, consider creating a durable client-side queue. 
This temporarily uses an alternate, local storage mechanism to store messages that cannot be added to the Service Bus queue. The application can decide how to handle the temporarily stored messages after the service is restored. For more information, see [Best practices for performance improvements using Service Bus brokered messaging](/azure/service-bus-messaging/service-bus-performance-improvements/) and [Service Bus (disaster recovery)](recovery-loss-azure-region.md#other-azure-platform-services). + +### HDInsight +The data that's associated with Azure HDInsight is stored by default in Azure Blob storage. Azure Storage specifies high-availability and durability properties for Blob storage. The multiple-node processing that's associated with Hadoop MapReduce jobs occurs on a transient Hadoop Distributed File System (HDFS) that is provisioned when HDInsight needs it. Results from a MapReduce job are also stored by default in Azure Blob storage, so that the processed data is durable and remains highly available after the Hadoop cluster is deprovisioned. For more information, see [HDInsight (disaster recovery)](recovery-loss-azure-region.md#other-azure-platform-services). + +## Checklists for local failures +### Cloud Services +1. Review the Cloud Services section of this document. +2. Configure at least two instances for each role. +3. Persist state in durable storage, not on role instances. +4. Correctly handle the StatusCheck event. +5. Wrap related changes in transactions when possible. +6. Verify that worker role tasks are idempotent and restartable. +7. Continue to invoke operations until they succeed. +8. Consider autoscaling strategies. + +### Virtual Machines +1. Review the Virtual Machines section of this document. +2. Do not use drive D for persistent storage. +3. Group machines in a service tier into an availability set. +4. Configure load balancing and optional probes. + +### Storage +1. Review the Storage section of this document. +2. 
Use multiple storage accounts when data or bandwidth exceeds quotas. + +### SQL Database +1. Review the SQL Database section of this document. +2. Implement a retry policy to handle transient errors. +3. Use partitioning/sharding as a scale-out strategy. + +### SQL Server on Virtual Machines +1. Review the SQL Server on Virtual Machines section of this document. +2. Follow the previous recommendations for Virtual Machines. +3. Use SQL Server high availability features, such as AlwaysOn. + +### Service Bus +1. Review the Service Bus section of this document. +2. Consider creating a durable client-side queue as a backup. + +### HDInsight +1. Review the HDInsight section of this document. +2. No additional availability steps are required for local failures. + diff --git a/docs/resiliency/recovery-loss-azure-region.md b/docs/resiliency/recovery-loss-azure-region.md new file mode 100644 index 00000000000..f3d110ebe7f --- /dev/null +++ b/docs/resiliency/recovery-loss-azure-region.md @@ -0,0 +1,180 @@ +--- +title: Recover from loss of an Azure region +description: Article on understanding and designing resilient, highly available, fault tolerant applications as well as planning for disaster recovery +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: f2f750aa-9305-487e-8c3f-1f8fbc06dc47 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Azure resiliency technical guidance: recovery from a region-wide service disruption +Azure is divided physically and logically into units called regions. A region consists of one or more datacenters in close proximity. At the time of this writing, Azure has twenty-four regions around the world. + +Under rare circumstances, it is possible that facilities in an entire region can become inaccessible, for example due to network failures. 
Or facilities can be lost entirely, for example due to a natural disaster. This section explains the capabilities of Azure for creating applications that are distributed across regions. Such distribution helps to minimize the possibility that a failure in one region could affect other regions. + +## Cloud services +### Resource management +You can distribute compute instances across regions by creating a separate cloud service in each target region, and then publishing the deployment package to each cloud service. However, note that distributing traffic across cloud services in different regions must be implemented by the application developer or with a traffic management service. + +Determining the number of spare role instances to deploy in advance for disaster recovery is an important aspect of capacity planning. Having a full-scale secondary deployment ensures that capacity is already available when needed; however, this effectively doubles the cost. A common pattern is to have a small, secondary deployment, just large enough to run critical services. This small secondary deployment is a good idea, both for reserving capacity and for testing the configuration of the secondary environment. + +> [!NOTE] +> The subscription quota is not a capacity guarantee. The quota is simply a credit limit. To guarantee capacity, the required number of roles must be defined in the service model, and the roles must be deployed. +> +> + +### Load Balancing +Load balancing traffic across regions requires a traffic management solution. Azure provides [Azure Traffic Manager](https://azure.microsoft.com/services/traffic-manager/). You can also take advantage of third-party services that provide similar traffic management capabilities. + +### Strategies +Many alternative strategies are available for implementing distributed compute across regions. These must be tailored to the specific business requirements and circumstances of the application. 
At a high level, the approaches can be divided into the following categories: + +* **Redeploy on disaster**: In this approach the application is redeployed from scratch at the time of disaster. This is appropriate for non-critical applications that don’t require a guaranteed recovery time. +* **Warm Spare (Active/Passive)**: A secondary hosted service is created in an alternate region, and roles are deployed to guarantee minimal capacity; however, the roles don’t receive production traffic. This approach is useful for applications that have not been designed to distribute traffic across regions. +* **Hot Spare (Active/Active)**: The application is designed to receive production load in multiple regions. The cloud services in each region might be configured for higher capacity than required for disaster recovery purposes. Alternatively, the cloud services might scale out as necessary at the time of a disaster and failover. This approach requires substantial investment in application design, but it has significant benefits. These include low and guaranteed recovery time, continuous testing of all recovery locations, and efficient usage of capacity. + +A complete discussion of distributed design is outside the scope of this document. For further information, see [Disaster Recovery and High Availability for Azure Applications](https://aka.ms/drtechguide). + +## Virtual Machines +Recovery of infrastructure as a service (IaaS) virtual machines (VMs) is similar to platform as a service (PaaS) compute recovery in many respects. There are important differences, however, due to the fact that an IaaS VM consists of both the VM and the VM disk. + +* **Use Azure Backup to create cross region backups that are application consistent**. + [Azure Backup](https://azure.microsoft.com/services/backup/) enables customers to create application consistent backups across multiple VM disks, and support replication of backups across regions. 
You can do this by choosing to geo-replicate the backup vault at the time of creation. Note that replication of the backup vault must be configured at the time of creation. It can't be set later. If a region is lost, Microsoft will make the backups available to customers. Customers will be able to restore to any of their configured restore points. +* **Separate the data disk from the operating system disk**. An important consideration for IaaS VMs is that you cannot change the operating system disk without re-creating the VM. This is not a problem if your recovery strategy is to redeploy after disaster. However, it might be a problem if you are using the Warm Spare approach to reserve capacity. To implement this properly, you must have the correct operating system disk deployed to both the primary and secondary locations, and the application data must be stored on a separate drive. If possible, use a standard operating system configuration that can be provided on both locations. After a failover, you must then attach the data drive to your existing IaaS VMs in the secondary DC. Use AzCopy to copy snapshots of the data disk(s) to a remote site. +* **Be aware of potential consistency issues after a geo-failover of multiple VM Disks**. VM Disks are implemented as Azure Storage blobs, and have the same geo-replication characteristic. Unless [Azure Backup](https://azure.microsoft.com/services/backup/) is used, there are no guarantees of consistency across disks, because geo-replication is asynchronous and replicates independently. Individual VM disks are guaranteed to be in a crash consistent state after a geo-failover, but not consistent across disks. This could cause problems in some cases (for example, in the case of disk striping). + +## Storage +### Recovery by using Geo-Redundant Storage of blob, table, queue and VM disk storage +In Azure, blobs, tables, queues, and VM disks are all geo-replicated by default. This is referred to as Geo-Redundant Storage (GRS). 
GRS replicates storage data to a paired datacenter hundreds of miles apart within a specific geographic region. GRS is designed to provide additional durability in case there is a major datacenter disaster. Microsoft controls when failover occurs, and failover is limited to major disasters in which the original primary location is deemed unrecoverable in a reasonable amount of time. Under some scenarios, this can be several days. Data is typically replicated within a few minutes, although synchronization interval is not yet covered by a service level agreement. + +In the event of a geo-failover, there will be no change to how the account is accessed (the URL and account key will not change). The storage account will, however, be in a different region after failover. This could impact applications that require regional affinity with their storage account. Even for services and applications that do not require a storage account in the same datacenter, the cross-datacenter latency and bandwidth charges might be compelling reasons to move traffic to the failover region temporarily. This could factor into an overall disaster recovery strategy. + +In addition to automatic failover provided by GRS, Azure has introduced a service that gives you read access to the copy of your data in the secondary storage location. This is called Read-Access Geo-Redundant Storage (RA-GRS). + +For more information about both GRS and RA-GRS storage, see [Azure Storage replication](/azure/storage/storage-redundancy/). + +### Geo-Replication region mappings: +It is important to know where your data is geo-replicated, in order to know where to deploy the other instances of your data that require regional affinity with your storage. For more information see [Azure Paired Regions](/azure/best-practices-availability-paired-regions). + +### Geo-Replication pricing: +Geo-replication is included in current pricing for Azure Storage. This is called Geo-Redundant Storage (GRS). 
If you do not want your data geo-replicated you can disable geo-replication for your account. This is called Locally Redundant Storage, and it is charged at a discounted price compared to GRS. + +### Determining if a geo-failover has occurred +If a geo-failover occurs, this will be posted to the [Azure Service Health Dashboard](https://azure.microsoft.com/status/). Applications can implement an automated means of detecting this, however, by monitoring the geo-region for their storage account. This can be used to trigger other recovery operations, such as activation of compute resources in the geo-region where their storage moved to. You can perform a query for this from the service management API, by using [Get Storage Account Properties](https://msdn.microsoft.com/library/ee460802.aspx). The relevant properties are: + + primary-region + [Available|Unavailable] + DateTime + secondary-region + [Available|Unavailable] + +### VM disks and geo-failover +As discussed in the section on VM disks, there are no guarantees for data consistency across VM disks after a failover. To ensure correctness of backups, a backup product such as Data Protection Manager should be used to back up and restore application data. + +## Database +### SQL Database +Azure SQL Database provides two types of recovery: Geo-Restore and Active Geo-Replication. + +#### Geo-Restore +[Geo-Restore](/azure/sql-database/sql-database-recovery-using-backups/#geo-restore) is also available with Basic, Standard, and Premium databases. It provides the default recovery option when the database is unavailable because of an incident in the region where your database is hosted. Similar to Point-In-Time Restore, Geo-Restore relies on database backups in geo-redundant Azure storage. It restores from the geo-replicated backup copy, and therefore is resilient to the storage outages in the primary region. 
For more details, see [Restore an Azure SQL Database or failover to a secondary](/azure/sql-database/sql-database-disaster-recovery/). + +#### Active Geo-Replication +[Active Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/) is available for all database tiers. It’s designed for applications that have more aggressive recovery requirements than Geo-Restore can offer. Using Active Geo-Replication, you can create up to four readable secondaries on servers in different regions. You can initiate failover to any of the secondaries. In addition, Active Geo-Replication can be used to support the application upgrade or relocation scenarios, as well as load balancing for read-only workloads. For details, see [configure Geo-Replication](/azure/sql-database/sql-database-geo-replication-portal/) and [fail over to the secondary database](/azure/sql-database/sql-database-geo-replication-failover-portal/). Refer to [Design an application for cloud disaster recovery using Active Geo-Replication in SQL Database](/azure/sql-database/sql-database-designing-cloud-solutions-for-disaster-recovery/) and [Managing rolling upgrades of cloud applications using SQL Database Active Geo-Replication](/azure/sql-database/sql-database-manage-application-rolling-upgrade/) for details on how to design and implement applications and application upgrades without downtime. + +### SQL Server on Virtual Machines +A variety of options are available for recovery and high availability for SQL Server 2012 (and later) running in Azure Virtual Machines. For more information, see [High availability and disaster recovery for SQL Server in Azure Virtual Machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/). + +## Other Azure platform services +When attempting to run your cloud service in multiple Azure regions, you must consider the implications for each of your dependencies. 
In the following sections, the service-specific guidance assumes that you must use the same Azure service in an alternate Azure datacenter. This involves both configuration and data-replication tasks. + +> [!NOTE] +> In some cases, these steps can help to mitigate a service-specific outage rather than an entire datacenter event. From the application perspective, a service-specific outage might be just as limiting and would require temporarily migrating the service to an alternate Azure region. +> +> + +### Service Bus +Azure Service Bus uses a unique namespace that does not span Azure regions. So the first requirement is to set up the necessary Service Bus namespaces in the alternate region. However, there are also considerations for the durability of the queued messages. There are several strategies for replicating messages across Azure regions. For the details on these replication strategies and other disaster recovery strategies, see [Best practices for insulating applications against Service Bus outages and disasters](/azure/service-bus-messaging/service-bus-outages-disasters/). For other availability considerations, see [Service Bus (Availability)](recovery-local-failures.md#other-azure-platform-services). + +### App Service +To migrate an Azure App Service application, such as Web Apps or Mobile Apps, to a secondary Azure region, you must have a backup of the website available for publishing. If the outage does not involve the entire Azure datacenter, it might be possible to use FTP to download a recent backup of the site content. Then create a new app in the alternate region, unless you have previously done this to reserve capacity. Publish the site to the new region, and make any necessary configuration changes. These changes could include database connection strings or other region-specific settings. If necessary, add the site’s SSL certificate and change the DNS CNAME record so that the custom domain name points to the redeployed Azure Web App URL. 
+ +### HDInsight +The data associated with HDInsight is stored by default in Azure Blob Storage. HDInsight requires that a Hadoop cluster processing MapReduce jobs must be co-located in the same region as the storage account that contains the data being analyzed. Provided you use the geo-replication feature available to Azure Storage, you can access your data in the secondary region where the data was replicated if for some reason the primary region is no longer available. You can create a new Hadoop cluster in the region where the data has been replicated and continue processing it. For other availability considerations, see [HDInsight (Availability)](recovery-local-failures.md#other-azure-platform-services). + +### SQL Reporting +At this time, recovering from the loss of an Azure region requires multiple SQL Reporting instances in different Azure regions. These SQL Reporting instances should access the same data, and that data should have its own recovery plan in the event of a disaster. You can also maintain external backup copies of the RDL file for each report. + +### Media Services +Azure Media Services has a different recovery approach for encoding and streaming. Typically, streaming is more critical during a regional outage. To prepare for this, you should have a Media Services account in two different Azure regions. The encoded content should be located in both regions. During a failure, you can redirect the streaming traffic to the alternate region. Encoding can be performed in any Azure region. If encoding is time-sensitive, for example during live event processing, you must be prepared to submit jobs to an alternate datacenter during failures. + +### Virtual network +Configuration files provide the quickest way to set up a virtual network in an alternate Azure region. 
After configuring the virtual network in the primary Azure region, [export the virtual network settings](/azure/virtual-network/virtual-networks-create-vnet-classic-portal/) for the current network to a network configuration file. In the event of an outage in the primary region, [restore the virtual network](/azure/virtual-network/virtual-networks-create-vnet-classic-portal/) from the stored configuration file. Then configure other cloud services, virtual machines, or cross-premises settings to work with the new virtual network. + +## Checklists for disaster recovery +## Cloud Services checklist +1. Review the Cloud Services section of this document. +2. Create a cross-region disaster recovery strategy. +3. Understand trade-offs in reserving capacity in alternate regions. +4. Use traffic routing tools, such as Azure Traffic Manager. + +## Virtual Machines checklist +1. Review the Virtual Machines section of this document. +2. Use [Azure Backup](https://azure.microsoft.com/services/backup/) to create application consistent backups across regions. + +## Storage checklist +1. Review the Storage section of this document. +2. Do not disable geo-replication of storage resources. +3. Understand alternate region for geo-replication in the event of failover. +4. Create custom backup strategies for user-controlled failover strategies. + +## SQL Database checklist +1. Review the SQL Database section of this document. +2. Use [Geo-Restore](/azure/sql-database/sql-database-recovery-using-backups/#geo-restore) or [Geo-Replication](/azure/sql-database/sql-database-geo-replication-overview/) as appropriate. + +## SQL Server on Virtual Machines checklist +1. Review the SQL Server on Virtual Machines section of this document. +2. Use cross-region AlwaysOn Availability Groups or database mirroring. +3. Alternately use backup and restore to blob storage. + +## Service Bus checklist +1. Review the Service Bus section of this document. +2. 
Configure a Service Bus namespace in an alternate region. +3. Consider custom replication strategies for messages across regions. + +## App Service checklist +1. Review the App Service section of this document. +2. Maintain site backups outside of the primary region. +3. If outage is partial, attempt to retrieve current site with FTP. +4. Plan to deploy the site to new or existing site in an alternate region. +5. Plan configuration changes for both application and DNS CNAME records. + +## HDInsight checklist +1. Review the HDInsight section of this document. +2. Create a new Hadoop cluster in the region with replicated data. + +## SQL Reporting checklist +1. Review the SQL Reporting section of this document. +2. Maintain an alternate SQL Reporting instance in a different region. +3. Maintain a separate plan to replicate the target to that region. + +## Media Services checklist +1. Review the Media Services section of this document. +2. Create a Media Services account in an alternate region. +3. Encode the same content in both regions to support streaming failover. +4. Submit encoding jobs to an alternate region in the event of a service disruption. + +## Virtual Network checklist +1. Review the Virtual Network section of this document. +2. Use exported virtual network settings to re-create it in another region. 
+ diff --git a/docs/resiliency/recovery-on-premises-azure.md b/docs/resiliency/recovery-on-premises-azure.md new file mode 100644 index 00000000000..3a369b2b4fa --- /dev/null +++ b/docs/resiliency/recovery-on-premises-azure.md @@ -0,0 +1,94 @@ +--- +title: 'Technical guidance: Recovery from on-premises to Azure' +description: Article on understanding and designing recovery systems from on-premises infrastructure to Azure +services: '' +documentationcenter: na +author: adamglick +manager: saladki +editor: '' + +ms.assetid: 114c29c9-453f-4a54-8d0f-d77c5ad47d60 +ms.service: resiliency +ms.devlang: na +ms.topic: article +ms.tgt_pltfrm: na +ms.workload: na +ms.date: 08/18/2016 +ms.author: aglick + +--- +[!INCLUDE [header](../_includes/header.md)] +# Azure resiliency technical guidance: Recovery from on-premises to Azure +Azure provides a comprehensive set of services for enabling the extension of an on-premises datacenter to Azure for high availability and disaster recovery purposes: + +* **Networking**: With a virtual private network, you securely extend your on-premises network to the cloud. +* **Compute**: Customers using Hyper-V on-premises can “lift and shift” existing virtual machines (VMs) to Azure. +* **Storage**: StorSimple extends your file system to Azure Storage. The Azure Backup service provides backup for files and SQL databases to Azure Storage. +* **Database replication**: With SQL Server 2014 (or later) Availability Groups, you can implement high availability and disaster recovery for your on-premises data. + +## Networking +You can use Azure Virtual Network to create a logically isolated section in Azure and securely connect it to your on-premises datacenter or a single client machine by using an IPsec connection. With Virtual Network, you can take advantage of the scalable, on-demand infrastructure in Azure while providing connectivity to data and applications on-premises, including systems running on Windows Server, mainframes, and UNIX. 
See [Azure networking documentation](/azure/virtual-network/virtual-networks-overview/) for more information. + +## Compute +If you're using Hyper-V on-premises, you can “lift and shift” existing virtual machines to Azure and service providers running Windows Server 2012 (or later), without making changes to the VM or converting VM formats. For more information, see [About disks and VHDs for Azure virtual machines](/azure/virtual-machines/virtual-machines-linux-about-disks-vhds/?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). + +## Azure Site Recovery +If you want disaster recovery as a service (DRaaS), Azure provides [Azure Site Recovery](https://azure.microsoft.com/services/site-recovery/). Azure Site Recovery offers comprehensive protection for VMware, Hyper-V, and physical servers. With Azure Site Recovery, you can use another on-premises server or Azure as your recovery site. For more information on Azure Site Recovery, see the [Azure Site Recovery documentation](https://azure.microsoft.com/documentation/services/site-recovery/). + +## Storage +There are several options for using Azure as a backup site for on-premises data. + +### StorSimple +StorSimple securely and transparently integrates cloud storage for on-premises applications. It also offers a single appliance that delivers high-performance tiered local and cloud storage, live archiving, cloud-based data protection, and disaster recovery. For more information, see the [StorSimple product page](https://azure.microsoft.com/services/storsimple/). + +### Azure Backup +Azure Backup enables cloud backups by using the familiar backup tools in Windows Server 2012 (or later), Windows Server 2012 Essentials (or later), and System Center 2012 Data Protection Manager (or later). These tools provide a workflow for backup management that is independent of the storage location of the backups, whether a local disk or Azure Storage. 
After data is backed up to the cloud, authorized users can easily recover backups to any server. + +With incremental backups, only changes to files are transferred to the cloud. This helps to efficiently use storage space, reduce bandwidth consumption, and support point-in-time recovery of multiple versions of the data. You can also choose to use additional features, such as data retention policies, data compression, and data transfer throttling. Using Azure as the backup location has the obvious advantage that the backups are automatically “offsite”. This eliminates the extra requirements to secure and protect on-site backup media. + +For more information, see [What is Azure Backup?](/azure/backup/backup-introduction-to-azure-backup/) and [Configure Azure Backup for DPM data](https://technet.microsoft.com/library/jj728752.aspx). + +## Database +You can have a disaster recovery solution for your SQL Server databases in a hybrid-IT environment by using AlwaysOn Availability Groups, database mirroring, log shipping, and backup and restore with Azure Blob storage. All of these solutions use SQL Server running on Azure Virtual Machines. + +AlwaysOn Availability Groups can be used in a hybrid-IT environment where database replicas exist both on-premises and in the cloud. This is shown in the following diagram. + +![SQL Server AlwaysOn Availability Groups in a hybrid cloud architecture](./images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-3.png) + +Database mirroring can also span on-premises servers and the cloud in a certificate-based setup. The following diagram illustrates this concept. + +![SQL Server database mirroring in a hybrid cloud architecture](./images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-4.png) + +Log shipping can be used to synchronize an on-premises database with a SQL Server database in an Azure virtual machine. 
+ +![SQL Server log shipping in a hybrid cloud architecture](./images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-5.png) + +Finally, you can back up an on-premises database directly to Azure Blob storage. + +![Back up SQL Server to Azure Blob storage in a hybrid cloud architecture](./images/technical-guidance-recovery-on-premises-azure/SQL_Server_Disaster_Recovery-6.png) + +For more information, see [High availability and disaster recovery for SQL Server in Azure virtual machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-high-availability-dr/) and [Backup and restore for SQL Server in Azure virtual machines](/azure/virtual-machines/windows/sql/virtual-machines-windows-sql-backup-recovery/). + +## Checklists for on-premises recovery in Microsoft Azure +### Networking +1. Review the Networking section of this document. +2. Use Virtual Network to securely connect on-premises to the cloud. + +### Compute +1. Review the Compute section of this document. +2. Relocate VMs between Hyper-V and Azure. + +### Storage +1. Review the Storage section of this document. +2. Take advantage of StorSimple services for using cloud storage. +3. Use the Azure Backup service. + +### Database +1. Review the Database section of this document. +2. Consider using SQL Server on Azure VMs as the backup. +3. Set up AlwaysOn Availability Groups. +4. Configure certificate-based database mirroring. +5. Use log shipping. +6. Back up on-premises databases to Azure Blob storage. 
+ + diff --git a/docs/resiliency/toc.md b/docs/resiliency/toc.md new file mode 100644 index 00000000000..e5e04fd3557 --- /dev/null +++ b/docs/resiliency/toc.md @@ -0,0 +1,14 @@ +# Design for Resiliency +## [Designing resilient applications](./index.md) +## [Resiliency checklist](../checklist/resiliency.md?toc=/azure/architecture/resiliency/toc.json) +## [Failure mode analysis](./failure-mode-analysis.md) + +## Additional guidance +### [Disaster recovery](./disaster-recovery-azure-applications.md) +### [Disaster recovery and high availability](./disaster-recovery-high-availability-azure-applications.md) +### [High availability](./high-availability-azure-applications.md) +### [High availability checklist](./high-availability-checklist.md) +### [Recovery from data corruption or accidental deletion](./recovery-data-corruption.md) +### [Recovery from local failures](./recovery-local-failures.md) +### [Recovery from a region-wide service disruption](./recovery-loss-azure-region.md) +### [Recovery from on-premises to Azure](./recovery-on-premises-azure.md) diff --git a/docs/service-fabric/images/node-placement.png b/docs/service-fabric/images/node-placement.png new file mode 100644 index 00000000000..cca485007f2 Binary files /dev/null and b/docs/service-fabric/images/node-placement.png differ diff --git a/docs/service-fabric/images/tailspin-cluster.png b/docs/service-fabric/images/tailspin-cluster.png new file mode 100644 index 00000000000..71094e0e3a1 Binary files /dev/null and b/docs/service-fabric/images/tailspin-cluster.png differ diff --git a/docs/service-fabric/images/tailspin01.png b/docs/service-fabric/images/tailspin01.png new file mode 100644 index 00000000000..a9e199b9603 Binary files /dev/null and b/docs/service-fabric/images/tailspin01.png differ diff --git a/docs/service-fabric/images/tailspin02.png b/docs/service-fabric/images/tailspin02.png new file mode 100644 index 00000000000..eb424345fb8 Binary files /dev/null and 
b/docs/service-fabric/images/tailspin02.png differ diff --git a/docs/service-fabric/migrate-from-cloud-services.md b/docs/service-fabric/migrate-from-cloud-services.md new file mode 100644 index 00000000000..cbd18d408c0 --- /dev/null +++ b/docs/service-fabric/migrate-from-cloud-services.md @@ -0,0 +1,326 @@ + +# Migrating an Azure Cloud Services application to Azure Service Fabric + +[![GitHub](../_images/github.png) Sample code][sample-code] + + +This article describes migrating an application from Azure Cloud Services to Azure Service Fabric. It focuses on architectural decisions and recommended practices. + +For this project, we started with a Cloud Services application called Surveys and ported it to Service Fabric. The goal was to migrate the application with as few changes as possible. In a later article, we will optimize the application for Service Fabric by adopting a microservices architecture. + +Before reading this article, it will be useful to understand the basics of Service Fabric and microservices architectures in general. See the following articles: + +- [Overview of Azure Service Fabric][sf-overview] +- [Why a microservices approach to building applications?][sf-why-microservices] + + +## About the Surveys application + +In 2012, the patterns & practices group created an application called Surveys, for a book called [Developing Multi-tenant Applications for the Cloud][tailspin-book]. The book describes a fictitious company named Tailspin that designs and implements the Surveys application. + +Surveys is a multitenant application that allows customers to create surveys. After a customer signs up for the application, members of the customer's organization can create and publish surveys, and collect the results for analysis. The application includes a public website where people can take a survey. Read more about the original Tailspin scenario [here][tailspin-scenario]. 
+ +Now Tailspin wants to move the Surveys application to a microservices architecture, using Service Fabric running on Azure. Because the application is already deployed as a Cloud Services application, Tailspin adopts a multi-phase approach: + +1. Port the cloud services to Service Fabric, while minimizing changes to the application. +2. Optimize the application for Service Fabric, by moving to a microservices architecture. + +This article describes the first phase. A later article will describe the second phase. In a real-world project, it's likely that both stages would overlap. While porting to Service Fabric, you would also start to re-architect the application into microservices. Later you might refine the architecture further, perhaps dividing coarse-grained services into smaller services. + +The application code is available on [GitHub][sample-code]. This repo contains both the Cloud Services application and the Service Fabric version. + +> The cloud service is an updated version of the original application from the *Developing Multi-tenant Applications* book. + +## Why Microservices? + +An in-depth discussion of microservices is beyond the scope of this article, but here are some of the benefits that Tailspin hopes to get by moving to a microservices architecture: + +- **Application upgrades**. Services can be deployed independently, so you can take an incremental approach to upgrading an application. +- **Resiliency and fault isolation**. If a service fails, other services continue to run. +- **Scalability**. Services can be scaled independently. +- **Flexibility**. Services are designed around business scenarios, not technology stacks, making it easier to migrate services to new technologies, frameworks, or data stores. +- **Agile development**. Individual services have less code than a monolithic application, making the code base easier to understand, reason about, and test. +- **Small, focused teams**. 
Because the application is broken down into many small services, each service can be built by a small focused team. + + +## Why Service Fabric? + +Service Fabric is a good fit for a microservices architecture, because most of the features needed in a distributed system are built into Service Fabric, including: + +- **Cluster management**. Service Fabric automatically handles node failover, health monitoring, and other cluster management functions. +- **Horizontal scaling**. When you add nodes to a Service Fabric cluster, the application automatically scales, as services are distributed across the new nodes. +- **Service discovery**. Service Fabric provides a discovery service that can resolve the endpoint for a named service. +- **Stateless and stateful services**. Stateful services use [reliable collections][sf-reliable-collections], which can take the place of a cache or queue, and can be partitioned. +- **Application lifecycle management**. Services can be upgraded independently and without application downtime. +- **Service orchestration** across a cluster of machines. +- **Higher density** for optimizing resource consumption. A single node can host multiple services. + +Service Fabric is used by various Microsoft services, including Azure SQL Database, DocumentDB, Azure Event Hubs, and others, making it a proven platform for building distributed cloud applications. + +## Comparing Cloud Services with Service Fabric + +The following table summarizes some of the important differences between Cloud Services and Service Fabric applications. For a more in-depth discussion, see [Learn about the differences between Cloud Services and Service Fabric before migrating applications][sf-compare-cloud-services]. 
+ +| | Cloud Services | Service Fabric | +|--------|---------------|----------------| +| Application composition | Roles| Services | +| Density |One role instance per VM | Multiple services in a single node | +| Minimum number of nodes | 2 per role | 5 per cluster, for production deployments | +| State management | Stateless | Stateless or stateful* | +| Hosting | Azure | Cloud or on-premises | +| Web hosting | IIS** | Self-hosting | +| Deployment model | [Classic deployment model][azure-deployment-models] | [Resource Manager][azure-deployment-models] | +| Packaging | Cloud service package files (.cspkg) | Application and service packages | +| Application update | VIP swap or rolling update | Rolling update | +| Auto-scaling | [Built-in service][cloud-service-autoscale] | VM Scale Sets for auto scale out | +| Debugging | Local emulator | Local cluster | + + +\* Stateful services use [reliable collections][sf-reliable-collections] to store state across replicas, so that all reads are local to the nodes in the cluster. Writes are replicated across nodes for reliability. Stateless services can have external state, using a database or other external storage. + +** Worker roles can also self-host ASP.NET Web API using OWIN. + +## The Surveys application on Cloud Services + +The following diagram shows the architecture of the Surveys application running on Cloud Services. + +![](./images/tailspin01.png) + +The application consists of two web roles and a worker role. + +- The **Tailspin.Web** web role hosts an ASP.NET website that Tailspin customers use to create and manage surveys. Customers also use this website to sign up for the application and manage their subscriptions. Finally, Tailspin administrators can use it to see the list of tenants and manage tenant data. + +- The **Tailspin.Web.Survey.Public** web role hosts an ASP.NET website where people can take the surveys that Tailspin customers publish. 
+ +- The **Tailspin.Workers.Survey** worker role does background processing. The web roles put work items onto a queue, and the worker role processes the items. Two background tasks are defined: Exporting survey answers to Azure SQL Database, and calculating statistics for survey answers. + +In addition to Cloud Services, the Surveys application uses some other Azure services: + +- **Azure Storage** to store surveys, surveys answers, and tenant information. + +- **Azure Redis Cache** to cache some of the data that is stored in Azure Storage, for faster read access. + +- **Azure Active Directory** (Azure AD) to authenticate customers and Tailspin administrators. + +- **Azure SQL Database** to store the survey answers for analysis. + +## Moving to Service Fabric + +As mentioned, the goal of this phase was migrating to Service Fabric with the minimum necessary changes. To that end, we created stateless services corresponding to each cloud service role in the original application: + +![](./images/tailspin02.png) + +Intentionally, this architecture is very similar to the original application. However, the diagram hides some important differences. In the rest of this article, we'll explore those differences. + + +## Converting the cloud service roles to services + +As mentioned, we migrated each cloud service role to a Service Fabric service. Because cloud service roles are stateless, for this phase it made sense to create stateless services in Service Fabric. + +For the migration, we followed the steps outlined in [Guide to converting Web and Worker Roles to Service Fabric stateless services][sf-migration]. + +### Creating the web front-end services + +In Service Fabric, a service runs inside a process created by the Service Fabric runtime. For a web front end, that means the service is not running inside IIS. Instead, the service must host a web server. This approach is called *self-hosting*, because the code that runs inside the process acts as the web server host. 
+ +The requirement to self-host means that a Service Fabric service can't use ASP.NET MVC or ASP.NET Web Forms, because those frameworks require IIS and do not support self-hosting. Options for self-hosting include: + +- [ASP.NET Core][aspnet-core], self-hosted using the [Kestrel][kestrel] web server. +- [ASP.NET Web API][aspnet-webapi], self-hosted using [OWIN][owin]. +- Third-party frameworks such as [Nancy](http://nancyfx.org/). + +The original Surveys application uses ASP.NET MVC. Because ASP.NET MVC cannot be self-hosted in Service Fabric, we considered the following migration options: + +- Port the web roles to ASP.NET Core, which can be self-hosted. +- Convert the web site into a single-page application (SPA) that calls a web API implemented using ASP.NET Web API. This would have required a complete redesign of the web front end. +- Keep the existing ASP.NET MVC code and deploy IIS in a Windows Server container to Service Fabric. This approach would require little or no code change. However, [container support][sf-containers] in Service Fabric is currently still in preview. + +Based on these considerations, we selected the first option, porting to ASP.NET Core. To do so, we followed the steps described in [Migrating From ASP.NET MVC to ASP.NET Core MVC][aspnet-migration]. + +> [!NOTE] +> When using ASP.NET Core with Kestrel, you should place a reverse proxy in front of Kestrel to handle traffic from the Internet, for security reasons. For more information, see [Kestrel web server implementation in ASP.NET Core][kestrel]. The section [Deploying the application](#deploying-the-application) describes a recommended Azure deployment. + +### HTTP listeners + +In Cloud Services, a web or worker role exposes an HTTP endpoint by declaring it in the [service definition file][cloud-service-endpoints]. A web role must have at least one endpoint. 
+ +```xml + + + + +``` + +Similarly, Service Fabric endpoints are declared in a service manifest: + +```xml + + + + +``` + +Unlike a cloud service role, however, Service Fabric services can be co-located within the same node. Therefore, every service must listen on a distinct port. Later in this article, we'll discuss how client requests on port 80 or port 443 get routed to the correct port for the service. + +A service must explicitly create listeners for each endpoint. The reason is that Service Fabric is agnostic about communication stacks. For more information, see [Build a web service front end for your application using ASP.NET Core][sf-aspnet-core]. + +## Packaging and configuration + + A cloud service contains the following configuration and package files: + +| File | Description | +|------|-------------| +| Service definition (.csdef) | Settings used by Azure to configure the cloud service. Defines the roles, endpoints, startup tasks, and the names of configuration settings. | +| Service configuration (.cscfg) | Per-deployment settings, including the number of role instances, endpoint port numbers, and the values of configuration settings. +| Service package (.cspkg) | Contains the application code and configurations, and the service definition file. | + +There is one .csdef file for the entire application. You can have multiple .cscfg files for different environments, such as local, test, or production. When the service is running, you can update the .cscfg but not the .csdef. For more information, see [What is the Cloud Service model and how do I package it?][cloud-service-config] + +Service Fabric has a similar division between a service *definition* and service *settings*, but the structure is more granular. To understand Service Fabric's configuration model, it helps to understand how a Service Fabric application is packaged. 
Here is the structure: + +``` +Application package + - Service packages + - Code package + - Configuration package + - Data package (optional) +``` + +The application package is what you deploy. It contains one or more service packages. A service package contains code, configuration, and data packages. The code package contains the binaries for the services, and the configuration package contains configuration settings. This model allows you to upgrade individual services without redeploying the entire application. It also lets you update just the configuration settings, without redeploying the code or restarting the service. + +A Service Fabric application contains the following configuration files: + +| File | Location | Description | +|------|----------|-------------| +| ApplicationManifest.xml | Application package | Defines the services that compose the application. | +| ServiceManifest.xml | Service package| Describes one or more services. | +| Settings.xml | Configuration package | Contains configuration settings for the services defined in the service package. | + +For more information, see [Model an application in Service Fabric][sf-application-model]. + +To support different configuration settings for multiple environments, use the following approach, described in [Manage application parameters for multiple environments][sf-multiple-environments]: + +1. Define the setting in the Settings.xml file for the service. +2. In the application manifest, define an override for the setting. +3. Put environment-specific settings into application parameter files. + + +## Deploying the application + +Whereas Azure Cloud Services is a managed service, Service Fabric is a runtime. You can create Service Fabric clusters in many environments, including Azure and on premises. In this article, we focus on deploying to Azure. 
+ +The following diagram shows a recommended deployment: + +![](./images/tailspin-cluster.png) + +The Service Fabric cluster is deployed to a [VM scale set][vm-scale-sets]. Scale sets are an Azure Compute resource that can be used to deploy and manage a set of identical VMs. + +As mentioned, the Kestrel web server requires a reverse proxy for security reasons. This diagram shows [Azure Application Gateway][application-gateway], which is an Azure service that offers various layer 7 load balancing capabilities. It acts as a reverse-proxy service, terminating the client connection and forwarding requests to back-end endpoints. You might use a different reverse proxy solution, such as nginx. + +### Layer 7 routing + +In the [original Surveys application](https://msdn.microsoft.com/en-us/library/hh534477.aspx#sec21), one web role listened on port 80, and the other web role listened on port 443. + +| Public site | Survey management site | +|-------------|------------------------| +| `http://tailspin.cloudapp.net` | `https://tailspin.cloudapp.net` | + +Another option is to use layer 7 routing. In this approach, different URL paths get routed to different port numbers on the back end. For example, the public site might use URL paths starting with `/public/`. + +Options for layer 7 routing include: + +- Use Application Gateway. + +- Use a network virtual appliance (NVA), such as nginx. + +- Write a custom gateway as a stateless service. + +Consider this approach if you have two or more services with public HTTP endpoints, but want them to appear as one site with a single domain name. + +> One approach that we *don't* recommend is allowing external clients to send requests through the Service Fabric [reverse proxy][sf-reverse-proxy]. Although this is possible, the reverse proxy is intended for service-to-service communication. Opening it to external clients exposes *any* service running in the cluster that has an HTTP endpoint. 
+ +### Node types and placement constraints + +In the deployment shown above, all the services run on all the nodes. However, you can also group services, so that certain services run only on particular nodes within the cluster. Reasons to use this approach include: + +- Run some services on different VM types. For example, some services might be compute-intensive or require GPUs. You can have a mix of VM types in your Service Fabric cluster. +- Isolate front-end services from back-end services, for security reasons. All the front-end services will run on one set of nodes, and the back-end services will run on different nodes in the same cluster. +- Different scale requirements. Some services might need to run on more nodes than other services. For example, if you define front-end nodes and back-end nodes, each set can be scaled independently. + +The following diagram shows a cluster that separates front-end and back-end services: + +![](./images/node-placement.png) + +To implement this approach: + +1. When you create the cluster, define two or more node types. +2. For each service, use [placement constraints][sf-placement-constraints] to assign the service to a node type. + +When you deploy to Azure, each node type is deployed to a separate VM scale set. The +Service Fabric cluster spans all node types. For more information, see [The relationship between Service Fabric node types and Virtual Machine Scale Sets][sf-node-types]. + +> If a cluster has multiple node types, one node type is designated as the *primary* node type. Service Fabric runtime services, such as the Cluster Management Service, run on the primary node type. Provision at least 5 nodes for the primary node type in a production environment. The other node type should have at least 2 nodes. + +## Configuring and managing the cluster + +Clusters must be secured to prevent unauthorized users from connecting to your cluster. 
We recommend using Azure AD to authenticate clients and X.509 certificates for node-to-node security. For more information, see [Service Fabric cluster security scenarios][sf-security]. + +To configure a public HTTPS endpoint, see [Specify resources in a service manifest][sf-manifest-resources]. + +You can scale out the application by adding VMs to the cluster. VM scale sets support auto-scaling using auto-scale rules based on performance counters. For more information, see [Scale a Service Fabric cluster in or out using auto-scale rules][sf-auto-scale]. + +While the cluster is running, you should collect logs from all the nodes in a central location. For more information, see [Collect logs by using Azure Diagnostics][sf-logs]. + + +## Conclusion + +Porting the Surveys application to Service Fabric was fairly straightforward. To summarize, we did the following: + +- Converted the roles to stateless services. +- Converted the web front ends to ASP.NET Core. +- Changed the packaging and configuration files to the Service Fabric model. + +In addition, the deployment changed from Cloud Services to a Service Fabric cluster running in a VM scale set. + +However, at this point the application does not get all the benefits of microservices, such as independent service deployment and versioning. To take full advantage of Service Fabric, Tailspin needs to optimize a bit further. That will be the subject of another article.
+ + + + + +[application-gateway]: /azure/application-gateway/ +[aspnet-core]: /aspnet/core/ +[aspnet-webapi]: https://www.asp.net/web-api +[aspnet-migration]: /aspnet/core/migration/mvc +[aspnet-hosting]: /aspnet/core/fundamentals/hosting +[aspnet-webapi]: https://www.asp.net/web-api +[azure-deployment-models]: /azure/azure-resource-manager/resource-manager-deployment-model +[cloud-service-autoscale]: /azure/cloud-services/cloud-services-how-to-scale-portal +[cloud-service-config]: /azure/cloud-services/cloud-services-model-and-package +[cloud-service-endpoints]: /azure/cloud-services/cloud-services-enable-communication-role-instances#worker-roles-vs-web-roles +[kestrel]: https://docs.microsoft.com/aspnet/core/fundamentals/servers/kestrel +[lb-probes]: /azure/load-balancer/load-balancer-custom-probe-overview +[owin]: https://www.asp.net/aspnet/overview/owin-and-katana +[sample-code]: https://github.com/mspnp/ServiceFabricGuidance +[sf-application-model]: /azure/service-fabric/service-fabric-application-model +[sf-aspnet-core]: /azure/service-fabric/service-fabric-add-a-web-frontend +[sf-auto-scale]: /azure/service-fabric/service-fabric-cluster-scale-up-down +[sf-compare-cloud-services]: /azure/service-fabric/service-fabric-cloud-services-migration-differences +[sf-connect-and-communicate]: /azure/service-fabric/service-fabric-connect-and-communicate-with-services +[sf-containers]: /azure/service-fabric/service-fabric-containers-overview +[sf-logs]: /azure/service-fabric/service-fabric-diagnostics-how-to-setup-wad +[sf-manifest-resources]: /azure/service-fabric/service-fabric-service-manifest-resources +[sf-migration]: /azure/service-fabric/service-fabric-cloud-services-migration-worker-role-stateless-service +[sf-multiple-environments]: /azure/service-fabric/service-fabric-manage-multiple-environment-app-configuration +[sf-node-types]: /azure/service-fabric/service-fabric-cluster-nodetypes +[sf-overview]: /azure/service-fabric/service-fabric-overview 
+[sf-placement-constraints]: /azure/service-fabric/service-fabric-cluster-resource-manager-cluster-description +[sf-reliable-collections]: /azure/service-fabric/service-fabric-reliable-services-reliable-collections +[sf-reliable-services]: /azure/service-fabric/service-fabric-reliable-services-introduction +[sf-reverse-proxy]: /azure/service-fabric/service-fabric-reverseproxy +[sf-security]: /azure/service-fabric/service-fabric-cluster-security +[sf-why-microservices]: /azure/service-fabric/service-fabric-overview-microservices +[tailspin-book]: https://msdn.microsoft.com/en-us/library/ff966499.aspx +[tailspin-scenario]: https://msdn.microsoft.com/en-us/library/hh534482.aspx +[unity]: https://msdn.microsoft.com/en-us/library/ff647202.aspx +[vm-scale-sets]: /azure/virtual-machine-scale-sets/virtual-machine-scale-sets-overview diff --git a/docs/toc.md b/docs/toc.md new file mode 100644 index 00000000000..c1e9efc63a5 --- /dev/null +++ b/docs/toc.md @@ -0,0 +1,42 @@ +# [Reference Architectures](./blueprints/index.md) +## [Identity management](./blueprints/identity/index.md) +## [Hybrid network](./blueprints/hybrid-networking/index.md) +## [Network DMZ](./blueprints/dmz/index.md) +## [VM workloads (Linux)](./blueprints/virtual-machines-linux/index.md) +## [VM workloads (Windows)](./blueprints/virtual-machines-windows/index.md) +## [Web application with managed services](./blueprints/managed-web-app/index.md) + +# [Cloud Design Patterns](./patterns/index.md) + +# Best Practices +## [API design](./best-practices/api-design.md) +## [API implementation](./best-practices/api-implementation.md) +## [Autoscaling](./best-practices/auto-scaling.md) +## [Background jobs](./best-practices/background-jobs.md) +## [Caching](./best-practices/caching.md) +## [Content Delivery Network](./best-practices/cdn.md) +## [Data partitioning](./best-practices/data-partitioning.md) +## [Monitoring and diagnostics](./best-practices/monitoring.md) +## [Naming 
conventions](./best-practices/naming-conventions.md) +## [Transient fault handling](./best-practices/transient-faults.md) +## [Retry guidance for specific services](./best-practices/retry-service-specific.md) + +# Design Review Checklists +## [Availability](./checklist/availability.md) +## [Resiliency](./checklist/resiliency.md) +## [Scalability](./checklist/scalability.md) + +# Design for Resiliency +## [Designing resilient applications](./resiliency/index.md) +## [Resiliency checklist](./checklist/resiliency.md) +## [Failure mode analysis](./resiliency/failure-mode-analysis.md) + +# Scenario guides +## [Azure for AWS Professionals](./aws-professional/index.md) +## [Manage Identity in Multitenant Applications](./multitenant-identity/index.md) +## [Migrate from Cloud Services to Service Fabric](./service-fabric/migrate-from-cloud-services.md) +## [Run Elasticsearch on Azure](./elasticsearch/index.md) + +# Blogs +## [AzureCAT](https://blogs.msdn.microsoft.com/azurecat/) +## [SQLCAT](https://blogs.msdn.microsoft.com/sqlcat/) \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 00000000000..2a9155c7eba --- /dev/null +++ b/package.json @@ -0,0 +1,22 @@ +{ + "name": "build", + "version": "0.0.1", + "private": true, + "description": "Generate markdown files for publishing system", + "main": "build", + "dependencies": { + "commonmark": "^0.27.0", + "glob": "^7.1.1", + "line-ending-corrector": "^1.0.0", + "shopify-liquid": "^1.6.1", + "yaml-front-matter": "^3.4.0" + }, + "devDependencies": {}, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "start": "node .\\build\\build.js", + "build": "node .\\build\\build.js && git commit -am \"built!\" && git push" + }, + "author": "patterns & practices", + "license": "MIT" +}