diff --git a/css/atom-one-dark-reasonable.css b/css/atom-one-dark-reasonable.css new file mode 100644 index 0000000000..fd41c996a3 --- /dev/null +++ b/css/atom-one-dark-reasonable.css @@ -0,0 +1,77 @@ +/* + +Atom One Dark With support for ReasonML by Gidi Morris, based off work by Daniel Gamage + +Original One Dark Syntax theme from https://github.com/atom/one-dark-syntax + +*/ +.hljs { /* Base rule for the highlighted block: default text/background colors, padding, rounded corners, and horizontal scrolling on overflow. */ + display: block; + overflow-x: auto; + padding: 0.5em; + line-height: 1.3em; + color: #abb2bf; + background: #282c34; + border-radius: 5px; +} +.hljs-keyword, .hljs-operator { /* Keywords and operators share one color. */ + color: #F92672; +} +.hljs-pattern-match { /* Same color as keywords/operators. */ + color: #F92672; +} +.hljs-pattern-match .hljs-constructor { /* Two-class selector: more specific than the single-class .hljs-constructor rule further down, so constructors inside a pattern match keep this color. */ + color: #61aeee; +} +.hljs-function { + color: #61aeee; +} +.hljs-function .hljs-params { /* Parameters nested inside a function. */ + color: #A6E22E; +} +.hljs-function .hljs-params .hljs-typing { /* Typing nested inside function parameters. */ + color: #FD971F; +} +.hljs-module-access .hljs-module { + color: #7e57c2; +} +.hljs-constructor { /* Default constructor color; overridden inside .hljs-pattern-match by the more specific rule above. */ + color: #e2b93d; +} +.hljs-constructor .hljs-string { /* Strings nested inside a constructor get their own color instead of the general .hljs-string one. */ + color: #9CCC65; +} +.hljs-comment, .hljs-quote { /* Comments and quotes: colored and italic. */ + color: #b18eb1; + font-style: italic; +} +.hljs-doctag, .hljs-formula { + color: #c678dd; +} +.hljs-section, .hljs-name, .hljs-selector-tag, .hljs-deletion, .hljs-subst { + color: #e06c75; +} +.hljs-literal { + color: #56b6c2; +} +.hljs-string, .hljs-regexp, .hljs-addition, .hljs-attribute, .hljs-meta-string { + color: #98c379; +} +.hljs-built_in, .hljs-class .hljs-title { /* .hljs-class .hljs-title is more specific than the plain .hljs-title selector in a later rule, so titles inside a class keep this color. */ + color: #e6c07b; +} +.hljs-attr, .hljs-variable, .hljs-template-variable, .hljs-type, .hljs-selector-class, .hljs-selector-attr, .hljs-selector-pseudo, .hljs-number { + color: #d19a66; +} +.hljs-symbol, .hljs-bullet, .hljs-link, .hljs-meta, .hljs-selector-id, .hljs-title { + color: #61aeee; +} +.hljs-emphasis { + font-style: italic; +} +.hljs-strong { + font-weight: bold; +} +.hljs-link { /* Second .hljs-link rule: adds the underline; the color comes from the grouped rule above. */ + text-decoration: underline; +} diff --git a/css/auto-complete.css b/css/auto-complete.css new file mode 100644 index 0000000000..1557ef6ae0 --- /dev/null +++ b/css/auto-complete.css @@ -0,0 
+1,49 @@ +.autocomplete-suggestions { /* Suggestion list container: bordered white box with no top border and a drop shadow. */ + text-align: left; + cursor: default; + border: 1px solid #ccc; + border-top: 0; + background: #fff; + box-shadow: -1px 1px 3px rgba(0,0,0,.8); + + /* core styles should not be changed */ + position: absolute; + display: none; /* Hidden by default; nothing in this stylesheet shows it, so presumably the autocomplete script toggles display - TODO confirm. */ + z-index: 9999; + max-height: 80%; + width: 33% !important; /* NOTE(review): !important presumably overrides a width set inline by the script - TODO confirm. */ + min-width: 266px; + overflow: hidden; + overflow-y: auto; /* Vertical scrolling once the list exceeds max-height. */ + box-sizing: border-box; + +} +.autocomplete-suggestion { /* One row of the list: kept on a single line, with overflow clipped and marked by an ellipsis. */ + position: relative; + cursor: pointer; + padding: 7px; + line-height: 23px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + color: #333; +} + +.autocomplete-suggestion b { /* <b> inside a row is not bold; it is set apart by color only. */ + font-weight: normal; + color: #1f8dd6; +} + +.autocomplete-suggestion.selected { /* Selected row: dark background with white text. */ + background: #333; + color: #fff; +} + +.autocomplete-suggestion:hover { /* Hovered row: slightly lighter dark background than .selected. */ + background: #444; + color: #fff; +} + +.autocomplete-suggestion > .context { /* Direct .context child of a row uses a fixed 12px font size. */ + font-size: 12px; +} diff --git a/css/featherlight.min.css b/css/featherlight.min.css new file mode 100644 index 0000000000..058487f916 --- /dev/null +++ b/css/featherlight.min.css @@ -0,0 +1,8 @@ +/** + * Featherlight - ultra slim jQuery lightbox + * Version 1.7.13 - http://noelboss.github.io/featherlight/ + * + * Copyright 2018, Noël Raoul Bossart (http://www.noelboss.com) + * MIT Licensed. 
+**/ +/* Root element: overflow is hidden while the with-featherlight class is present. */html.with-featherlight{overflow:hidden}/* Overlay: fixed to all four viewport edges and hidden by default. Solid #333 is declared first; the later rgba() declaration wins wherever the browser accepts it. */.featherlight{display:none;position:fixed;top:0;right:0;bottom:0;left:0;z-index:2147483647;text-align:center;white-space:nowrap;cursor:pointer;background:#333;background:rgba(0,0,0,0)}/* Only a .featherlight matching :last-of-type gets the 80%-opaque black backdrop. */.featherlight:last-of-type{background:rgba(0,0,0,.8)}/* Full-height inline-block pseudo-element with vertical-align:middle; combined with the inline-block, vertical-align:middle content box below, this looks intended to center the content vertically - TODO confirm. */.featherlight:before{content:'';display:inline-block;height:100%;vertical-align:middle}.featherlight .featherlight-content{position:relative;text-align:left;vertical-align:middle;display:inline-block;overflow:auto;padding:25px 25px 0;border-bottom:25px solid transparent;margin-left:5%;margin-right:5%;max-height:95%;background:#fff;cursor:auto;white-space:normal}.featherlight .featherlight-inner{display:block}/* link/script/style elements carrying .featherlight-inner are never displayed. */.featherlight link.featherlight-inner,.featherlight script.featherlight-inner,.featherlight style.featherlight-inner{display:none}.featherlight .featherlight-close-icon{position:absolute;z-index:9999;top:0;right:0;line-height:25px;width:25px;cursor:pointer;text-align:center;font-family:Arial,sans-serif;background:#fff;background:rgba(255,255,255,.3);color:#000;border:0;padding:0}.featherlight .featherlight-close-icon::-moz-focus-inner{border:0;padding:0}.featherlight .featherlight-image{width:100%}.featherlight-iframe .featherlight-content{border-bottom:0;padding:0;-webkit-overflow-scrolling:touch}.featherlight iframe{border:0}.featherlight *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}/* Viewports up to 1024px wide: no side margins, smaller padding and bottom border, larger max-height. */@media only screen and (max-width:1024px){.featherlight .featherlight-content{margin-left:0;margin-right:0;max-height:98%;padding:10px 10px 0;border-bottom:10px solid transparent}}/* Print: hide every grandchild of the root element that is not a .featherlight. */@media print{html.with-featherlight>*>:not(.featherlight){display:none}} \ No newline at end of file diff --git a/css/fontawesome-all.min.css b/css/fontawesome-all.min.css new file mode 100644 index 0000000000..de56473722 --- /dev/null +++ b/css/fontawesome-all.min.css @@ -0,0 +1 @@ 
+.fa,.fab,.fal,.far,.fas{-moz-osx-font-smoothing:grayscale;-webkit-font-smoothing:antialiased;display:inline-block;font-style:normal;font-variant:normal;text-rendering:auto;line-height:1}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-.0667em}.fa-xs{font-size:.75em}.fa-sm{font-size:.875em}.fa-1x{font-size:1em}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-6x{font-size:6em}.fa-7x{font-size:7em}.fa-8x{font-size:8em}.fa-9x{font-size:9em}.fa-10x{font-size:10em}.fa-fw{text-align:center;width:1.25em}.fa-ul{list-style-type:none;margin-left:2.5em;padding-left:0}.fa-ul>li{position:relative}.fa-li{left:-2em;position:absolute;text-align:center;width:2em;line-height:inherit}.fa-border{border:.08em solid #eee;border-radius:.1em;padding:.2em .25em .15em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left,.fab.fa-pull-left,.fal.fa-pull-left,.far.fa-pull-left,.fas.fa-pull-left{margin-right:.3em}.fa.fa-pull-right,.fab.fa-pull-right,.fal.fa-pull-right,.far.fa-pull-right,.fas.fa-pull-right{margin-left:.3em}.fa-spin{animation:fa-spin 2s infinite linear}.fa-pulse{animation:fa-spin 1s infinite steps(8)}@keyframes fa-spin{0%{transform:rotate(0deg)}to{transform:rotate(1turn)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";transform:scaleX(-1)}.fa-flip-vertical{transform:scaleY(-1)}.fa-flip-horizontal.fa-flip-vertical,.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)"}.fa-flip-horizontal.fa-flip-vertical{transform:scale(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root 
.fa-rotate-180,:root .fa-rotate-270{filter:none}.fa-stack{display:inline-block;height:2em;line-height:2em;position:relative;vertical-align:middle;width:2.5em}.fa-stack-1x,.fa-stack-2x{left:0;position:absolute;text-align:center;width:100%}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-500px:before{content:"\f26e"}.fa-accessible-icon:before{content:"\f368"}.fa-accusoft:before{content:"\f369"}.fa-acquisitions-incorporated:before{content:"\f6af"}.fa-ad:before{content:"\f641"}.fa-address-book:before{content:"\f2b9"}.fa-address-card:before{content:"\f2bb"}.fa-adjust:before{content:"\f042"}.fa-adn:before{content:"\f170"}.fa-adobe:before{content:"\f778"}.fa-adversal:before{content:"\f36a"}.fa-affiliatetheme:before{content:"\f36b"}.fa-air-freshener:before{content:"\f5d0"}.fa-algolia:before{content:"\f36c"}.fa-align-center:before{content:"\f037"}.fa-align-justify:before{content:"\f039"}.fa-align-left:before{content:"\f036"}.fa-align-right:before{content:"\f038"}.fa-alipay:before{content:"\f642"}.fa-allergies:before{content:"\f461"}.fa-amazon:before{content:"\f270"}.fa-amazon-pay:before{content:"\f42c"}.fa-ambulance:before{content:"\f0f9"}.fa-american-sign-language-interpreting:before{content:"\f2a3"}.fa-amilia:before{content:"\f36d"}.fa-anchor:before{content:"\f13d"}.fa-android:before{content:"\f17b"}.fa-angellist:before{content:"\f209"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-down:before{content:"\f107"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angry:before{content:"\f556"}.fa-angrycreative:before{content:"\f36e"}.fa-angular:before{content:"\f420"}.fa-ankh:before{content:"\f644"}.fa-app-store:before{content:"\f36f"}.fa-app-store-ios:before{content:"\f370"}.fa-apper:before{content:"\f371"}.fa-apple:before{content:"\f1
79"}.fa-apple-alt:before{content:"\f5d1"}.fa-apple-pay:before{content:"\f415"}.fa-archive:before{content:"\f187"}.fa-archway:before{content:"\f557"}.fa-arrow-alt-circle-down:before{content:"\f358"}.fa-arrow-alt-circle-left:before{content:"\f359"}.fa-arrow-alt-circle-right:before{content:"\f35a"}.fa-arrow-alt-circle-up:before{content:"\f35b"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-down:before{content:"\f063"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrows-alt:before{content:"\f0b2"}.fa-arrows-alt-h:before{content:"\f337"}.fa-arrows-alt-v:before{content:"\f338"}.fa-artstation:before{content:"\f77a"}.fa-assistive-listening-systems:before{content:"\f2a2"}.fa-asterisk:before{content:"\f069"}.fa-asymmetrik:before{content:"\f372"}.fa-at:before{content:"\f1fa"}.fa-atlas:before{content:"\f558"}.fa-atlassian:before{content:"\f77b"}.fa-atom:before{content:"\f5d2"}.fa-audible:before{content:"\f373"}.fa-audio-description:before{content:"\f29e"}.fa-autoprefixer:before{content:"\f41c"}.fa-avianex:before{content:"\f374"}.fa-aviato:before{content:"\f421"}.fa-award:before{content:"\f559"}.fa-aws:before{content:"\f375"}.fa-baby:before{content:"\f77c"}.fa-baby-carriage:before{content:"\f77d"}.fa-backspace:before{content:"\f55a"}.fa-backward:before{content:"\f04a"}.fa-balance-scale:before{content:"\f24e"}.fa-ban:before{content:"\f05e"}.fa-band-aid:before{content:"\f462"}.fa-bandcamp:before{content:"\f2d5"}.fa-barcode:before{content:"\f02a"}.fa-bars:before{content:"\f0c9"}.fa-baseball-ball:before{content:"\f433"}.fa-basketball-ball:before{content:"\f434"}.fa-bath:before{content:"\f2cd"}.fa-battery-empty:before{content:"\f244"}.fa-battery-full:before{content:"\f240"}.fa-battery-half:before{content:"\f242"}.fa-battery-quarter:before{content:"\f243"}.fa-battery-three-qu
arters:before{content:"\f241"}.fa-bed:before{content:"\f236"}.fa-beer:before{content:"\f0fc"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-bell:before{content:"\f0f3"}.fa-bell-slash:before{content:"\f1f6"}.fa-bezier-curve:before{content:"\f55b"}.fa-bible:before{content:"\f647"}.fa-bicycle:before{content:"\f206"}.fa-bimobject:before{content:"\f378"}.fa-binoculars:before{content:"\f1e5"}.fa-biohazard:before{content:"\f780"}.fa-birthday-cake:before{content:"\f1fd"}.fa-bitbucket:before{content:"\f171"}.fa-bitcoin:before{content:"\f379"}.fa-bity:before{content:"\f37a"}.fa-black-tie:before{content:"\f27e"}.fa-blackberry:before{content:"\f37b"}.fa-blender:before{content:"\f517"}.fa-blender-phone:before{content:"\f6b6"}.fa-blind:before{content:"\f29d"}.fa-blog:before{content:"\f781"}.fa-blogger:before{content:"\f37c"}.fa-blogger-b:before{content:"\f37d"}.fa-bluetooth:before{content:"\f293"}.fa-bluetooth-b:before{content:"\f294"}.fa-bold:before{content:"\f032"}.fa-bolt:before{content:"\f0e7"}.fa-bomb:before{content:"\f1e2"}.fa-bone:before{content:"\f5d7"}.fa-bong:before{content:"\f55c"}.fa-book:before{content:"\f02d"}.fa-book-dead:before{content:"\f6b7"}.fa-book-open:before{content:"\f518"}.fa-book-reader:before{content:"\f5da"}.fa-bookmark:before{content:"\f02e"}.fa-bowling-ball:before{content:"\f436"}.fa-box:before{content:"\f466"}.fa-box-open:before{content:"\f49e"}.fa-boxes:before{content:"\f468"}.fa-braille:before{content:"\f2a1"}.fa-brain:before{content:"\f5dc"}.fa-briefcase:before{content:"\f0b1"}.fa-briefcase-medical:before{content:"\f469"}.fa-broadcast-tower:before{content:"\f519"}.fa-broom:before{content:"\f51a"}.fa-brush:before{content:"\f55d"}.fa-btc:before{content:"\f15a"}.fa-bug:before{content:"\f188"}.fa-building:before{content:"\f1ad"}.fa-bullhorn:before{content:"\f0a1"}.fa-bullseye:before{content:"\f140"}.fa-burn:before{content:"\f46a"}.fa-buromobelexperte:before{content:"\f37f"}.fa-bus:before{content:"\f207"}.fa-bus-alt:be
fore{content:"\f55e"}.fa-business-time:before{content:"\f64a"}.fa-buysellads:before{content:"\f20d"}.fa-calculator:before{content:"\f1ec"}.fa-calendar:before{content:"\f133"}.fa-calendar-alt:before{content:"\f073"}.fa-calendar-check:before{content:"\f274"}.fa-calendar-day:before{content:"\f783"}.fa-calendar-minus:before{content:"\f272"}.fa-calendar-plus:before{content:"\f271"}.fa-calendar-times:before{content:"\f273"}.fa-calendar-week:before{content:"\f784"}.fa-camera:before{content:"\f030"}.fa-camera-retro:before{content:"\f083"}.fa-campground:before{content:"\f6bb"}.fa-canadian-maple-leaf:before{content:"\f785"}.fa-candy-cane:before{content:"\f786"}.fa-cannabis:before{content:"\f55f"}.fa-capsules:before{content:"\f46b"}.fa-car:before{content:"\f1b9"}.fa-car-alt:before{content:"\f5de"}.fa-car-battery:before{content:"\f5df"}.fa-car-crash:before{content:"\f5e1"}.fa-car-side:before{content:"\f5e4"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-caret-square-down:before{content:"\f150"}.fa-caret-square-left:before{content:"\f191"}.fa-caret-square-right:before{content:"\f152"}.fa-caret-square-up:before{content:"\f151"}.fa-caret-up:before{content:"\f0d8"}.fa-carrot:before{content:"\f787"}.fa-cart-arrow-down:before{content:"\f218"}.fa-cart-plus:before{content:"\f217"}.fa-cash-register:before{content:"\f788"}.fa-cat:before{content:"\f6be"}.fa-cc-amazon-pay:before{content:"\f42d"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-apple-pay:before{content:"\f416"}.fa-cc-diners-club:before{content:"\f24c"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-cc-visa:before{content:"\f1f0"}.fa-centercode:before{content:"\f380"}.fa-centos:before{content:"\f789"}.fa-certificate:before{content:"\f0a3"}.fa-chair:before{content:"\f6c0"}.fa-chalkboard:before{content:"\f51b"}.fa-chalkboa
rd-teacher:before{content:"\f51c"}.fa-charging-station:before{content:"\f5e7"}.fa-chart-area:before{content:"\f1fe"}.fa-chart-bar:before{content:"\f080"}.fa-chart-line:before{content:"\f201"}.fa-chart-pie:before{content:"\f200"}.fa-check:before{content:"\f00c"}.fa-check-circle:before{content:"\f058"}.fa-check-double:before{content:"\f560"}.fa-check-square:before{content:"\f14a"}.fa-chess:before{content:"\f439"}.fa-chess-bishop:before{content:"\f43a"}.fa-chess-board:before{content:"\f43c"}.fa-chess-king:before{content:"\f43f"}.fa-chess-knight:before{content:"\f441"}.fa-chess-pawn:before{content:"\f443"}.fa-chess-queen:before{content:"\f445"}.fa-chess-rook:before{content:"\f447"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-down:before{content:"\f078"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-chevron-up:before{content:"\f077"}.fa-child:before{content:"\f1ae"}.fa-chrome:before{content:"\f268"}.fa-church:before{content:"\f51d"}.fa-circle:before{content:"\f111"}.fa-circle-notch:before{content:"\f1ce"}.fa-city:before{content:"\f64f"}.fa-clipboard:before{content:"\f328"}.fa-clipboard-check:before{content:"\f46c"}.fa-clipboard-list:before{content:"\f46d"}.fa-clock:before{content:"\f017"}.fa-clone:before{content:"\f24d"}.fa-closed-captioning:before{content:"\f20a"}.fa-cloud:before{content:"\f0c2"}.fa-cloud-download-alt:before{content:"\f381"}.fa-cloud-meatball:before{content:"\f73b"}.fa-cloud-moon:before{content:"\f6c3"}.fa-cloud-moon-rain:before{content:"\f73c"}.fa-cloud-rain:before{content:"\f73d"}.fa-cloud-showers-heavy:before{content:"\f740"}.fa-cloud-sun:before{content:"\f6c4"}.fa-cloud-sun-rain:before{content:"\f743"}.fa-cloud-upload-alt:before{content:"\f382"}.fa-cloudscale:before{content:"\f383"}.fa-cloudsmith:before{content:"\f384"}.fa-cloudversify:before{content:"\f385"}.f
a-cocktail:before{content:"\f561"}.fa-code:before{content:"\f121"}.fa-code-branch:before{content:"\f126"}.fa-codepen:before{content:"\f1cb"}.fa-codiepie:before{content:"\f284"}.fa-coffee:before{content:"\f0f4"}.fa-cog:before{content:"\f013"}.fa-cogs:before{content:"\f085"}.fa-coins:before{content:"\f51e"}.fa-columns:before{content:"\f0db"}.fa-comment:before{content:"\f075"}.fa-comment-alt:before{content:"\f27a"}.fa-comment-dollar:before{content:"\f651"}.fa-comment-dots:before{content:"\f4ad"}.fa-comment-slash:before{content:"\f4b3"}.fa-comments:before{content:"\f086"}.fa-comments-dollar:before{content:"\f653"}.fa-compact-disc:before{content:"\f51f"}.fa-compass:before{content:"\f14e"}.fa-compress:before{content:"\f066"}.fa-compress-arrows-alt:before{content:"\f78c"}.fa-concierge-bell:before{content:"\f562"}.fa-confluence:before{content:"\f78d"}.fa-connectdevelop:before{content:"\f20e"}.fa-contao:before{content:"\f26d"}.fa-cookie:before{content:"\f563"}.fa-cookie-bite:before{content:"\f564"}.fa-copy:before{content:"\f0c5"}.fa-copyright:before{content:"\f1f9"}.fa-couch:before{content:"\f4b8"}.fa-cpanel:before{content:"\f388"}.fa-creative-commons:before{content:"\f25e"}.fa-creative-commons-by:before{content:"\f4e7"}.fa-creative-commons-nc:before{content:"\f4e8"}.fa-creative-commons-nc-eu:before{content:"\f4e9"}.fa-creative-commons-nc-jp:before{content:"\f4ea"}.fa-creative-commons-nd:before{content:"\f4eb"}.fa-creative-commons-pd:before{content:"\f4ec"}.fa-creative-commons-pd-alt:before{content:"\f4ed"}.fa-creative-commons-remix:before{content:"\f4ee"}.fa-creative-commons-sa:before{content:"\f4ef"}.fa-creative-commons-sampling:before{content:"\f4f0"}.fa-creative-commons-sampling-plus:before{content:"\f4f1"}.fa-creative-commons-share:before{content:"\f4f2"}.fa-creative-commons-zero:before{content:"\f4f3"}.fa-credit-card:before{content:"\f09d"}.fa-critical-role:before{content:"\f6c9"}.fa-crop:before{content:"\f125"}.fa-crop-alt:before{content:"\f565"}.fa-cross:before{conte
nt:"\f654"}.fa-crosshairs:before{content:"\f05b"}.fa-crow:before{content:"\f520"}.fa-crown:before{content:"\f521"}.fa-css3:before{content:"\f13c"}.fa-css3-alt:before{content:"\f38b"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-cut:before{content:"\f0c4"}.fa-cuttlefish:before{content:"\f38c"}.fa-d-and-d:before{content:"\f38d"}.fa-d-and-d-beyond:before{content:"\f6ca"}.fa-dashcube:before{content:"\f210"}.fa-database:before{content:"\f1c0"}.fa-deaf:before{content:"\f2a4"}.fa-delicious:before{content:"\f1a5"}.fa-democrat:before{content:"\f747"}.fa-deploydog:before{content:"\f38e"}.fa-deskpro:before{content:"\f38f"}.fa-desktop:before{content:"\f108"}.fa-dev:before{content:"\f6cc"}.fa-deviantart:before{content:"\f1bd"}.fa-dharmachakra:before{content:"\f655"}.fa-dhl:before{content:"\f790"}.fa-diagnoses:before{content:"\f470"}.fa-diaspora:before{content:"\f791"}.fa-dice:before{content:"\f522"}.fa-dice-d20:before{content:"\f6cf"}.fa-dice-d6:before{content:"\f6d1"}.fa-dice-five:before{content:"\f523"}.fa-dice-four:before{content:"\f524"}.fa-dice-one:before{content:"\f525"}.fa-dice-six:before{content:"\f526"}.fa-dice-three:before{content:"\f527"}.fa-dice-two:before{content:"\f528"}.fa-digg:before{content:"\f1a6"}.fa-digital-ocean:before{content:"\f391"}.fa-digital-tachograph:before{content:"\f566"}.fa-directions:before{content:"\f5eb"}.fa-discord:before{content:"\f392"}.fa-discourse:before{content:"\f393"}.fa-divide:before{content:"\f529"}.fa-dizzy:before{content:"\f567"}.fa-dna:before{content:"\f471"}.fa-dochub:before{content:"\f394"}.fa-docker:before{content:"\f395"}.fa-dog:before{content:"\f6d3"}.fa-dollar-sign:before{content:"\f155"}.fa-dolly:before{content:"\f472"}.fa-dolly-flatbed:before{content:"\f474"}.fa-donate:before{content:"\f4b9"}.fa-door-closed:before{content:"\f52a"}.fa-door-open:before{content:"\f52b"}.fa-dot-circle:before{content:"\f192"}.fa-dove:before{content:"\f4ba"}.fa-download:before{content:"\f019"}.fa-draft2digital:before{content
:"\f396"}.fa-drafting-compass:before{content:"\f568"}.fa-dragon:before{content:"\f6d5"}.fa-draw-polygon:before{content:"\f5ee"}.fa-dribbble:before{content:"\f17d"}.fa-dribbble-square:before{content:"\f397"}.fa-dropbox:before{content:"\f16b"}.fa-drum:before{content:"\f569"}.fa-drum-steelpan:before{content:"\f56a"}.fa-drumstick-bite:before{content:"\f6d7"}.fa-drupal:before{content:"\f1a9"}.fa-dumbbell:before{content:"\f44b"}.fa-dumpster:before{content:"\f793"}.fa-dumpster-fire:before{content:"\f794"}.fa-dungeon:before{content:"\f6d9"}.fa-dyalog:before{content:"\f399"}.fa-earlybirds:before{content:"\f39a"}.fa-ebay:before{content:"\f4f4"}.fa-edge:before{content:"\f282"}.fa-edit:before{content:"\f044"}.fa-eject:before{content:"\f052"}.fa-elementor:before{content:"\f430"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-ello:before{content:"\f5f1"}.fa-ember:before{content:"\f423"}.fa-empire:before{content:"\f1d1"}.fa-envelope:before{content:"\f0e0"}.fa-envelope-open:before{content:"\f2b6"}.fa-envelope-open-text:before{content:"\f658"}.fa-envelope-square:before{content:"\f199"}.fa-envira:before{content:"\f299"}.fa-equals:before{content:"\f52c"}.fa-eraser:before{content:"\f12d"}.fa-erlang:before{content:"\f39d"}.fa-ethereum:before{content:"\f42e"}.fa-ethernet:before{content:"\f796"}.fa-etsy:before{content:"\f2d7"}.fa-euro-sign:before{content:"\f153"}.fa-exchange-alt:before{content:"\f362"}.fa-exclamation:before{content:"\f12a"}.fa-exclamation-circle:before{content:"\f06a"}.fa-exclamation-triangle:before{content:"\f071"}.fa-expand:before{content:"\f065"}.fa-expand-arrows-alt:before{content:"\f31e"}.fa-expeditedssl:before{content:"\f23e"}.fa-external-link-alt:before{content:"\f35d"}.fa-external-link-square-alt:before{content:"\f360"}.fa-eye:before{content:"\f06e"}.fa-eye-dropper:before{content:"\f1fb"}.fa-eye-slash:before{content:"\f070"}.fa-facebook:before{content:"\f09a"}.fa-facebook-f:before{content:"\f39e"}.fa-facebook-messenger:before{conten
t:"\f39f"}.fa-facebook-square:before{content:"\f082"}.fa-fantasy-flight-games:before{content:"\f6dc"}.fa-fast-backward:before{content:"\f049"}.fa-fast-forward:before{content:"\f050"}.fa-fax:before{content:"\f1ac"}.fa-feather:before{content:"\f52d"}.fa-feather-alt:before{content:"\f56b"}.fa-fedex:before{content:"\f797"}.fa-fedora:before{content:"\f798"}.fa-female:before{content:"\f182"}.fa-fighter-jet:before{content:"\f0fb"}.fa-figma:before{content:"\f799"}.fa-file:before{content:"\f15b"}.fa-file-alt:before{content:"\f15c"}.fa-file-archive:before{content:"\f1c6"}.fa-file-audio:before{content:"\f1c7"}.fa-file-code:before{content:"\f1c9"}.fa-file-contract:before{content:"\f56c"}.fa-file-csv:before{content:"\f6dd"}.fa-file-download:before{content:"\f56d"}.fa-file-excel:before{content:"\f1c3"}.fa-file-export:before{content:"\f56e"}.fa-file-image:before{content:"\f1c5"}.fa-file-import:before{content:"\f56f"}.fa-file-invoice:before{content:"\f570"}.fa-file-invoice-dollar:before{content:"\f571"}.fa-file-medical:before{content:"\f477"}.fa-file-medical-alt:before{content:"\f478"}.fa-file-pdf:before{content:"\f1c1"}.fa-file-powerpoint:before{content:"\f1c4"}.fa-file-prescription:before{content:"\f572"}.fa-file-signature:before{content:"\f573"}.fa-file-upload:before{content:"\f574"}.fa-file-video:before{content:"\f1c8"}.fa-file-word:before{content:"\f1c2"}.fa-fill:before{content:"\f575"}.fa-fill-drip:before{content:"\f576"}.fa-film:before{content:"\f008"}.fa-filter:before{content:"\f0b0"}.fa-fingerprint:before{content:"\f577"}.fa-fire:before{content:"\f06d"}.fa-fire-alt:before{content:"\f7e4"}.fa-fire-extinguisher:before{content:"\f134"}.fa-firefox:before{content:"\f269"}.fa-first-aid:before{content:"\f479"}.fa-first-order:before{content:"\f2b0"}.fa-first-order-alt:before{content:"\f50a"}.fa-firstdraft:before{content:"\f3a1"}.fa-fish:before{content:"\f578"}.fa-fist-raised:before{content:"\f6de"}.fa-flag:before{content:"\f024"}.fa-flag-checkered:before{content:"\f11e"}.fa-flag-u
sa:before{content:"\f74d"}.fa-flask:before{content:"\f0c3"}.fa-flickr:before{content:"\f16e"}.fa-flipboard:before{content:"\f44d"}.fa-flushed:before{content:"\f579"}.fa-fly:before{content:"\f417"}.fa-folder:before{content:"\f07b"}.fa-folder-minus:before{content:"\f65d"}.fa-folder-open:before{content:"\f07c"}.fa-folder-plus:before{content:"\f65e"}.fa-font:before{content:"\f031"}.fa-font-awesome:before{content:"\f2b4"}.fa-font-awesome-alt:before{content:"\f35c"}.fa-font-awesome-flag:before{content:"\f425"}.fa-font-awesome-logo-full:before{content:"\f4e6"}.fa-fonticons:before{content:"\f280"}.fa-fonticons-fi:before{content:"\f3a2"}.fa-football-ball:before{content:"\f44e"}.fa-fort-awesome:before{content:"\f286"}.fa-fort-awesome-alt:before{content:"\f3a3"}.fa-forumbee:before{content:"\f211"}.fa-forward:before{content:"\f04e"}.fa-foursquare:before{content:"\f180"}.fa-free-code-camp:before{content:"\f2c5"}.fa-freebsd:before{content:"\f3a4"}.fa-frog:before{content:"\f52e"}.fa-frown:before{content:"\f119"}.fa-frown-open:before{content:"\f57a"}.fa-fulcrum:before{content:"\f50b"}.fa-funnel-dollar:before{content:"\f662"}.fa-futbol:before{content:"\f1e3"}.fa-galactic-republic:before{content:"\f50c"}.fa-galactic-senate:before{content:"\f50d"}.fa-gamepad:before{content:"\f11b"}.fa-gas-pump:before{content:"\f52f"}.fa-gavel:before{content:"\f0e3"}.fa-gem:before{content:"\f3a5"}.fa-genderless:before{content:"\f22d"}.fa-get-pocket:before{content:"\f265"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-ghost:before{content:"\f6e2"}.fa-gift:before{content:"\f06b"}.fa-gifts:before{content:"\f79c"}.fa-git:before{content:"\f1d3"}.fa-git-square:before{content:"\f1d2"}.fa-github:before{content:"\f09b"}.fa-github-alt:before{content:"\f113"}.fa-github-square:before{content:"\f092"}.fa-gitkraken:before{content:"\f3a6"}.fa-gitlab:before{content:"\f296"}.fa-gitter:before{content:"\f426"}.fa-glass-cheers:before{content:"\f79f"}.fa-glass-martini:before{content:"\f000"}.fa-glas
s-martini-alt:before{content:"\f57b"}.fa-glass-whiskey:before{content:"\f7a0"}.fa-glasses:before{content:"\f530"}.fa-glide:before{content:"\f2a5"}.fa-glide-g:before{content:"\f2a6"}.fa-globe:before{content:"\f0ac"}.fa-globe-africa:before{content:"\f57c"}.fa-globe-americas:before{content:"\f57d"}.fa-globe-asia:before{content:"\f57e"}.fa-globe-europe:before{content:"\f7a2"}.fa-gofore:before{content:"\f3a7"}.fa-golf-ball:before{content:"\f450"}.fa-goodreads:before{content:"\f3a8"}.fa-goodreads-g:before{content:"\f3a9"}.fa-google:before{content:"\f1a0"}.fa-google-drive:before{content:"\f3aa"}.fa-google-play:before{content:"\f3ab"}.fa-google-plus:before{content:"\f2b3"}.fa-google-plus-g:before{content:"\f0d5"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-wallet:before{content:"\f1ee"}.fa-gopuram:before{content:"\f664"}.fa-graduation-cap:before{content:"\f19d"}.fa-gratipay:before{content:"\f184"}.fa-grav:before{content:"\f2d6"}.fa-greater-than:before{content:"\f531"}.fa-greater-than-equal:before{content:"\f532"}.fa-grimace:before{content:"\f57f"}.fa-grin:before{content:"\f580"}.fa-grin-alt:before{content:"\f581"}.fa-grin-beam:before{content:"\f582"}.fa-grin-beam-sweat:before{content:"\f583"}.fa-grin-hearts:before{content:"\f584"}.fa-grin-squint:before{content:"\f585"}.fa-grin-squint-tears:before{content:"\f586"}.fa-grin-stars:before{content:"\f587"}.fa-grin-tears:before{content:"\f588"}.fa-grin-tongue:before{content:"\f589"}.fa-grin-tongue-squint:before{content:"\f58a"}.fa-grin-tongue-wink:before{content:"\f58b"}.fa-grin-wink:before{content:"\f58c"}.fa-grip-horizontal:before{content:"\f58d"}.fa-grip-lines:before{content:"\f7a4"}.fa-grip-lines-vertical:before{content:"\f7a5"}.fa-grip-vertical:before{content:"\f58e"}.fa-gripfire:before{content:"\f3ac"}.fa-grunt:before{content:"\f3ad"}.fa-guitar:before{content:"\f7a6"}.fa-gulp:before{content:"\f3ae"}.fa-h-square:before{content:"\f0fd"}.fa-hacker-news:before{content:"\f1d4"}.fa-hacker-news-square:before{content:"\f
3af"}.fa-hackerrank:before{content:"\f5f7"}.fa-hammer:before{content:"\f6e3"}.fa-hamsa:before{content:"\f665"}.fa-hand-holding:before{content:"\f4bd"}.fa-hand-holding-heart:before{content:"\f4be"}.fa-hand-holding-usd:before{content:"\f4c0"}.fa-hand-lizard:before{content:"\f258"}.fa-hand-paper:before{content:"\f256"}.fa-hand-peace:before{content:"\f25b"}.fa-hand-point-down:before{content:"\f0a7"}.fa-hand-point-left:before{content:"\f0a5"}.fa-hand-point-right:before{content:"\f0a4"}.fa-hand-point-up:before{content:"\f0a6"}.fa-hand-pointer:before{content:"\f25a"}.fa-hand-rock:before{content:"\f255"}.fa-hand-scissors:before{content:"\f257"}.fa-hand-spock:before{content:"\f259"}.fa-hands:before{content:"\f4c2"}.fa-hands-helping:before{content:"\f4c4"}.fa-handshake:before{content:"\f2b5"}.fa-hanukiah:before{content:"\f6e6"}.fa-hashtag:before{content:"\f292"}.fa-hat-wizard:before{content:"\f6e8"}.fa-haykal:before{content:"\f666"}.fa-hdd:before{content:"\f0a0"}.fa-heading:before{content:"\f1dc"}.fa-headphones:before{content:"\f025"}.fa-headphones-alt:before{content:"\f58f"}.fa-headset:before{content:"\f590"}.fa-heart:before{content:"\f004"}.fa-heart-broken:before{content:"\f7a9"}.fa-heartbeat:before{content:"\f21e"}.fa-helicopter:before{content:"\f533"}.fa-highlighter:before{content:"\f591"}.fa-hiking:before{content:"\f6ec"}.fa-hippo:before{content:"\f6ed"}.fa-hips:before{content:"\f452"}.fa-hire-a-helper:before{content:"\f3b0"}.fa-history:before{content:"\f1da"}.fa-hockey-puck:before{content:"\f453"}.fa-holly-berry:before{content:"\f7aa"}.fa-home:before{content:"\f015"}.fa-hooli:before{content:"\f427"}.fa-hornbill:before{content:"\f592"}.fa-horse:before{content:"\f6f0"}.fa-horse-head:before{content:"\f7ab"}.fa-hospital:before{content:"\f0f8"}.fa-hospital-alt:before{content:"\f47d"}.fa-hospital-symbol:before{content:"\f47e"}.fa-hot-tub:before{content:"\f593"}.fa-hotel:before{content:"\f594"}.fa-hotjar:before{content:"\f3b1"}.fa-hourglass:before{content:"\f254"}.fa-hourglass
-end:before{content:"\f253"}.fa-hourglass-half:before{content:"\f252"}.fa-hourglass-start:before{content:"\f251"}.fa-house-damage:before{content:"\f6f1"}.fa-houzz:before{content:"\f27c"}.fa-hryvnia:before{content:"\f6f2"}.fa-html5:before{content:"\f13b"}.fa-hubspot:before{content:"\f3b2"}.fa-i-cursor:before{content:"\f246"}.fa-icicles:before{content:"\f7ad"}.fa-id-badge:before{content:"\f2c1"}.fa-id-card:before{content:"\f2c2"}.fa-id-card-alt:before{content:"\f47f"}.fa-igloo:before{content:"\f7ae"}.fa-image:before{content:"\f03e"}.fa-images:before{content:"\f302"}.fa-imdb:before{content:"\f2d8"}.fa-inbox:before{content:"\f01c"}.fa-indent:before{content:"\f03c"}.fa-industry:before{content:"\f275"}.fa-infinity:before{content:"\f534"}.fa-info:before{content:"\f129"}.fa-info-circle:before{content:"\f05a"}.fa-instagram:before{content:"\f16d"}.fa-intercom:before{content:"\f7af"}.fa-internet-explorer:before{content:"\f26b"}.fa-invision:before{content:"\f7b0"}.fa-ioxhost:before{content:"\f208"}.fa-italic:before{content:"\f033"}.fa-itunes:before{content:"\f3b4"}.fa-itunes-note:before{content:"\f3b5"}.fa-java:before{content:"\f4e4"}.fa-jedi:before{content:"\f669"}.fa-jedi-order:before{content:"\f50e"}.fa-jenkins:before{content:"\f3b6"}.fa-jira:before{content:"\f7b1"}.fa-joget:before{content:"\f3b7"}.fa-joint:before{content:"\f595"}.fa-joomla:before{content:"\f1aa"}.fa-journal-whills:before{content:"\f66a"}.fa-js:before{content:"\f3b8"}.fa-js-square:before{content:"\f3b9"}.fa-jsfiddle:before{content:"\f1cc"}.fa-kaaba:before{content:"\f66b"}.fa-kaggle:before{content:"\f5fa"}.fa-key:before{content:"\f084"}.fa-keybase:before{content:"\f4f5"}.fa-keyboard:before{content:"\f11c"}.fa-keycdn:before{content:"\f3ba"}.fa-khanda:before{content:"\f66d"}.fa-kickstarter:before{content:"\f3bb"}.fa-kickstarter-k:before{content:"\f3bc"}.fa-kiss:before{content:"\f596"}.fa-kiss-beam:before{content:"\f597"}.fa-kiss-wink-heart:before{content:"\f598"}.fa-kiwi-bird:before{content:"\f535"}.fa-korvue:b
efore{content:"\f42f"}.fa-landmark:before{content:"\f66f"}.fa-language:before{content:"\f1ab"}.fa-laptop:before{content:"\f109"}.fa-laptop-code:before{content:"\f5fc"}.fa-laravel:before{content:"\f3bd"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-laugh:before{content:"\f599"}.fa-laugh-beam:before{content:"\f59a"}.fa-laugh-squint:before{content:"\f59b"}.fa-laugh-wink:before{content:"\f59c"}.fa-layer-group:before{content:"\f5fd"}.fa-leaf:before{content:"\f06c"}.fa-leanpub:before{content:"\f212"}.fa-lemon:before{content:"\f094"}.fa-less:before{content:"\f41d"}.fa-less-than:before{content:"\f536"}.fa-less-than-equal:before{content:"\f537"}.fa-level-down-alt:before{content:"\f3be"}.fa-level-up-alt:before{content:"\f3bf"}.fa-life-ring:before{content:"\f1cd"}.fa-lightbulb:before{content:"\f0eb"}.fa-line:before{content:"\f3c0"}.fa-link:before{content:"\f0c1"}.fa-linkedin:before{content:"\f08c"}.fa-linkedin-in:before{content:"\f0e1"}.fa-linode:before{content:"\f2b8"}.fa-linux:before{content:"\f17c"}.fa-lira-sign:before{content:"\f195"}.fa-list:before{content:"\f03a"}.fa-list-alt:before{content:"\f022"}.fa-list-ol:before{content:"\f0cb"}.fa-list-ul:before{content:"\f0ca"}.fa-location-arrow:before{content:"\f124"}.fa-lock:before{content:"\f023"}.fa-lock-open:before{content:"\f3c1"}.fa-long-arrow-alt-down:before{content:"\f309"}.fa-long-arrow-alt-left:before{content:"\f30a"}.fa-long-arrow-alt-right:before{content:"\f30b"}.fa-long-arrow-alt-up:before{content:"\f30c"}.fa-low-vision:before{content:"\f2a8"}.fa-luggage-cart:before{content:"\f59d"}.fa-lyft:before{content:"\f3c3"}.fa-magento:before{content:"\f3c4"}.fa-magic:before{content:"\f0d0"}.fa-magnet:before{content:"\f076"}.fa-mail-bulk:before{content:"\f674"}.fa-mailchimp:before{content:"\f59e"}.fa-male:before{content:"\f183"}.fa-mandalorian:before{content:"\f50f"}.fa-map:before{content:"\f279"}.fa-map-marked:before{content:"\f59f"}.fa-map-marked-alt:before{content:"\f5a0"}.fa-map-marker:befor
e{content:"\f041"}.fa-map-marker-alt:before{content:"\f3c5"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-markdown:before{content:"\f60f"}.fa-marker:before{content:"\f5a1"}.fa-mars:before{content:"\f222"}.fa-mars-double:before{content:"\f227"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mask:before{content:"\f6fa"}.fa-mastodon:before{content:"\f4f6"}.fa-maxcdn:before{content:"\f136"}.fa-medal:before{content:"\f5a2"}.fa-medapps:before{content:"\f3c6"}.fa-medium:before{content:"\f23a"}.fa-medium-m:before{content:"\f3c7"}.fa-medkit:before{content:"\f0fa"}.fa-medrt:before{content:"\f3c8"}.fa-meetup:before{content:"\f2e0"}.fa-megaport:before{content:"\f5a3"}.fa-meh:before{content:"\f11a"}.fa-meh-blank:before{content:"\f5a4"}.fa-meh-rolling-eyes:before{content:"\f5a5"}.fa-memory:before{content:"\f538"}.fa-mendeley:before{content:"\f7b3"}.fa-menorah:before{content:"\f676"}.fa-mercury:before{content:"\f223"}.fa-meteor:before{content:"\f753"}.fa-microchip:before{content:"\f2db"}.fa-microphone:before{content:"\f130"}.fa-microphone-alt:before{content:"\f3c9"}.fa-microphone-alt-slash:before{content:"\f539"}.fa-microphone-slash:before{content:"\f131"}.fa-microscope:before{content:"\f610"}.fa-microsoft:before{content:"\f3ca"}.fa-minus:before{content:"\f068"}.fa-minus-circle:before{content:"\f056"}.fa-minus-square:before{content:"\f146"}.fa-mitten:before{content:"\f7b5"}.fa-mix:before{content:"\f3cb"}.fa-mixcloud:before{content:"\f289"}.fa-mizuni:before{content:"\f3cc"}.fa-mobile:before{content:"\f10b"}.fa-mobile-alt:before{content:"\f3cd"}.fa-modx:before{content:"\f285"}.fa-monero:before{content:"\f3d0"}.fa-money-bill:before{content:"\f0d6"}.fa-money-bill-alt:before{content:"\f3d1"}.fa-money-bill-wave:before{content:"\f53a"}.fa-money-bill-wave-alt:before{content:"\f53b"}.fa-money-check:before{content:"\f53c"}.fa-money-check-alt:before{content:"\f53d"}.fa-monument:before{c
ontent:"\f5a6"}.fa-moon:before{content:"\f186"}.fa-mortar-pestle:before{content:"\f5a7"}.fa-mosque:before{content:"\f678"}.fa-motorcycle:before{content:"\f21c"}.fa-mountain:before{content:"\f6fc"}.fa-mouse-pointer:before{content:"\f245"}.fa-mug-hot:before{content:"\f7b6"}.fa-music:before{content:"\f001"}.fa-napster:before{content:"\f3d2"}.fa-neos:before{content:"\f612"}.fa-network-wired:before{content:"\f6ff"}.fa-neuter:before{content:"\f22c"}.fa-newspaper:before{content:"\f1ea"}.fa-nimblr:before{content:"\f5a8"}.fa-nintendo-switch:before{content:"\f418"}.fa-node:before{content:"\f419"}.fa-node-js:before{content:"\f3d3"}.fa-not-equal:before{content:"\f53e"}.fa-notes-medical:before{content:"\f481"}.fa-npm:before{content:"\f3d4"}.fa-ns8:before{content:"\f3d5"}.fa-nutritionix:before{content:"\f3d6"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-oil-can:before{content:"\f613"}.fa-old-republic:before{content:"\f510"}.fa-om:before{content:"\f679"}.fa-opencart:before{content:"\f23d"}.fa-openid:before{content:"\f19b"}.fa-opera:before{content:"\f26a"}.fa-optin-monster:before{content:"\f23c"}.fa-osi:before{content:"\f41a"}.fa-otter:before{content:"\f700"}.fa-outdent:before{content:"\f03b"}.fa-page4:before{content:"\f3d7"}.fa-pagelines:before{content:"\f18c"}.fa-paint-brush:before{content:"\f1fc"}.fa-paint-roller:before{content:"\f5aa"}.fa-palette:before{content:"\f53f"}.fa-palfed:before{content:"\f3d8"}.fa-pallet:before{content:"\f482"}.fa-paper-plane:before{content:"\f1d8"}.fa-paperclip:before{content:"\f0c6"}.fa-parachute-box:before{content:"\f4cd"}.fa-paragraph:before{content:"\f1dd"}.fa-parking:before{content:"\f540"}.fa-passport:before{content:"\f5ab"}.fa-pastafarianism:before{content:"\f67b"}.fa-paste:before{content:"\f0ea"}.fa-patreon:before{content:"\f3d9"}.fa-pause:before{content:"\f04c"}.fa-pause-circle:before{content:"\f28b"}.fa-paw:before
{content:"\f1b0"}.fa-paypal:before{content:"\f1ed"}.fa-peace:before{content:"\f67c"}.fa-pen:before{content:"\f304"}.fa-pen-alt:before{content:"\f305"}.fa-pen-fancy:before{content:"\f5ac"}.fa-pen-nib:before{content:"\f5ad"}.fa-pen-square:before{content:"\f14b"}.fa-pencil-alt:before{content:"\f303"}.fa-pencil-ruler:before{content:"\f5ae"}.fa-penny-arcade:before{content:"\f704"}.fa-people-carry:before{content:"\f4ce"}.fa-percent:before{content:"\f295"}.fa-percentage:before{content:"\f541"}.fa-periscope:before{content:"\f3da"}.fa-person-booth:before{content:"\f756"}.fa-phabricator:before{content:"\f3db"}.fa-phoenix-framework:before{content:"\f3dc"}.fa-phoenix-squadron:before{content:"\f511"}.fa-phone:before{content:"\f095"}.fa-phone-slash:before{content:"\f3dd"}.fa-phone-square:before{content:"\f098"}.fa-phone-volume:before{content:"\f2a0"}.fa-php:before{content:"\f457"}.fa-pied-piper:before{content:"\f2ae"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-pied-piper-hat:before{content:"\f4e5"}.fa-pied-piper-pp:before{content:"\f1a7"}.fa-piggy-bank:before{content:"\f4d3"}.fa-pills:before{content:"\f484"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-p:before{content:"\f231"}.fa-pinterest-square:before{content:"\f0d3"}.fa-place-of-worship:before{content:"\f67f"}.fa-plane:before{content:"\f072"}.fa-plane-arrival:before{content:"\f5af"}.fa-plane-departure:before{content:"\f5b0"}.fa-play:before{content:"\f04b"}.fa-play-circle:before{content:"\f144"}.fa-playstation:before{content:"\f3df"}.fa-plug:before{content:"\f1e6"}.fa-plus:before{content:"\f067"}.fa-plus-circle:before{content:"\f055"}.fa-plus-square:before{content:"\f0fe"}.fa-podcast:before{content:"\f2ce"}.fa-poll:before{content:"\f681"}.fa-poll-h:before{content:"\f682"}.fa-poo:before{content:"\f2fe"}.fa-poo-storm:before{content:"\f75a"}.fa-poop:before{content:"\f619"}.fa-portrait:before{content:"\f3e0"}.fa-pound-sign:before{content:"\f154"}.fa-power-off:before{content:"\f011"}.fa-pray:before{content:"\f683"}.fa-prayin
g-hands:before{content:"\f684"}.fa-prescription:before{content:"\f5b1"}.fa-prescription-bottle:before{content:"\f485"}.fa-prescription-bottle-alt:before{content:"\f486"}.fa-print:before{content:"\f02f"}.fa-procedures:before{content:"\f487"}.fa-product-hunt:before{content:"\f288"}.fa-project-diagram:before{content:"\f542"}.fa-pushed:before{content:"\f3e1"}.fa-puzzle-piece:before{content:"\f12e"}.fa-python:before{content:"\f3e2"}.fa-qq:before{content:"\f1d6"}.fa-qrcode:before{content:"\f029"}.fa-question:before{content:"\f128"}.fa-question-circle:before{content:"\f059"}.fa-quidditch:before{content:"\f458"}.fa-quinscape:before{content:"\f459"}.fa-quora:before{content:"\f2c4"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-quran:before{content:"\f687"}.fa-r-project:before{content:"\f4f7"}.fa-radiation:before{content:"\f7b9"}.fa-radiation-alt:before{content:"\f7ba"}.fa-rainbow:before{content:"\f75b"}.fa-random:before{content:"\f074"}.fa-raspberry-pi:before{content:"\f7bb"}.fa-ravelry:before{content:"\f2d9"}.fa-react:before{content:"\f41b"}.fa-reacteurope:before{content:"\f75d"}.fa-readme:before{content:"\f4d5"}.fa-rebel:before{content:"\f1d0"}.fa-receipt:before{content:"\f543"}.fa-recycle:before{content:"\f1b8"}.fa-red-river:before{content:"\f3e3"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-alien:before{content:"\f281"}.fa-reddit-square:before{content:"\f1a2"}.fa-redhat:before{content:"\f7bc"}.fa-redo:before{content:"\f01e"}.fa-redo-alt:before{content:"\f2f9"}.fa-registered:before{content:"\f25d"}.fa-renren:before{content:"\f18b"}.fa-reply:before{content:"\f3e5"}.fa-reply-all:before{content:"\f122"}.fa-replyd:before{content:"\f3e6"}.fa-republican:before{content:"\f75e"}.fa-researchgate:before{content:"\f4f8"}.fa-resolving:before{content:"\f3e7"}.fa-restroom:before{content:"\f7bd"}.fa-retweet:before{content:"\f079"}.fa-rev:before{content:"\f5b2"}.fa-ribbon:before{content:"\f4d6"}.fa-ring:before{content:"\f70b"}.fa-road:before{content:"\f0
18"}.fa-robot:before{content:"\f544"}.fa-rocket:before{content:"\f135"}.fa-rocketchat:before{content:"\f3e8"}.fa-rockrms:before{content:"\f3e9"}.fa-route:before{content:"\f4d7"}.fa-rss:before{content:"\f09e"}.fa-rss-square:before{content:"\f143"}.fa-ruble-sign:before{content:"\f158"}.fa-ruler:before{content:"\f545"}.fa-ruler-combined:before{content:"\f546"}.fa-ruler-horizontal:before{content:"\f547"}.fa-ruler-vertical:before{content:"\f548"}.fa-running:before{content:"\f70c"}.fa-rupee-sign:before{content:"\f156"}.fa-sad-cry:before{content:"\f5b3"}.fa-sad-tear:before{content:"\f5b4"}.fa-safari:before{content:"\f267"}.fa-sass:before{content:"\f41e"}.fa-satellite:before{content:"\f7bf"}.fa-satellite-dish:before{content:"\f7c0"}.fa-save:before{content:"\f0c7"}.fa-schlix:before{content:"\f3ea"}.fa-school:before{content:"\f549"}.fa-screwdriver:before{content:"\f54a"}.fa-scribd:before{content:"\f28a"}.fa-scroll:before{content:"\f70e"}.fa-sd-card:before{content:"\f7c2"}.fa-search:before{content:"\f002"}.fa-search-dollar:before{content:"\f688"}.fa-search-location:before{content:"\f689"}.fa-search-minus:before{content:"\f010"}.fa-search-plus:before{content:"\f00e"}.fa-searchengin:before{content:"\f3eb"}.fa-seedling:before{content:"\f4d8"}.fa-sellcast:before{content:"\f2da"}.fa-sellsy:before{content:"\f213"}.fa-server:before{content:"\f233"}.fa-servicestack:before{content:"\f3ec"}.fa-shapes:before{content:"\f61f"}.fa-share:before{content:"\f064"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-share-square:before{content:"\f14d"}.fa-shekel-sign:before{content:"\f20b"}.fa-shield-alt:before{content:"\f3ed"}.fa-ship:before{content:"\f21a"}.fa-shipping-fast:before{content:"\f48b"}.fa-shirtsinbulk:before{content:"\f214"}.fa-shoe-prints:before{content:"\f54b"}.fa-shopping-bag:before{content:"\f290"}.fa-shopping-basket:before{content:"\f291"}.fa-shopping-cart:before{content:"\f07a"}.fa-shopware:before{content:"\f5b5"}.fa-shower:before{content:"\f2c
c"}.fa-shuttle-van:before{content:"\f5b6"}.fa-sign:before{content:"\f4d9"}.fa-sign-in-alt:before{content:"\f2f6"}.fa-sign-language:before{content:"\f2a7"}.fa-sign-out-alt:before{content:"\f2f5"}.fa-signal:before{content:"\f012"}.fa-signature:before{content:"\f5b7"}.fa-sim-card:before{content:"\f7c4"}.fa-simplybuilt:before{content:"\f215"}.fa-sistrix:before{content:"\f3ee"}.fa-sitemap:before{content:"\f0e8"}.fa-sith:before{content:"\f512"}.fa-skating:before{content:"\f7c5"}.fa-sketch:before{content:"\f7c6"}.fa-skiing:before{content:"\f7c9"}.fa-skiing-nordic:before{content:"\f7ca"}.fa-skull:before{content:"\f54c"}.fa-skull-crossbones:before{content:"\f714"}.fa-skyatlas:before{content:"\f216"}.fa-skype:before{content:"\f17e"}.fa-slack:before{content:"\f198"}.fa-slack-hash:before{content:"\f3ef"}.fa-slash:before{content:"\f715"}.fa-sleigh:before{content:"\f7cc"}.fa-sliders-h:before{content:"\f1de"}.fa-slideshare:before{content:"\f1e7"}.fa-smile:before{content:"\f118"}.fa-smile-beam:before{content:"\f5b8"}.fa-smile-wink:before{content:"\f4da"}.fa-smog:before{content:"\f75f"}.fa-smoking:before{content:"\f48d"}.fa-smoking-ban:before{content:"\f54d"}.fa-sms:before{content:"\f7cd"}.fa-snapchat:before{content:"\f2ab"}.fa-snapchat-ghost:before{content:"\f2ac"}.fa-snapchat-square:before{content:"\f2ad"}.fa-snowboarding:before{content:"\f7ce"}.fa-snowflake:before{content:"\f2dc"}.fa-snowman:before{content:"\f7d0"}.fa-snowplow:before{content:"\f7d2"}.fa-socks:before{content:"\f696"}.fa-solar-panel:before{content:"\f5ba"}.fa-sort:before{content:"\f0dc"}.fa-sort-alpha-down:before{content:"\f15d"}.fa-sort-alpha-up:before{content:"\f15e"}.fa-sort-amount-down:before{content:"\f160"}.fa-sort-amount-up:before{content:"\f161"}.fa-sort-down:before{content:"\f0dd"}.fa-sort-numeric-down:before{content:"\f162"}.fa-sort-numeric-up:before{content:"\f163"}.fa-sort-up:before{content:"\f0de"}.fa-soundcloud:before{content:"\f1be"}.fa-sourcetree:before{content:"\f7d3"}.fa-spa:before{content:"\f5bb"
}.fa-space-shuttle:before{content:"\f197"}.fa-speakap:before{content:"\f3f3"}.fa-spider:before{content:"\f717"}.fa-spinner:before{content:"\f110"}.fa-splotch:before{content:"\f5bc"}.fa-spotify:before{content:"\f1bc"}.fa-spray-can:before{content:"\f5bd"}.fa-square:before{content:"\f0c8"}.fa-square-full:before{content:"\f45c"}.fa-square-root-alt:before{content:"\f698"}.fa-squarespace:before{content:"\f5be"}.fa-stack-exchange:before{content:"\f18d"}.fa-stack-overflow:before{content:"\f16c"}.fa-stamp:before{content:"\f5bf"}.fa-star:before{content:"\f005"}.fa-star-and-crescent:before{content:"\f699"}.fa-star-half:before{content:"\f089"}.fa-star-half-alt:before{content:"\f5c0"}.fa-star-of-david:before{content:"\f69a"}.fa-star-of-life:before{content:"\f621"}.fa-staylinked:before{content:"\f3f5"}.fa-steam:before{content:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-steam-symbol:before{content:"\f3f6"}.fa-step-backward:before{content:"\f048"}.fa-step-forward:before{content:"\f051"}.fa-stethoscope:before{content:"\f0f1"}.fa-sticker-mule:before{content:"\f3f7"}.fa-sticky-note:before{content:"\f249"}.fa-stop:before{content:"\f04d"}.fa-stop-circle:before{content:"\f28d"}.fa-stopwatch:before{content:"\f2f2"}.fa-store:before{content:"\f54e"}.fa-store-alt:before{content:"\f54f"}.fa-strava:before{content:"\f428"}.fa-stream:before{content:"\f550"}.fa-street-view:before{content:"\f21d"}.fa-strikethrough:before{content:"\f0cc"}.fa-stripe:before{content:"\f429"}.fa-stripe-s:before{content:"\f42a"}.fa-stroopwafel:before{content:"\f551"}.fa-studiovinari:before{content:"\f3f8"}.fa-stumbleupon:before{content:"\f1a4"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-subscript:before{content:"\f12c"}.fa-subway:before{content:"\f239"}.fa-suitcase:before{content:"\f0f2"}.fa-suitcase-rolling:before{content:"\f5c1"}.fa-sun:before{content:"\f185"}.fa-superpowers:before{content:"\f2dd"}.fa-superscript:before{content:"\f12b"}.fa-supple:before{content:"\f3f9"}.fa-surprise:before{content:"\f5c2"
}.fa-suse:before{content:"\f7d6"}.fa-swatchbook:before{content:"\f5c3"}.fa-swimmer:before{content:"\f5c4"}.fa-swimming-pool:before{content:"\f5c5"}.fa-synagogue:before{content:"\f69b"}.fa-sync:before{content:"\f021"}.fa-sync-alt:before{content:"\f2f1"}.fa-syringe:before{content:"\f48e"}.fa-table:before{content:"\f0ce"}.fa-table-tennis:before{content:"\f45d"}.fa-tablet:before{content:"\f10a"}.fa-tablet-alt:before{content:"\f3fa"}.fa-tablets:before{content:"\f490"}.fa-tachometer-alt:before{content:"\f3fd"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-tape:before{content:"\f4db"}.fa-tasks:before{content:"\f0ae"}.fa-taxi:before{content:"\f1ba"}.fa-teamspeak:before{content:"\f4f9"}.fa-teeth:before{content:"\f62e"}.fa-teeth-open:before{content:"\f62f"}.fa-telegram:before{content:"\f2c6"}.fa-telegram-plane:before{content:"\f3fe"}.fa-temperature-high:before{content:"\f769"}.fa-temperature-low:before{content:"\f76b"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-tenge:before{content:"\f7d7"}.fa-terminal:before{content:"\f120"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-th:before{content:"\f00a"}.fa-th-large:before{content:"\f009"}.fa-th-list:before{content:"\f00b"}.fa-the-red-yeti:before{content:"\f69d"}.fa-theater-masks:before{content:"\f630"}.fa-themeco:before{content:"\f5c6"}.fa-themeisle:before{content:"\f2b2"}.fa-thermometer:before{content:"\f491"}.fa-thermometer-empty:before{content:"\f2cb"}.fa-thermometer-full:before{content:"\f2c7"}.fa-thermometer-half:before{content:"\f2c9"}.fa-thermometer-quarter:before{content:"\f2ca"}.fa-thermometer-three-quarters:before{content:"\f2c8"}.fa-think-peaks:before{content:"\f731"}.fa-thumbs-down:before{content:"\f165"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbtack:before{content:"\f08d"}.fa-ticket-alt:before{content:"\f3ff"}.fa-times:before{content:"\f00d"}.fa-times-circle:before{content:"\f057"}.fa-tint:before{content:"\f043"}.fa-tint-slash:before{content:"\f5c7"}.fa-tired:b
efore{content:"\f5c8"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-toilet:before{content:"\f7d8"}.fa-toilet-paper:before{content:"\f71e"}.fa-toolbox:before{content:"\f552"}.fa-tools:before{content:"\f7d9"}.fa-tooth:before{content:"\f5c9"}.fa-torah:before{content:"\f6a0"}.fa-torii-gate:before{content:"\f6a1"}.fa-tractor:before{content:"\f722"}.fa-trade-federation:before{content:"\f513"}.fa-trademark:before{content:"\f25c"}.fa-traffic-light:before{content:"\f637"}.fa-train:before{content:"\f238"}.fa-tram:before{content:"\f7da"}.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-trash:before{content:"\f1f8"}.fa-trash-alt:before{content:"\f2ed"}.fa-tree:before{content:"\f1bb"}.fa-trello:before{content:"\f181"}.fa-tripadvisor:before{content:"\f262"}.fa-trophy:before{content:"\f091"}.fa-truck:before{content:"\f0d1"}.fa-truck-loading:before{content:"\f4de"}.fa-truck-monster:before{content:"\f63b"}.fa-truck-moving:before{content:"\f4df"}.fa-truck-pickup:before{content:"\f63c"}.fa-tshirt:before{content:"\f553"}.fa-tty:before{content:"\f1e4"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-tv:before{content:"\f26c"}.fa-twitch:before{content:"\f1e8"}.fa-twitter:before{content:"\f099"}.fa-twitter-square:before{content:"\f081"}.fa-typo3:before{content:"\f42b"}.fa-uber:before{content:"\f402"}.fa-ubuntu:before{content:"\f7df"}.fa-uikit:before{content:"\f403"}.fa-umbrella:before{content:"\f0e9"}.fa-umbrella-beach:before{content:"\f5ca"}.fa-underline:before{content:"\f0cd"}.fa-undo:before{content:"\f0e2"}.fa-undo-alt:before{content:"\f2ea"}.fa-uniregistry:before{content:"\f404"}.fa-universal-access:before{content:"\f29a"}.fa-university:before{content:"\f19c"}.fa-unlink:before{content:"\f127"}.fa-unlock:before{content:"\f09c"}.fa-unlock-alt:before{content:"\f13e"}.fa-untappd:before{content:"\f405"}.fa-upload:before{content:"\f093"}.fa-ups:before{content:"\f7e0"}.fa-usb:before{content:"\f
287"}.fa-user:before{content:"\f007"}.fa-user-alt:before{content:"\f406"}.fa-user-alt-slash:before{content:"\f4fa"}.fa-user-astronaut:before{content:"\f4fb"}.fa-user-check:before{content:"\f4fc"}.fa-user-circle:before{content:"\f2bd"}.fa-user-clock:before{content:"\f4fd"}.fa-user-cog:before{content:"\f4fe"}.fa-user-edit:before{content:"\f4ff"}.fa-user-friends:before{content:"\f500"}.fa-user-graduate:before{content:"\f501"}.fa-user-injured:before{content:"\f728"}.fa-user-lock:before{content:"\f502"}.fa-user-md:before{content:"\f0f0"}.fa-user-minus:before{content:"\f503"}.fa-user-ninja:before{content:"\f504"}.fa-user-plus:before{content:"\f234"}.fa-user-secret:before{content:"\f21b"}.fa-user-shield:before{content:"\f505"}.fa-user-slash:before{content:"\f506"}.fa-user-tag:before{content:"\f507"}.fa-user-tie:before{content:"\f508"}.fa-user-times:before{content:"\f235"}.fa-users:before{content:"\f0c0"}.fa-users-cog:before{content:"\f509"}.fa-usps:before{content:"\f7e1"}.fa-ussunnah:before{content:"\f407"}.fa-utensil-spoon:before{content:"\f2e5"}.fa-utensils:before{content:"\f2e7"}.fa-vaadin:before{content:"\f408"}.fa-vector-square:before{content:"\f5cb"}.fa-venus:before{content:"\f221"}.fa-venus-double:before{content:"\f226"}.fa-venus-mars:before{content:"\f228"}.fa-viacoin:before{content:"\f237"}.fa-viadeo:before{content:"\f2a9"}.fa-viadeo-square:before{content:"\f2aa"}.fa-vial:before{content:"\f492"}.fa-vials:before{content:"\f493"}.fa-viber:before{content:"\f409"}.fa-video:before{content:"\f03d"}.fa-video-slash:before{content:"\f4e2"}.fa-vihara:before{content:"\f6a7"}.fa-vimeo:before{content:"\f40a"}.fa-vimeo-square:before{content:"\f194"}.fa-vimeo-v:before{content:"\f27d"}.fa-vine:before{content:"\f1ca"}.fa-vk:before{content:"\f189"}.fa-vnv:before{content:"\f40b"}.fa-volleyball-ball:before{content:"\f45f"}.fa-volume-down:before{content:"\f027"}.fa-volume-mute:before{content:"\f6a9"}.fa-volume-off:before{content:"\f026"}.fa-volume-up:before{content:"\f028"}.fa-vote-ye
a:before{content:"\f772"}.fa-vr-cardboard:before{content:"\f729"}.fa-vuejs:before{content:"\f41f"}.fa-walking:before{content:"\f554"}.fa-wallet:before{content:"\f555"}.fa-warehouse:before{content:"\f494"}.fa-water:before{content:"\f773"}.fa-weebly:before{content:"\f5cc"}.fa-weibo:before{content:"\f18a"}.fa-weight:before{content:"\f496"}.fa-weight-hanging:before{content:"\f5cd"}.fa-weixin:before{content:"\f1d7"}.fa-whatsapp:before{content:"\f232"}.fa-whatsapp-square:before{content:"\f40c"}.fa-wheelchair:before{content:"\f193"}.fa-whmcs:before{content:"\f40d"}.fa-wifi:before{content:"\f1eb"}.fa-wikipedia-w:before{content:"\f266"}.fa-wind:before{content:"\f72e"}.fa-window-close:before{content:"\f410"}.fa-window-maximize:before{content:"\f2d0"}.fa-window-minimize:before{content:"\f2d1"}.fa-window-restore:before{content:"\f2d2"}.fa-windows:before{content:"\f17a"}.fa-wine-bottle:before{content:"\f72f"}.fa-wine-glass:before{content:"\f4e3"}.fa-wine-glass-alt:before{content:"\f5ce"}.fa-wix:before{content:"\f5cf"}.fa-wizards-of-the-coast:before{content:"\f730"}.fa-wolf-pack-battalion:before{content:"\f514"}.fa-won-sign:before{content:"\f159"}.fa-wordpress:before{content:"\f19a"}.fa-wordpress-simple:before{content:"\f411"}.fa-wpbeginner:before{content:"\f297"}.fa-wpexplorer:before{content:"\f2de"}.fa-wpforms:before{content:"\f298"}.fa-wpressr:before{content:"\f3e4"}.fa-wrench:before{content:"\f0ad"}.fa-x-ray:before{content:"\f497"}.fa-xbox:before{content:"\f412"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-y-combinator:before{content:"\f23b"}.fa-yahoo:before{content:"\f19e"}.fa-yandex:before{content:"\f413"}.fa-yandex-international:before{content:"\f414"}.fa-yarn:before{content:"\f7e3"}.fa-yelp:before{content:"\f1e9"}.fa-yen-sign:before{content:"\f157"}.fa-yin-yang:before{content:"\f6ad"}.fa-yoast:before{content:"\f2b1"}.fa-youtube:before{content:"\f167"}.fa-youtube-square:before{content:"\f431"}.fa-zhihu:before{content:"\f63f"}.sr-only{border:0;
clip:rect(0,0,0,0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.sr-only-focusable:active,.sr-only-focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}@font-face{font-family:"Font Awesome 5 Brands";font-style:normal;font-weight:normal;src:url(../webfonts/fa-brands-400.eot);src:url(../webfonts/fa-brands-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-brands-400.woff2) format("woff2"),url(../webfonts/fa-brands-400.woff) format("woff"),url(../webfonts/fa-brands-400.ttf) format("truetype"),url(../webfonts/fa-brands-400.svg#fontawesome) format("svg")}.fab{font-family:"Font Awesome 5 Brands"}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:400;src:url(../webfonts/fa-regular-400.eot);src:url(../webfonts/fa-regular-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-regular-400.woff2) format("woff2"),url(../webfonts/fa-regular-400.woff) format("woff"),url(../webfonts/fa-regular-400.ttf) format("truetype"),url(../webfonts/fa-regular-400.svg#fontawesome) format("svg")}.far{font-weight:400}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:900;src:url(../webfonts/fa-solid-900.eot);src:url(../webfonts/fa-solid-900.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-solid-900.woff2) format("woff2"),url(../webfonts/fa-solid-900.woff) format("woff"),url(../webfonts/fa-solid-900.ttf) format("truetype"),url(../webfonts/fa-solid-900.svg#fontawesome) format("svg")}.fa,.far,.fas{font-family:"Font Awesome 5 Free"}.fa,.fas{font-weight:900} \ No newline at end of file diff --git a/css/hugo-theme.css b/css/hugo-theme.css new file mode 100644 index 0000000000..9bd8a1e4e7 --- /dev/null +++ b/css/hugo-theme.css @@ -0,0 +1,245 @@ +/* Insert here special css for hugo theme, on top of any other imported css */ + + +/* Table of contents */ + +.progress ul { + list-style: none; + margin: 0; + padding: 0 15px; +} + +#TableOfContents { + 
font-size: 13px !important; + max-height: 85vh; + overflow: auto; + padding: 15px 5px !important; +} + +#TableOfContents > ul > li > a { + font-weight: bold; +} + +body { + font-size: 16px !important; + color: #323232 !important; +} + +#body a.highlight, #body a.highlight:hover, #body a.highlight:focus { + text-decoration: none; + outline: none; + outline: 0; +} +#body a.highlight { + line-height: 1.1; + display: inline-block; +} +#body a.highlight:after { + display: block; + content: ""; + height: 1px; + width: 0%; + background-color: #0082a7; /*#CE3B2F*/ + -webkit-transition: width 0.5s ease; + -moz-transition: width 0.5s ease; + -ms-transition: width 0.5s ease; + transition: width 0.5s ease; +} +#body a.highlight:hover:after, #body a.highlight:focus:after { + width: 100%; +} +.progress { + position:absolute; + background-color: rgba(246, 246, 246, 0.97); + width: auto; + border: thin solid #ECECEC; + display:none; + z-index:200; +} + +#toc-menu { + border-right: thin solid #DAD8D8 !important; + padding-right: 1rem !important; + margin-right: 0.5rem !important; +} + +#sidebar-toggle-span { + border-right: thin solid #DAD8D8 !important; + padding-right: 0.5rem !important; + margin-right: 1rem !important; +} + +.btn { + display: inline-block !important; + padding: 6px 12px !important; + margin-bottom: 0 !important; + font-size: 14px !important; + font-weight: normal !important; + line-height: 1.42857143 !important; + text-align: center !important; + white-space: nowrap !important; + vertical-align: middle !important; + -ms-touch-action: manipulation !important; + touch-action: manipulation !important; + cursor: pointer !important; + -webkit-user-select: none !important; + -moz-user-select: none !important; + -ms-user-select: none !important; + user-select: none !important; + background-image: none !important; + border: 1px solid transparent !important; + border-radius: 4px !important; + -webkit-transition: all 0.15s !important; + -moz-transition: all 0.15s 
!important; + transition: all 0.15s !important; +} +.btn:focus { + /*outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px;*/ + outline: none !important; +} +.btn:hover, +.btn:focus { + color: #2b2b2b !important; + text-decoration: none !important; +} + +.btn-default { + color: #333 !important; + background-color: #fff !important; + border-color: #ccc !important; +} +.btn-default:hover, +.btn-default:focus, +.btn-default:active { + color: #fff !important; + background-color: #9e9e9e !important; + border-color: #9e9e9e !important; +} +.btn-default:active { + background-image: none !important; +} + +/* anchors */ +.anchor { + color: #00bdf3; + font-size: 0.5em; + cursor:pointer; + visibility:hidden; + margin-left: 0.5em; + position: absolute; + margin-top:0.1em; +} + +h2:hover .anchor, h3:hover .anchor, h4:hover .anchor, h5:hover .anchor, h6:hover .anchor { + visibility:visible; +} + +/* Redfines headers style */ + +h2, h3, h4, h5, h6 { + font-weight: 400; + line-height: 1.1; +} + +h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + font-weight: inherit; +} + +h2 { + font-size: 2.5rem; + line-height: 110% !important; + margin: 2.5rem 0 1.5rem 0; +} + +h3 { + font-size: 2rem; + line-height: 110% !important; + margin: 2rem 0 1rem 0; +} + +h4 { + font-size: 1.5rem; + line-height: 110% !important; + margin: 1.5rem 0 0.75rem 0; +} + +h5 { + font-size: 1rem; + line-height: 110% !important; + margin: 1rem 0 0.2rem 0; +} + +h6 { + font-size: 0.5rem; + line-height: 110% !important; + margin: 0.5rem 0 0.2rem 0; +} + +p { + margin: 1rem 0; +} + +figcaption h4 { + font-weight: 300 !important; + opacity: .85; + font-size: 1em; + text-align: center; + margin-top: -1.5em; +} + +.select-style { + border: 0; + width: 22%; + border-radius: 0.6em; + padding: 0px 6px; + overflow: hidden; + display: inline-flex; + background-color: rgba(0, 0, 0, 0.1); + color: white; +} + +.select-style svg { + fill: #ccc; + width: 14px; + height: 14px; + pointer-events: none; + 
margin: auto; +} + +.select-style svg:hover { + fill: #e6e6e6; +} + +.select-style select { + padding: 0; + width: 130%; + color: white; + border: none; + box-shadow: none; + background: transparent; + background-image: none; + -webkit-appearance: none; + margin: auto; + margin-left: 0.2em; + margin-right: -27px +} + +.select-style select:focus { + outline: none; +} + +.select-style :hover { + cursor: pointer; +} + +@media only all and (max-width: 47.938em) { + #breadcrumbs .links, #top-github-link-text { + display: none; + } +} + +.is-sticky #top-bar { + box-shadow: -1px 2px 5px 1px rgba(0, 0, 0, 0.1); +} \ No newline at end of file diff --git a/css/hybrid.css b/css/hybrid.css new file mode 100644 index 0000000000..29735a1890 --- /dev/null +++ b/css/hybrid.css @@ -0,0 +1,102 @@ +/* + +vim-hybrid theme by w0ng (https://github.com/w0ng/vim-hybrid) + +*/ + +/*background color*/ +.hljs { + display: block; + overflow-x: auto; + padding: 0.5em; + background: #1d1f21; +} + +/*selection color*/ +.hljs::selection, +.hljs span::selection { + background: #373b41; +} + +.hljs::-moz-selection, +.hljs span::-moz-selection { + background: #373b41; +} + +/*foreground color*/ +.hljs { + color: #c5c8c6; +} + +/*color: fg_yellow*/ +.hljs-title, +.hljs-name { + color: #f0c674; +} + +/*color: fg_comment*/ +.hljs-comment, +.hljs-meta, +.hljs-meta .hljs-keyword { + color: #707880; +} + +/*color: fg_red*/ +.hljs-number, +.hljs-symbol, +.hljs-literal, +.hljs-deletion, +.hljs-link { + color: #cc6666 +} + +/*color: fg_green*/ +.hljs-string, +.hljs-doctag, +.hljs-addition, +.hljs-regexp, +.hljs-selector-attr, +.hljs-selector-pseudo { + color: #b5bd68; +} + +/*color: fg_purple*/ +.hljs-attribute, +.hljs-code, +.hljs-selector-id { + color: #b294bb; +} + +/*color: fg_blue*/ +.hljs-keyword, +.hljs-selector-tag, +.hljs-bullet, +.hljs-tag { + color: #81a2be; +} + +/*color: fg_aqua*/ +.hljs-subst, +.hljs-variable, +.hljs-template-tag, +.hljs-template-variable { + color: #8abeb7; +} + +/*color: 
fg_orange*/ +.hljs-type, +.hljs-built_in, +.hljs-builtin-name, +.hljs-quote, +.hljs-section, +.hljs-selector-class { + color: #de935f; +} + +.hljs-emphasis { + font-style: italic; +} + +.hljs-strong { + font-weight: bold; +} diff --git a/css/nucleus.css b/css/nucleus.css new file mode 100644 index 0000000000..1897fc5d6d --- /dev/null +++ b/css/nucleus.css @@ -0,0 +1,615 @@ +*, *::before, *::after { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; } + +@-webkit-viewport { + width: device-width; } +@-moz-viewport { + width: device-width; } +@-ms-viewport { + width: device-width; } +@-o-viewport { + width: device-width; } +@viewport { + width: device-width; } +html { + font-size: 100%; + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust: 100%; } + +body { + margin: 0; } + +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +nav, +section, +summary { + display: block; } + +audio, +canvas, +progress, +video { + display: inline-block; + vertical-align: baseline; } + +audio:not([controls]) { + display: none; + height: 0; } + +[hidden], +template { + display: none; } + +a { + background: transparent; + text-decoration: none; } + +a:active, +a:hover { + outline: 0; } + +abbr[title] { + border-bottom: 1px dotted; } + +b, +strong { + font-weight: bold; } + +dfn { + font-style: italic; } + +mark { + background: #FFFF27; + color: #333; } + +sub, +sup { + font-size: 0.8rem; + line-height: 0; + position: relative; + vertical-align: baseline; } + +sup { + top: -0.5em; } + +sub { + bottom: -0.25em; } + +img { + border: 0; + max-width: 100%; } + +svg:not(:root) { + overflow: hidden; } + +figure { + margin: 1em 40px; } + +hr { + height: 0; } + +pre { + overflow: auto; } + +button, +input, +optgroup, +select, +textarea { + color: inherit; + font: inherit; + margin: 0; } + +button { + overflow: visible; } + +button, +select { + text-transform: none; } + +button, +html input[type="button"], 
+input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; + cursor: pointer; } + +button[disabled], +html input[disabled] { + cursor: default; } + +button::-moz-focus-inner, +input::-moz-focus-inner { + border: 0; + padding: 0; } + +input { + line-height: normal; } + +input[type="checkbox"], +input[type="radio"] { + padding: 0; } + +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; } + +input[type="search"] { + -webkit-appearance: textfield; } + +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; } + +legend { + border: 0; + padding: 0; } + +textarea { + overflow: auto; } + +optgroup { + font-weight: bold; } + +table { + border-collapse: collapse; + border-spacing: 0; + table-layout: fixed; + width: 100%; } + +tr, td, th { + vertical-align: middle; } + +th, td { + padding: 0.425rem 0; } + +th { + text-align: left; } + +.container { + width: 75em; + margin: 0 auto; + padding: 0; } + @media only all and (min-width: 60em) and (max-width: 74.938em) { + .container { + width: 60em; } } + @media only all and (min-width: 48em) and (max-width: 59.938em) { + .container { + width: 48em; } } + @media only all and (min-width: 30.063em) and (max-width: 47.938em) { + .container { + width: 30em; } } + @media only all and (max-width: 30em) { + .container { + width: 100%; } } + +.grid { + display: -webkit-box; + display: -moz-box; + display: box; + display: -webkit-flex; + display: -moz-flex; + display: -ms-flexbox; + display: flex; + -webkit-flex-flow: row; + -moz-flex-flow: row; + flex-flow: row; + list-style: none; + margin: 0; + padding: 0; } + @media only all and (max-width: 47.938em) { + .grid { + -webkit-flex-flow: row wrap; + -moz-flex-flow: row wrap; + flex-flow: row wrap; } } + +.block { + -webkit-box-flex: 1; + -moz-box-flex: 1; + box-flex: 1; + -webkit-flex: 1; + -moz-flex: 1; + -ms-flex: 1; + flex: 1; + 
min-width: 0; + min-height: 0; } + @media only all and (max-width: 47.938em) { + .block { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 100%; + -moz-flex: 0 100%; + -ms-flex: 0 100%; + flex: 0 100%; } } + +.content { + margin: 0.625rem; + padding: 0.938rem; } + +@media only all and (max-width: 47.938em) { + body [class*="size-"] { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 100%; + -moz-flex: 0 100%; + -ms-flex: 0 100%; + flex: 0 100%; } } + +.size-1-2 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 50%; + -moz-flex: 0 50%; + -ms-flex: 0 50%; + flex: 0 50%; } + +.size-1-3 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 33.33333%; + -moz-flex: 0 33.33333%; + -ms-flex: 0 33.33333%; + flex: 0 33.33333%; } + +.size-1-4 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 25%; + -moz-flex: 0 25%; + -ms-flex: 0 25%; + flex: 0 25%; } + +.size-1-5 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 20%; + -moz-flex: 0 20%; + -ms-flex: 0 20%; + flex: 0 20%; } + +.size-1-6 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 16.66667%; + -moz-flex: 0 16.66667%; + -ms-flex: 0 16.66667%; + flex: 0 16.66667%; } + +.size-1-7 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 14.28571%; + -moz-flex: 0 14.28571%; + -ms-flex: 0 14.28571%; + flex: 0 14.28571%; } + +.size-1-8 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 12.5%; + -moz-flex: 0 12.5%; + -ms-flex: 0 12.5%; + flex: 0 12.5%; } + +.size-1-9 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 11.11111%; + -moz-flex: 0 11.11111%; + -ms-flex: 0 11.11111%; + flex: 0 11.11111%; } + +.size-1-10 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 10%; + -moz-flex: 0 10%; + -ms-flex: 0 10%; + flex: 0 10%; } + +.size-1-11 { + 
-webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 9.09091%; + -moz-flex: 0 9.09091%; + -ms-flex: 0 9.09091%; + flex: 0 9.09091%; } + +.size-1-12 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 8.33333%; + -moz-flex: 0 8.33333%; + -ms-flex: 0 8.33333%; + flex: 0 8.33333%; } + +@media only all and (min-width: 48em) and (max-width: 59.938em) { + .size-tablet-1-2 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 50%; + -moz-flex: 0 50%; + -ms-flex: 0 50%; + flex: 0 50%; } + + .size-tablet-1-3 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 33.33333%; + -moz-flex: 0 33.33333%; + -ms-flex: 0 33.33333%; + flex: 0 33.33333%; } + + .size-tablet-1-4 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 25%; + -moz-flex: 0 25%; + -ms-flex: 0 25%; + flex: 0 25%; } + + .size-tablet-1-5 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 20%; + -moz-flex: 0 20%; + -ms-flex: 0 20%; + flex: 0 20%; } + + .size-tablet-1-6 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 16.66667%; + -moz-flex: 0 16.66667%; + -ms-flex: 0 16.66667%; + flex: 0 16.66667%; } + + .size-tablet-1-7 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 14.28571%; + -moz-flex: 0 14.28571%; + -ms-flex: 0 14.28571%; + flex: 0 14.28571%; } + + .size-tablet-1-8 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 12.5%; + -moz-flex: 0 12.5%; + -ms-flex: 0 12.5%; + flex: 0 12.5%; } + + .size-tablet-1-9 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 11.11111%; + -moz-flex: 0 11.11111%; + -ms-flex: 0 11.11111%; + flex: 0 11.11111%; } + + .size-tablet-1-10 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 10%; + -moz-flex: 0 10%; + -ms-flex: 0 10%; + flex: 0 10%; } + + .size-tablet-1-11 { + -webkit-box-flex: 0; + 
-moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 9.09091%; + -moz-flex: 0 9.09091%; + -ms-flex: 0 9.09091%; + flex: 0 9.09091%; } + + .size-tablet-1-12 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 8.33333%; + -moz-flex: 0 8.33333%; + -ms-flex: 0 8.33333%; + flex: 0 8.33333%; } } +@media only all and (max-width: 47.938em) { + @supports not (flex-wrap: wrap) { + .grid { + display: block; + -webkit-box-lines: inherit; + -moz-box-lines: inherit; + box-lines: inherit; + -webkit-flex-wrap: inherit; + -moz-flex-wrap: inherit; + -ms-flex-wrap: inherit; + flex-wrap: inherit; } + + .block { + display: block; + -webkit-box-flex: inherit; + -moz-box-flex: inherit; + box-flex: inherit; + -webkit-flex: inherit; + -moz-flex: inherit; + -ms-flex: inherit; + flex: inherit; } } } +.first-block { + -webkit-box-ordinal-group: 0; + -webkit-order: -1; + -ms-flex-order: -1; + order: -1; } + +.last-block { + -webkit-box-ordinal-group: 2; + -webkit-order: 1; + -ms-flex-order: 1; + order: 1; } + +.fixed-blocks { + -webkit-flex-flow: row wrap; + -moz-flex-flow: row wrap; + flex-flow: row wrap; } + .fixed-blocks .block { + -webkit-box-flex: inherit; + -moz-box-flex: inherit; + box-flex: inherit; + -webkit-flex: inherit; + -moz-flex: inherit; + -ms-flex: inherit; + flex: inherit; + width: 25%; } + @media only all and (min-width: 60em) and (max-width: 74.938em) { + .fixed-blocks .block { + width: 33.33333%; } } + @media only all and (min-width: 48em) and (max-width: 59.938em) { + .fixed-blocks .block { + width: 50%; } } + @media only all and (max-width: 47.938em) { + .fixed-blocks .block { + width: 100%; } } + +body { + font-size: 1.05rem; + line-height: 1.7; } + +h1, h2, h3, h4, h5, h6 { + margin: 0.85rem 0 1.7rem 0; + text-rendering: optimizeLegibility; } + +h1 { + font-size: 3.25rem; } + +h2 { + font-size: 2.55rem; } + +h3 { + font-size: 2.15rem; } + +h4 { + font-size: 1.8rem; } + +h5 { + font-size: 1.4rem; } + +h6 { + font-size: 0.9rem; } + +p { + margin: 
1.7rem 0; } + +ul, ol { + margin-top: 1.7rem; + margin-bottom: 1.7rem; } + ul ul, ul ol, ol ul, ol ol { + margin-top: 0; + margin-bottom: 0; } + +blockquote { + margin: 1.7rem 0; + padding-left: 0.85rem; } + +cite { + display: block; + font-size: 0.925rem; } + cite:before { + content: "\2014 \0020"; } + +pre { + margin: 1.7rem 0; + padding: 0.938rem; } + +code { + vertical-align: bottom; } + +small { + font-size: 0.925rem; } + +hr { + border-left: none; + border-right: none; + border-top: none; + margin: 1.7rem 0; } + +fieldset { + border: 0; + padding: 0.938rem; + margin: 0 0 1.7rem 0; } + +input, +label, +select { + display: block; } + +label { + margin-bottom: 0.425rem; } + label.required:after { + content: "*"; } + label abbr { + display: none; } + +textarea, input[type="email"], input[type="number"], input[type="password"], input[type="search"], input[type="tel"], input[type="text"], input[type="url"], input[type="color"], input[type="date"], input[type="datetime"], input[type="datetime-local"], input[type="month"], input[type="time"], input[type="week"], select[multiple=multiple] { + -webkit-transition: border-color; + -moz-transition: border-color; + transition: border-color; + border-radius: 0.1875rem; + margin-bottom: 0.85rem; + padding: 0.425rem 0.425rem; + width: 100%; } + textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + outline: none; } + +textarea { + resize: vertical; } + +input[type="checkbox"], input[type="radio"] { + display: inline; + margin-right: 0.425rem; } + +input[type="file"] { + width: 100%; } + +select { + width: auto; + max-width: 100%; + margin-bottom: 
1.7rem; } + +button, +input[type="submit"] { + cursor: pointer; + user-select: none; + vertical-align: middle; + white-space: nowrap; + border: inherit; } diff --git a/css/perfect-scrollbar.min.css b/css/perfect-scrollbar.min.css new file mode 100644 index 0000000000..ebd2cb43bc --- /dev/null +++ b/css/perfect-scrollbar.min.css @@ -0,0 +1,2 @@ +/* perfect-scrollbar v0.6.13 */ +.ps-container{-ms-touch-action:auto;touch-action:auto;overflow:hidden !important;-ms-overflow-style:none}@supports (-ms-overflow-style: none){.ps-container{overflow:auto !important}}@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none){.ps-container{overflow:auto !important}}.ps-container.ps-active-x>.ps-scrollbar-x-rail,.ps-container.ps-active-y>.ps-scrollbar-y-rail{display:block;background-color:transparent}.ps-container.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail{background-color:#eee;opacity:.9}.ps-container.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail>.ps-scrollbar-x{background-color:#999;height:11px}.ps-container.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail{background-color:#eee;opacity:.9}.ps-container.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail>.ps-scrollbar-y{background-color:#999;width:11px}.ps-container>.ps-scrollbar-x-rail{display:none;position:absolute;opacity:0;-webkit-transition:background-color .2s linear, opacity .2s linear;-o-transition:background-color .2s linear, opacity .2s linear;-moz-transition:background-color .2s linear, opacity .2s linear;transition:background-color .2s linear, opacity .2s linear;bottom:0px;height:15px}.ps-container>.ps-scrollbar-x-rail>.ps-scrollbar-x{position:absolute;background-color:#aaa;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;-o-transition:background-color .2s 
linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;-moz-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;bottom:2px;height:6px}.ps-container>.ps-scrollbar-x-rail:hover>.ps-scrollbar-x,.ps-container>.ps-scrollbar-x-rail:active>.ps-scrollbar-x{height:11px}.ps-container>.ps-scrollbar-y-rail{display:none;position:absolute;opacity:0;-webkit-transition:background-color .2s linear, opacity .2s linear;-o-transition:background-color .2s linear, opacity .2s linear;-moz-transition:background-color .2s linear, opacity .2s linear;transition:background-color .2s linear, opacity .2s linear;right:0;width:15px}.ps-container>.ps-scrollbar-y-rail>.ps-scrollbar-y{position:absolute;background-color:#aaa;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;-o-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;-moz-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s 
ease-in-out;right:2px;width:6px}.ps-container>.ps-scrollbar-y-rail:hover>.ps-scrollbar-y,.ps-container>.ps-scrollbar-y-rail:active>.ps-scrollbar-y{width:11px}.ps-container:hover.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail{background-color:#eee;opacity:.9}.ps-container:hover.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail>.ps-scrollbar-x{background-color:#999;height:11px}.ps-container:hover.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail{background-color:#eee;opacity:.9}.ps-container:hover.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail>.ps-scrollbar-y{background-color:#999;width:11px}.ps-container:hover>.ps-scrollbar-x-rail,.ps-container:hover>.ps-scrollbar-y-rail{opacity:.6}.ps-container:hover>.ps-scrollbar-x-rail:hover{background-color:#eee;opacity:.9}.ps-container:hover>.ps-scrollbar-x-rail:hover>.ps-scrollbar-x{background-color:#999}.ps-container:hover>.ps-scrollbar-y-rail:hover{background-color:#eee;opacity:.9}.ps-container:hover>.ps-scrollbar-y-rail:hover>.ps-scrollbar-y{background-color:#999} diff --git a/css/tabs.css b/css/tabs.css new file mode 100644 index 0000000000..2ad2728772 --- /dev/null +++ b/css/tabs.css @@ -0,0 +1,43 @@ +#body .tab-nav-button { + border-width: 1px 1px 1px 1px !important; + border-color: #ccc !important; + border-radius: 4px 4px 0 0 !important; + background-color: #ddd !important; + float: left; + display: block; + position: relative; + margin-left: 4px; + bottom: -1px; +} +#body .tab-nav-button:first-child { + margin-left: 0px; +} +#body .tab-nav-button.active { + background-color: #fff !important; + border-bottom-color: #fff !important; +} + +#body .tab-panel { + margin-top: 32px; + margin-bottom: 32px; +} +#body .tab-content { + display: block; + clear: both; + padding: 8px; + border-width: 1px; + border-style: solid; + border-color: #ccc; +} +#body .tab-content .tab-item{ + display: none; +} + +#body .tab-content .tab-item.active{ + display: block; +} + +#body .tab-item pre{ + margin-bottom: 0; + margin-top: 0; +} diff --git a/css/tags.css 
b/css/tags.css new file mode 100644 index 0000000000..495d2f9f71 --- /dev/null +++ b/css/tags.css @@ -0,0 +1,49 @@ +/* Tags */ + +#head-tags{ + margin-left:1em; + margin-top:1em; +} + +#body .tags a.tag-link { + display: inline-block; + line-height: 2em; + font-size: 0.8em; + position: relative; + margin: 0 16px 8px 0; + padding: 0 10px 0 12px; + background: #8451a1; + + -webkit-border-bottom-right-radius: 3px; + border-bottom-right-radius: 3px; + -webkit-border-top-right-radius: 3px; + border-top-right-radius: 3px; + + -webkit-box-shadow: 0 1px 2px rgba(0,0,0,0.2); + box-shadow: 0 1px 2px rgba(0,0,0,0.2); + color: #fff; +} + +#body .tags a.tag-link:before { + content: ""; + position: absolute; + top:0; + left: -1em; + width: 0; + height: 0; + border-color: transparent #8451a1 transparent transparent; + border-style: solid; + border-width: 1em 1em 1em 0; +} + +#body .tags a.tag-link:after { + content: ""; + position: absolute; + top: 10px; + left: 1px; + width: 5px; + height: 5px; + -webkit-border-radius: 50%; + border-radius: 100%; + background: #fff; +} diff --git a/css/theme-blue.css b/css/theme-blue.css new file mode 100644 index 0000000000..91369947ca --- /dev/null +++ b/css/theme-blue.css @@ -0,0 +1,133 @@ + +:root{ + + --MAIN-TEXT-color:#323232; /* Color of text by default */ + --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ + --MAIN-LINK-color:#1C90F3; /* Color of links */ + --MAIN-LINK-HOVER-color:#167ad0; /* Color of hovered links */ + --MAIN-ANCHOR-color: #1C90F3; /* color of anchors on titles */ + + --MENU-HOME-LINK-color: #323232; /* Color of the home button text */ + --MENU-HOME-LINK-HOVER-color: #5e5e5e; /* Color of the hovered home button text */ + + --MENU-HEADER-BG-color:#1C90F3; /* Background color of menu header */ + --MENU-HEADER-BORDER-color:#33a1ff; /*Color of menu header border */ + + --MENU-SEARCH-BG-color:#167ad0; /* Search field background color (by default borders + icons) */ + --MENU-SEARCH-BOX-color: #33a1ff; /* 
Override search field border color */ + --MENU-SEARCH-BOX-ICONS-color: #a1d2fd; /* Override search field icons color */ + + --MENU-SECTIONS-ACTIVE-BG-color:#20272b; /* Background color of the active section and its childs */ + --MENU-SECTIONS-BG-color:#252c31; /* Background color of other sections */ + --MENU-SECTIONS-LINK-color: #ccc; /* Color of links in menu */ + --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ + --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category text */ + --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ + + --MENU-VISITED-color: #33a1ff; /* Color of 'page visited' icons in menu */ + --MENU-SECTION-HR-color: #20272b; /* Color of
separator in menu */ + +} + +body { + color: var(--MAIN-TEXT-color) !important; +} + +textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + border-color: none; + box-shadow: none; +} + +h2, h3, h4, h5 { + color: var(--MAIN-TITLES-TEXT-color) !important; +} + +a { + color: var(--MAIN-LINK-color); +} + +.anchor { + color: var(--MAIN-ANCHOR-color); +} + +a:hover { + color: var(--MAIN-LINK-HOVER-color); +} + +#sidebar ul li.visited > a .read-icon { + color: var(--MENU-VISITED-color); +} + +#body a.highlight:after { + display: block; + content: ""; + height: 1px; + width: 0%; + -webkit-transition: width 0.5s ease; + -moz-transition: width 0.5s ease; + -ms-transition: width 0.5s ease; + transition: width 0.5s ease; + background-color: var(--MAIN-LINK-HOVER-color); +} +#sidebar { + background-color: var(--MENU-SECTIONS-BG-color); +} +#sidebar #header-wrapper { + background: var(--MENU-HEADER-BG-color); + color: var(--MENU-SEARCH-BOX-color); + border-color: var(--MENU-HEADER-BORDER-color); +} +#sidebar .searchbox { + border-color: var(--MENU-SEARCH-BOX-color); + background: var(--MENU-SEARCH-BG-color); +} +#sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { + background: var(--MENU-SECTIONS-ACTIVE-BG-color); +} +#sidebar ul.topics li li { + border-top: 1px solid rgba(255,255,255, 0.1); + border-left: 1px solid rgba(255,255,255, 0.1); + background: rgba(127,127,127, 0.1) +} +#sidebar .searchbox * { + color: var(--MENU-SEARCH-BOX-ICONS-color); +} + +#sidebar a { + color: var(--MENU-SECTIONS-LINK-color); +} + +#sidebar a:hover { + color: var(--MENU-SECTIONS-LINK-HOVER-color); +} + 
+#sidebar ul li.active > a { + background: var(--MENU-SECTION-ACTIVE-CATEGORY-BG-color); + color: var(--MENU-SECTION-ACTIVE-CATEGORY-color) !important; +} + +#sidebar hr { + border-color: var(--MENU-SECTION-HR-color); +} + +#body .tags a.tag-link { + background-color: var(--MENU-HEADER-BG-color); +} + +#body .tags a.tag-link:before { + border-right-color: var(--MENU-HEADER-BG-color); +} + +#homelinks { + background: var(--MENU-HEADER-BG-color); + background-color: var(--MENU-HEADER-BORDER-color); + border-bottom-color: var(--MENU-HEADER-BORDER-color); +} + +#homelinks a { + color: var(--MENU-HOME-LINK-color); +} + +#homelinks a:hover { + color: var(--MENU-HOME-LINK-HOVER-color); /* name must match the custom property declared in :root */ +} \ No newline at end of file diff --git a/css/theme-green.css b/css/theme-green.css new file mode 100644 index 0000000000..c074679aaf --- /dev/null +++ b/css/theme-green.css @@ -0,0 +1,128 @@ + +:root{ + + --MAIN-TEXT-color:#323232; /* Color of text by default */ + --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ + --MAIN-LINK-color:#599a3e; /* Color of links */ + --MAIN-LINK-HOVER-color:#3f6d2c; /* Color of hovered links */ + --MAIN-ANCHOR-color: #599a3e; /* color of anchors on titles */ + + --MENU-HOME-LINK-color: #323232; /* Color of the home button text */ + --MENU-HOME-LINK-HOVER-color: #5e5e5e; /* Color of the hovered home button text */ + + --MENU-HEADER-BG-color:#74b559; /* Background color of menu header */ + --MENU-HEADER-BORDER-color:#9cd484; /*Color of menu header border */ + + --MENU-SEARCH-BG-color:#599a3e; /* Search field background color (by default borders + icons) */ + --MENU-SEARCH-BOX-color: #84c767; /* Override search field border color */ + --MENU-SEARCH-BOX-ICONS-color: #c7f7c4; /* Override search field icons color */ + + --MENU-SECTIONS-ACTIVE-BG-color:#1b211c; /* Background color of the active section and its childs */ + --MENU-SECTIONS-BG-color:#222723; /* Background color of other sections */ + --MENU-SECTIONS-LINK-color: #ccc; /* Color of 
links in menu */ + --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ + --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category text */ + --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ + + --MENU-VISITED-color: #599a3e; /* Color of 'page visited' icons in menu */ + --MENU-SECTION-HR-color: #18211c; /* Color of
separator in menu */ + +} + +body { + color: var(--MAIN-TEXT-color) !important; +} + +textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + border-color: none; + box-shadow: none; +} + +h2, h3, h4, h5 { + color: var(--MAIN-TITLES-TEXT-color) !important; +} + +a { + color: var(--MAIN-LINK-color); +} + +.anchor { + color: var(--MAIN-ANCHOR-color); +} + +a:hover { + color: var(--MAIN-LINK-HOVER-color); +} + +#sidebar ul li.visited > a .read-icon { + color: var(--MENU-VISITED-color); +} + +#body a.highlight:after { + display: block; + content: ""; + height: 1px; + width: 0%; + -webkit-transition: width 0.5s ease; + -moz-transition: width 0.5s ease; + -ms-transition: width 0.5s ease; + transition: width 0.5s ease; + background-color: var(--MAIN-LINK-HOVER-color); +} +#sidebar { + background-color: var(--MENU-SECTIONS-BG-color); +} +#sidebar #header-wrapper { + background: var(--MENU-HEADER-BG-color); + color: var(--MENU-SEARCH-BOX-color); + border-color: var(--MENU-HEADER-BORDER-color); +} +#sidebar .searchbox { + border-color: var(--MENU-SEARCH-BOX-color); + background: var(--MENU-SEARCH-BG-color); +} +#sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { + background: var(--MENU-SECTIONS-ACTIVE-BG-color); +} +#sidebar .searchbox * { + color: var(--MENU-SEARCH-BOX-ICONS-color); +} + +#sidebar a { + color: var(--MENU-SECTIONS-LINK-color); +} + +#sidebar a:hover { + color: var(--MENU-SECTIONS-LINK-HOVER-color); +} + +#sidebar ul li.active > a { + background: var(--MENU-SECTION-ACTIVE-CATEGORY-BG-color); + color: var(--MENU-SECTION-ACTIVE-CATEGORY-color) !important; +} + +#sidebar hr 
{ + border-color: var(--MENU-SECTION-HR-color); +} + +#body .tags a.tag-link { + background-color: var(--MENU-HEADER-BG-color); +} + +#body .tags a.tag-link:before { + border-right-color: var(--MENU-HEADER-BG-color); +} + +#homelinks { + background: var(--MENU-HEADER-BG-color); + background-color: var(--MENU-HEADER-BORDER-color); + border-bottom-color: var(--MENU-HEADER-BORDER-color); +} + +#homelinks a { + color: var(--MENU-HOME-LINK-color); +} + +#homelinks a:hover { + color: var(--MENU-HOME-LINK-HOVER-color); /* name must match the custom property declared in :root */ +} \ No newline at end of file diff --git a/css/theme-red.css b/css/theme-red.css new file mode 100644 index 0000000000..c5f2674243 --- /dev/null +++ b/css/theme-red.css @@ -0,0 +1,128 @@ + +:root{ + + --MAIN-TEXT-color:#323232; /* Color of text by default */ + --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ + --MAIN-LINK-color:#f31c1c; /* Color of links */ + --MAIN-LINK-HOVER-color:#d01616; /* Color of hovered links */ + --MAIN-ANCHOR-color: #f31c1c; /* color of anchors on titles */ + + --MENU-HOME-LINK-color: #ccc; /* Color of the home button text */ + --MENU-HOME-LINK-HOVER-color: #e6e6e6; /* Color of the hovered home button text */ + + --MENU-HEADER-BG-color:#dc1010; /* Background color of menu header */ + --MENU-HEADER-BORDER-color:#e23131; /*Color of menu header border */ + + --MENU-SEARCH-BG-color:#b90000; /* Search field background color (by default borders + icons) */ + --MENU-SEARCH-BOX-color: #ef2020; /* Override search field border color */ + --MENU-SEARCH-BOX-ICONS-color: #fda1a1; /* Override search field icons color */ + + --MENU-SECTIONS-ACTIVE-BG-color:#2b2020; /* Background color of the active section and its childs */ + --MENU-SECTIONS-BG-color:#312525; /* Background color of other sections */ + --MENU-SECTIONS-LINK-color: #ccc; /* Color of links in menu */ + --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ + --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category 
text */ + --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ + + --MENU-VISITED-color: #ff3333; /* Color of 'page visited' icons in menu */ + --MENU-SECTION-HR-color: #2b2020; /* Color of
separator in menu */ + +} + +body { + color: var(--MAIN-TEXT-color) !important; +} + +textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + border-color: none; + box-shadow: none; +} + +h2, h3, h4, h5 { + color: var(--MAIN-TITLES-TEXT-color) !important; +} + +a { + color: var(--MAIN-LINK-color); +} + +.anchor { + color: var(--MAIN-ANCHOR-color); +} + +a:hover { + color: var(--MAIN-LINK-HOVER-color); +} + +#sidebar ul li.visited > a .read-icon { + color: var(--MENU-VISITED-color); +} + +#body a.highlight:after { + display: block; + content: ""; + height: 1px; + width: 0%; + -webkit-transition: width 0.5s ease; + -moz-transition: width 0.5s ease; + -ms-transition: width 0.5s ease; + transition: width 0.5s ease; + background-color: var(--MAIN-LINK-HOVER-color); +} +#sidebar { + background-color: var(--MENU-SECTIONS-BG-color); +} +#sidebar #header-wrapper { + background: var(--MENU-HEADER-BG-color); + color: var(--MENU-SEARCH-BOX-color); + border-color: var(--MENU-HEADER-BORDER-color); +} +#sidebar .searchbox { + border-color: var(--MENU-SEARCH-BOX-color); + background: var(--MENU-SEARCH-BG-color); +} +#sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { + background: var(--MENU-SECTIONS-ACTIVE-BG-color); +} +#sidebar .searchbox * { + color: var(--MENU-SEARCH-BOX-ICONS-color); +} + +#sidebar a { + color: var(--MENU-SECTIONS-LINK-color); +} + +#sidebar a:hover { + color: var(--MENU-SECTIONS-LINK-HOVER-color); +} + +#sidebar ul li.active > a { + background: var(--MENU-SECTION-ACTIVE-CATEGORY-BG-color); + color: var(--MENU-SECTION-ACTIVE-CATEGORY-color) !important; +} + +#sidebar hr 
{ + border-color: var(--MENU-SECTION-HR-color); +} + +#body .tags a.tag-link { + background-color: var(--MENU-HEADER-BG-color); +} + +#body .tags a.tag-link:before { + border-right-color: var(--MENU-HEADER-BG-color); +} + +#homelinks { + background: var(--MENU-HEADER-BG-color); + background-color: var(--MENU-HEADER-BORDER-color); + border-bottom-color: var(--MENU-HEADER-BORDER-color); +} + +#homelinks a { + color: var(--MENU-HOME-LINK-color); +} + +#homelinks a:hover { + color: var(--MENU-HOME-LINK-HOVER-color); /* name must match the custom property declared in :root */ +} \ No newline at end of file diff --git a/css/theme.css b/css/theme.css new file mode 100644 index 0000000000..a6c291d795 --- /dev/null +++ b/css/theme.css @@ -0,0 +1,1148 @@ +@charset "UTF-8"; + +/* Tags */ +@import "tags.css"; + +#top-github-link, #body #breadcrumbs { + position: relative; + top: 50%; + -webkit-transform: translateY(-50%); + -moz-transform: translateY(-50%); + -o-transform: translateY(-50%); + -ms-transform: translateY(-50%); + transform: translateY(-50%); +} +.button, .button-secondary { + display: inline-block; + padding: 7px 12px; +} +.button:active, .button-secondary:active { + margin: 2px 0 -2px 0; +} +@font-face { + font-family: 'Novacento Sans Wide'; + src: url("../fonts/Novecentosanswide-UltraLight-webfont.eot"); + src: url("../fonts/Novecentosanswide-UltraLight-webfont.eot?#iefix") format("embedded-opentype"), url("../fonts/Novecentosanswide-UltraLight-webfont.woff2") format("woff2"), url("../fonts/Novecentosanswide-UltraLight-webfont.woff") format("woff"), url("../fonts/Novecentosanswide-UltraLight-webfont.ttf") format("truetype"), url("../fonts/Novecentosanswide-UltraLight-webfont.svg#novecento_sans_wideultralight") format("svg"); + font-style: normal; + font-weight: 200; +} +@font-face { + font-family: 'Work Sans'; + font-style: normal; + font-weight: 300; + src: url("../fonts/Work_Sans_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Work_Sans_300.woff") format("woff"), url("../fonts/Work_Sans_300.woff2") 
format("woff2"), url("../fonts/Work_Sans_300.svg#WorkSans") format("svg"), url("../fonts/Work_Sans_300.ttf") format("truetype"); +} +@font-face { + font-family: 'Work Sans'; + font-style: normal; + font-weight: 500; + src: url("../fonts/Work_Sans_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Work_Sans_500.woff") format("woff"), url("../fonts/Work_Sans_500.woff2") format("woff2"), url("../fonts/Work_Sans_500.svg#WorkSans") format("svg"), url("../fonts/Work_Sans_500.ttf") format("truetype"); +} +body { + background: #fff; + color: #777; +} +body #chapter h1 { + font-size: 3.5rem; +} +@media only all and (min-width: 48em) and (max-width: 59.938em) { + body #chapter h1 { + font-size: 3rem; + } +} +@media only all and (max-width: 47.938em) { + body #chapter h1 { + font-size: 2rem; + } +} +a { + color: #00bdf3; +} +a:hover { + color: #0082a7; +} +pre { + position: relative; + color: #ffffff; +} +.bg { + background: #fff; + border: 1px solid #eaeaea; +} +b, strong, label, th { + font-weight: 600; +} +.default-animation, #header #logo-svg, #header #logo-svg path, #sidebar, #sidebar ul, #body, #body .padding, #body .nav { + -webkit-transition: all 0.5s ease; + -moz-transition: all 0.5s ease; + transition: all 0.5s ease; +} +#grav-logo { + max-width: 60%; +} +#grav-logo path { + fill: #fff !important; +} +#sidebar { + font-weight: 300 !important; +} +fieldset { + border: 1px solid #ddd; +} +textarea, input[type="email"], input[type="number"], input[type="password"], input[type="search"], input[type="tel"], input[type="text"], input[type="url"], input[type="color"], input[type="date"], input[type="datetime"], input[type="datetime-local"], input[type="month"], input[type="time"], input[type="week"], select[multiple=multiple] { + background-color: white; + border: 1px solid #ddd; + box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.06); +} +textarea:hover, input[type="email"]:hover, input[type="number"]:hover, input[type="password"]:hover, input[type="search"]:hover, 
input[type="tel"]:hover, input[type="text"]:hover, input[type="url"]:hover, input[type="color"]:hover, input[type="date"]:hover, input[type="datetime"]:hover, input[type="datetime-local"]:hover, input[type="month"]:hover, input[type="time"]:hover, input[type="week"]:hover, select[multiple=multiple]:hover { + border-color: #c4c4c4; +} +textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + border-color: #00bdf3; + box-shadow: inset 0 1px 3px rgba(0,0,0,.06),0 0 5px rgba(0,169,218,.7) +} +#header-wrapper { + background: #8451a1; + color: #fff; + text-align: center; + border-bottom: 4px solid #9c6fb6; + padding: 1rem; +} +#header a { + display: inline-block; +} +#logo { + color: white !important; + font-size: 1.1em; + float: left; +} +#header #logo-svg { + width: 8rem; + height: 2rem; +} +#header #logo-svg path { + fill: #fff; +} +.searchbox { + margin-top: 1rem; + position: relative; + border: 1px solid #915eae; + background: #764890; + border-radius: 4px; +} +.searchbox label { + color: rgba(255, 255, 255, 0.8); + position: absolute; + left: 10px; + top: 3px; +} +.searchbox span { + color: rgba(255, 255, 255, 0.6); + position: absolute; + right: 10px; + top: 3px; + cursor: pointer; +} +.searchbox span:hover { + color: rgba(255, 255, 255, 0.9); +} +.searchbox input { + display: inline-block; + color: #fff; + width: 100%; + height: 30px; + background: transparent; + border: 0; + padding: 0 25px 0 30px; + margin: 0; + font-weight: 300; +} +.searchbox input::-webkit-input-placeholder { + color: rgba(255, 255, 255, 0.6); +} +.searchbox input::-moz-placeholder { + color: rgba(255, 255, 255, 
0.6); +} +.searchbox input:-moz-placeholder { + color: rgba(255, 255, 255, 0.6); +} +.searchbox input:-ms-input-placeholder { + color: rgba(255, 255, 255, 0.6); +} +#sidebar-toggle-span { + display: none; +} +@media only all and (max-width: 47.938em) { + #sidebar-toggle-span { + display: inline; + } +} +#sidebar { + background-color: #322A38; + position: fixed; + top: 0; + width: 300px; + bottom: 0; + left: 0; + font-weight: 400; + font-size: 15px; +} +#sidebar a { + color: #ccc; +} +#sidebar a:hover { + color: #e6e6e6; +} +#sidebar a.subtitle { + color: rgba(204, 204, 204, 0.6); +} +#sidebar hr { + border-bottom: 1px solid #2a232f; +} +#sidebar a.padding { + padding: 0 1rem; +} +#sidebar h5 { + margin: 2rem 0 0; + position: relative; + line-height: 2; +} +#sidebar h5 a { + display: block; + margin-left: 0; + margin-right: 0; + padding-left: 1rem; + padding-right: 1rem; +} +#sidebar h5 i { + color: rgba(204, 204, 204, 0.6); + position: absolute; + right: 0.6rem; + top: 0.7rem; + font-size: 80%; +} +#sidebar h5.parent a { + background: #201b24; + color: #d9d9d9 !important; +} +#sidebar h5.active a { + background: #fff; + color: #777 !important; +} +#sidebar h5.active i { + color: #777 !important; +} +#sidebar h5 + ul.topics { + display: none; + margin-top: 0; +} +#sidebar h5.parent + ul.topics, #sidebar h5.active + ul.topics { + display: block; +} +#sidebar ul { + list-style: none; + padding: 0; + margin: 0; +} +#sidebar ul.searched a { + color: #999999; +} +#sidebar ul.searched .search-match a { + color: #e6e6e6; +} +#sidebar ul.searched .search-match a:hover { + color: white; +} +#sidebar ul.topics { + margin: 0 1rem; +} +#sidebar ul.topics.searched ul { + display: block; +} +#sidebar ul.topics ul { + display: none; + padding-bottom: 1rem; +} +#sidebar ul.topics ul ul { + padding-bottom: 0; +} +#sidebar ul.topics li.parent ul, #sidebar ul.topics > li.active ul { + display: block; +} +#sidebar ul.topics > li > a { + line-height: 2rem; + font-size: 1.1rem; +} 
+#sidebar ul.topics > li > a b { + opacity: 0.5; + font-weight: normal; +} +#sidebar ul.topics > li > a .fa { + margin-top: 9px; +} +#sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { + background: #251f29; + margin-left: -1rem; + margin-right: -1rem; + padding-left: 1rem; + padding-right: 1rem; +} +#sidebar ul li.active > a { + background: #fff; + color: #777 !important; + margin-left: -1rem; + margin-right: -1rem; + padding-left: 1rem; + padding-right: 1rem; +} +#sidebar ul li { + padding: 0; +} +#sidebar ul li.visited + span { + margin-right: 16px; +} +#sidebar ul li a { + display: block; + padding: 2px 0; +} +#sidebar ul li a span { + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + display: block; +} +#sidebar ul li > a { + padding: 4px 0; +} +#sidebar ul li.visited > a .read-icon { + color: #9c6fb6; + display: inline; +} +#sidebar ul li li { + padding-left: 1rem; + text-indent: 0.2rem; +} +#main { + background: #f7f7f7; + margin: 0 0 1.563rem 0; +} +#body { + position: relative; + margin-left: 300px; + min-height: 100%; +} +#body img, #body .video-container { + margin: 3rem auto; + display: block; + text-align: center; +} +#body img.border, #body .video-container.border { + border: 2px solid #e6e6e6 !important; + padding: 2px; +} +#body img.shadow, #body .video-container.shadow { + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1); +} +#body img.inline { + display: inline !important; + margin: 0 !important; + vertical-align: bottom; +} +#body .bordered { + border: 1px solid #ccc; +} +#body .padding { + padding: 3rem 6rem; +} +@media only all and (max-width: 59.938em) { + #body .padding { + position: static; + padding: 15px 3rem; + } +} +@media only all and (max-width: 47.938em) { + #body .padding { + padding: 5px 1rem; + } +} +#body h1 + hr { + margin-top: -1.7rem; + margin-bottom: 3rem; +} +@media only all and (max-width: 59.938em) { + #body #navigation { + position: static; + margin-right: 0 !important; + width: 100%; + display: 
table; + } +} +#body .nav { + position: fixed; + top: 0; + bottom: 0; + width: 4rem; + font-size: 50px; + height: 100%; + cursor: pointer; + display: table; + text-align: center; +} +#body .nav > i { + display: table-cell; + vertical-align: middle; + text-align: center; +} +@media only all and (max-width: 59.938em) { + #body .nav { + display: table-cell; + position: static; + top: auto; + width: 50%; + text-align: center; + height: 100px; + line-height: 100px; + padding-top: 0; + } + #body .nav > i { + display: inline-block; + } +} +#body .nav:hover { + background: #F6F6F6; +} +#body .nav.nav-pref { + left: 0; +} +#body .nav.nav-next { + right: 0; +} +#body-inner { + margin-bottom: 5rem; +} +#chapter { + display: flex; + align-items: center; + justify-content: center; + height: 100%; + padding: 2rem 0; +} +#chapter #body-inner { + padding-bottom: 3rem; + max-width: 80%; +} +#chapter h3 { + font-family: "Work Sans", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + font-weight: 300; + text-align: center; +} +#chapter h1 { + font-size: 5rem; + border-bottom: 4px solid #F0F2F4; +} +#chapter p { + text-align: center; + font-size: 1.2rem; +} +#footer { + padding: 3rem 1rem; + color: #b3b3b3; + font-size: 13px; +} +#footer p { + margin: 0; +} +body { + font-family: "Work Sans", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + font-weight: 300; + line-height: 1.6; + font-size: 18px !important; +} +h2, h3, h4, h5, h6 { + font-family: "Work Sans", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + text-rendering: optimizeLegibility; + color: #5e5e5e; + font-weight: 400; + letter-spacing: -1px; +} +h1 { + font-family: "Novacento Sans Wide", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + text-align: center; + text-transform: uppercase; + color: #222; + font-weight: 200; +} +blockquote { + border-left: 10px solid #F0F2F4; +} +blockquote p { + font-size: 1.1rem; + color: #999; +} +blockquote cite { + display: block; + text-align: right; + color: #666; 
+ font-size: 1.2rem; +} +div.notices { + margin: 2rem 0; + position: relative; +} +div.notices p { + padding: 15px; + display: block; + font-size: 1rem; + margin-top: 0rem; + margin-bottom: 0rem; + color: #666; +} +div.notices p:first-child:before { + position: absolute; + top: 2px; + color: #fff; + font-family: "Font Awesome 5 Free"; + font-weight: 900; + content: "\f06a"; + left: 10px; +} +div.notices p:first-child:after { + position: absolute; + top: 2px; + color: #fff; + left: 2rem; +} +div.notices.info p { + border-top: 30px solid #F0B37E; + background: #FFF2DB; +} +div.notices.info p:first-child:after { + content: 'Info'; +} +div.notices.warning p { + border-top: 30px solid rgba(217, 83, 79, 0.8); + background: #FAE2E2; +} +div.notices.warning p:first-child:after { + content: 'Warning'; +} +div.notices.note p { + border-top: 30px solid #6AB0DE; + background: #E7F2FA; +} +div.notices.note p:first-child:after { + content: 'Note'; +} +div.notices.tip p { + border-top: 30px solid rgba(92, 184, 92, 0.8); + background: #E6F9E6; +} +div.notices.tip p:first-child:after { + content: 'Tip'; +} + +/* attachments shortcode */ + +section.attachments { + margin: 2rem 0; + position: relative; +} + +section.attachments label { + font-weight: 400; + padding-left: 0.5em; + padding-top: 0.2em; + padding-bottom: 0.2em; + margin: 0; +} + +section.attachments .attachments-files { + padding: 15px; + display: block; + font-size: 1rem; + margin-top: 0rem; + margin-bottom: 0rem; + color: #666; +} + +section.attachments.orange label { + color: #fff; + background: #F0B37E; +} + +section.attachments.orange .attachments-files { + background: #FFF2DB; +} + +section.attachments.green label { + color: #fff; + background: rgba(92, 184, 92, 0.8); +} + +section.attachments.green .attachments-files { + background: #E6F9E6; +} + +section.attachments.blue label { + color: #fff; + background: #6AB0DE; +} + +section.attachments.blue .attachments-files { + background: #E7F2FA; +} + 
+section.attachments.grey label { + color: #fff; + background: #505d65; +} + +section.attachments.grey .attachments-files { + background: #f4f4f4; +} + +/* Children shortcode */ + +/* Children shortcode */ +.children p { + font-size: small; + margin-top: 0px; + padding-top: 0px; + margin-bottom: 0px; + padding-bottom: 0px; +} +.children-li p { + font-size: small; + font-style: italic; + +} +.children-h2 p, .children-h3 p { + font-size: small; + margin-top: 0px; + padding-top: 0px; + margin-bottom: 0px; + padding-bottom: 0px; +} +.children h3,.children h2 { + margin-bottom: 0px; + margin-top: 5px; +} + +code, kbd, pre, samp { + font-family: "Consolas", menlo, monospace; + font-size: 92%; +} +code { + border-radius: 2px; + white-space: nowrap; + color: #5e5e5e; + background: #FFF7DD; + border: 1px solid #fbf0cb; + padding: 0px 2px; +} +code + .copy-to-clipboard { + margin-left: -1px; + border-left: 0 !important; + font-size: inherit !important; + vertical-align: middle; + height: 21px; + top: 0; +} +pre { + padding: 1rem; + margin: 2rem 0; + background: #282c34; + border: 0; + border-radius: 2px; + line-height: 1.15; +} +pre code { + color: whitesmoke; + background: inherit; + white-space: inherit; + border: 0; + padding: 0; + margin: 0; + font-size: 15px; +} +hr { + border-bottom: 4px solid #F0F2F4; +} +.page-title { + margin-top: -25px; + padding: 25px; + float: left; + clear: both; + background: #9c6fb6; + color: #fff; +} +#body a.anchor-link { + color: #ccc; +} +#body a.anchor-link:hover { + color: #9c6fb6; +} +#body-inner .tabs-wrapper.ui-theme-badges { + background: #1d1f21; +} +#body-inner .tabs-wrapper.ui-theme-badges .tabs-nav li { + font-size: 0.9rem; + text-transform: uppercase; +} +#body-inner .tabs-wrapper.ui-theme-badges .tabs-nav li a { + background: #35393c; +} +#body-inner .tabs-wrapper.ui-theme-badges .tabs-nav li.current a { + background: #4d5257; +} +#body-inner pre { + white-space: pre-wrap; +} +.tabs-wrapper pre { + margin: 1rem 0; + border: 0; 
+ padding: 0; + background: inherit; +} +table { + border: 1px solid #eaeaea; + table-layout: auto; +} +th { + background: #f7f7f7; + padding: 0.5rem; +} +td { + padding: 0.5rem; + border: 1px solid #eaeaea; +} +.button { + background: #9c6fb6; + color: #fff; + box-shadow: 0 3px 0 #00a5d4; +} +.button:hover { + background: #00a5d4; + box-shadow: 0 3px 0 #008db6; + color: #fff; +} +.button:active { + box-shadow: 0 1px 0 #008db6; +} +.button-secondary { + background: #F8B450; + color: #fff; + box-shadow: 0 3px 0 #f7a733; +} +.button-secondary:hover { + background: #f7a733; + box-shadow: 0 3px 0 #f69b15; + color: #fff; +} +.button-secondary:active { + box-shadow: 0 1px 0 #f69b15; +} +.bullets { + margin: 1.7rem 0; + margin-left: -0.85rem; + margin-right: -0.85rem; + overflow: auto; +} +.bullet { + float: left; + padding: 0 0.85rem; +} +.two-column-bullet { + width: 50%; +} +@media only all and (max-width: 47.938em) { + .two-column-bullet { + width: 100%; + } +} +.three-column-bullet { + width: 33.33333%; +} +@media only all and (max-width: 47.938em) { + .three-column-bullet { + width: 100%; + } +} +.four-column-bullet { + width: 25%; +} +@media only all and (max-width: 47.938em) { + .four-column-bullet { + width: 100%; + } +} +.bullet-icon { + float: left; + background: #9c6fb6; + padding: 0.875rem; + width: 3.5rem; + height: 3.5rem; + border-radius: 50%; + color: #fff; + font-size: 1.75rem; + text-align: center; +} +.bullet-icon-1 { + background: #9c6fb6; +} +.bullet-icon-2 { + background: #00f3d8; +} +.bullet-icon-3 { + background: #e6f300; +} +.bullet-content { + margin-left: 4.55rem; +} +.tooltipped { + position: relative; +} +.tooltipped:after { + position: absolute; + z-index: 1000000; + display: none; + padding: 5px 8px; + font: normal normal 11px/1.5 "Work Sans", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + color: #fff; + text-align: center; + text-decoration: none; + text-shadow: none; + text-transform: none; + letter-spacing: normal; + word-wrap: 
break-word; + white-space: pre; + pointer-events: none; + content: attr(aria-label); + background: rgba(0, 0, 0, 0.8); + border-radius: 3px; + -webkit-font-smoothing: subpixel-antialiased; +} +.tooltipped:before { + position: absolute; + z-index: 1000001; + display: none; + width: 0; + height: 0; + color: rgba(0, 0, 0, 0.8); + pointer-events: none; + content: ""; + border: 5px solid transparent; +} +.tooltipped:hover:before, .tooltipped:hover:after, .tooltipped:active:before, .tooltipped:active:after, .tooltipped:focus:before, .tooltipped:focus:after { + display: inline-block; + text-decoration: none; +} +.tooltipped-s:after, .tooltipped-se:after, .tooltipped-sw:after { + top: 100%; + right: 50%; + margin-top: 5px; +} +.tooltipped-s:before, .tooltipped-se:before, .tooltipped-sw:before { + top: auto; + right: 50%; + bottom: -5px; + margin-right: -5px; + border-bottom-color: rgba(0, 0, 0, 0.8); +} +.tooltipped-se:after { + right: auto; + left: 50%; + margin-left: -15px; +} +.tooltipped-sw:after { + margin-right: -15px; +} +.tooltipped-n:after, .tooltipped-ne:after, .tooltipped-nw:after { + right: 50%; + bottom: 100%; + margin-bottom: 5px; +} +.tooltipped-n:before, .tooltipped-ne:before, .tooltipped-nw:before { + top: -5px; + right: 50%; + bottom: auto; + margin-right: -5px; + border-top-color: rgba(0, 0, 0, 0.8); +} +.tooltipped-ne:after { + right: auto; + left: 50%; + margin-left: -15px; +} +.tooltipped-nw:after { + margin-right: -15px; +} +.tooltipped-s:after, .tooltipped-n:after { + transform: translateX(50%); +} +.tooltipped-w:after { + right: 100%; + bottom: 50%; + margin-right: 5px; + transform: translateY(50%); +} +.tooltipped-w:before { + top: 50%; + bottom: 50%; + left: -5px; + margin-top: -5px; + border-left-color: rgba(0, 0, 0, 0.8); +} +.tooltipped-e:after { + bottom: 50%; + left: 100%; + margin-left: 5px; + transform: translateY(50%); +} +.tooltipped-e:before { + top: 50%; + right: -5px; + bottom: 50%; + margin-top: -5px; + border-right-color: rgba(0, 0, 
0, 0.8); +} +.highlightable { + padding: 1rem 0 1rem; + overflow: auto; + position: relative; +} +.hljs::selection, .hljs span::selection { + background: #b7b7b7; +} +.lightbox-active #body { + overflow: visible; +} +.lightbox-active #body .padding { + overflow: visible; +} +#github-contrib i { + vertical-align: middle; +} +.featherlight img { + margin: 0 !important; +} +.lifecycle #body-inner ul { + list-style: none; + margin: 0; + padding: 2rem 0 0; + position: relative; +} +.lifecycle #body-inner ol { + margin: 1rem 0 1rem 0; + padding: 2rem; + position: relative; +} +.lifecycle #body-inner ol li { + margin-left: 1rem; +} +.lifecycle #body-inner ol strong, .lifecycle #body-inner ol label, .lifecycle #body-inner ol th { + text-decoration: underline; +} +.lifecycle #body-inner ol ol { + margin-left: -1rem; +} +.lifecycle #body-inner h3[class*='level'] { + font-size: 20px; + position: absolute; + margin: 0; + padding: 4px 10px; + right: 0; + z-index: 1000; + color: #fff; + background: #1ABC9C; +} +.lifecycle #body-inner ol h3 { + margin-top: 1rem !important; + right: 2rem !important; +} +.lifecycle #body-inner .level-1 + ol { + background: #f6fefc; + border: 4px solid #1ABC9C; + color: #16A085; +} +.lifecycle #body-inner .level-1 + ol h3 { + background: #2ECC71; +} +.lifecycle #body-inner .level-2 + ol { + background: #f7fdf9; + border: 4px solid #2ECC71; + color: #27AE60; +} +.lifecycle #body-inner .level-2 + ol h3 { + background: #3498DB; +} +.lifecycle #body-inner .level-3 + ol { + background: #f3f9fd; + border: 4px solid #3498DB; + color: #2980B9; +} +.lifecycle #body-inner .level-3 + ol h3 { + background: #34495E; +} +.lifecycle #body-inner .level-4 + ol { + background: #e4eaf0; + border: 4px solid #34495E; + color: #2C3E50; +} +.lifecycle #body-inner .level-4 + ol h3 { + background: #34495E; +} +#top-bar { + background: #F6F6F6; + border-radius: 2px; + padding: 0 1rem; + height: 0; + min-height: 3rem; +} +#top-github-link { + position: relative; + z-index: 1; 
+ float: right; + display: block; +} +#body #breadcrumbs { + height: auto; + margin-bottom: 0; + padding-left: 0; + line-height: 1.4; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; + width: 70%; + display: inline-block; + float: left; +} +#body #breadcrumbs span { + padding: 0 0.1rem; +} +@media only all and (max-width: 59.938em) { + #sidebar { + width: 230px; + } + #body { + margin-left: 230px; + } +} +@media only all and (max-width: 47.938em) { + #sidebar { + width: 230px; + left: -230px; + } + #body { + margin-left: 0; + width: 100%; + } + .sidebar-hidden { + overflow: hidden; + } + .sidebar-hidden #sidebar { + left: 0; + } + .sidebar-hidden #body { + margin-left: 230px; + overflow: hidden; + } + .sidebar-hidden #overlay { + position: absolute; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 10; + background: rgba(255, 255, 255, 0.5); + cursor: pointer; + } +} +.copy-to-clipboard { + background-image: url(../images/clippy.svg); + background-position: 50% 50%; + background-size: 16px 16px; + background-repeat: no-repeat; + width: 27px; + height: 1.45rem; + top: -1px; + display: inline-block; + vertical-align: middle; + position: relative; + color: #5e5e5e; + background-color: #FFF7DD; + margin-left: -.2rem; + cursor: pointer; + border-radius: 0 2px 2px 0; + margin-bottom: 1px; +} +.copy-to-clipboard:hover { + background-color: #E8E2CD; +} +pre .copy-to-clipboard { + position: absolute; + right: 4px; + top: 4px; + background-color: #C1C4C6; + color: #ccc; + border-radius: 2px; +} +pre .copy-to-clipboard:hover { + background-color: #00bdf3; + color: #fff; +} +.parent-element { + -webkit-transform-style: preserve-3d; + -moz-transform-style: preserve-3d; + transform-style: preserve-3d; +} + +#sidebar ul.topics > li > a .read-icon { + margin-top: 9px; +} + +#sidebar ul { + list-style: none; + padding: 0; + margin: 0; +} + +#sidebar #shortcuts li { + padding: 2px 0; + list-style: none; +} + +#sidebar ul li .read-icon { + display: none; + 
float: right; + font-size: 13px; + min-width: 16px; + margin: 4px 0 0 0; + text-align: right; +} +#sidebar ul li.visited > a .read-icon { + color: #00bdf3; + display: inline; +} + +#sidebar #shortcuts h3 { + font-family: "Novacento Sans Wide", "Helvetica", "Tahoma", "Geneva", "Arial", sans-serif; + color: white ; + margin-top:1rem; + padding-left: 1rem; +} + +#homelinks { + background-color: #9c6fb6; + color: #fff; + padding: 7px 0; + border-bottom: 4px solid #9c6fb6; +} + +#searchResults { + text-align: left; +} + +option { + color: initial; +} diff --git a/en-21/404.html b/en-21/404.html new file mode 100644 index 0000000000..a2c8dae11f --- /dev/null +++ b/en-21/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-21/categories/index.html b/en-21/categories/index.html new file mode 100644 index 0000000000..83f95aa950 --- /dev/null +++ b/en-21/categories/index.html @@ -0,0 +1,560 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-21/categories/index.xml b/en-21/categories/index.xml new file mode 100644 index 0000000000..79a6350dc9 --- /dev/null +++ b/en-21/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-21/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-21 + + + diff --git a/en-21/dashboards/index.html b/en-21/dashboards/index.html new file mode 100644 index 0000000000..7806c092ce --- /dev/null +++ b/en-21/dashboards/index.html @@ -0,0 +1,542 @@ + + + + + + + + + + + + SAT Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Dashboards

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-21/dashboards/index.xml b/en-21/dashboards/index.xml new file mode 100644 index 0000000000..fdee3dd749 --- /dev/null +++ b/en-21/dashboards/index.xml @@ -0,0 +1,26 @@ + + + + SAT Dashboards on System Admin Toolkit (SAT) + /docs-sat/en-21/dashboards/ + Recent content in SAT Dashboards on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-21 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Grafana Dashboards + /docs-sat/en-21/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.<system_name>.<system_domain> For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the SMA product documentation. For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry Kafka Topics in the SMA product documentation. + + + SAT Kibana Dashboards + /docs-sat/en-21/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. 
+ + + diff --git a/en-21/dashboards/sat_grafana_dashboards/index.html b/en-21/dashboards/sat_grafana_dashboards/index.html new file mode 100644 index 0000000000..53692266dc --- /dev/null +++ b/en-21/dashboards/sat_grafana_dashboards/index.html @@ -0,0 +1,641 @@ + + + + + + + + + + + + SAT Grafana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Grafana Dashboards

+

The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.

+

Grafana can be accessed via web browser at the following URL:

+
    +
  • https://sma-grafana.<system_name>.<system_domain>
  • +
+

For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.

+

For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.

+ +

There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Dashboard NameDisplay Type
Fabric CongestionChart Panels
Fabric RFC3635Chart Panels
Fabric ErrorsTabular Format
Fabric Port StateTabular Format
+

The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.

+

SAT Grafana Interval and Locations Options

+

Shows the Interval and Locations Options for the available telemetry.

+

+

The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.

+

For additional information, refer to Grafana Templates and Variables.

+

The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.

+

The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.

+

Grafana Fabric Congestion Dashboard

+

+

SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. They also provide the ability to drill down +to view data for specific ports on specific switches.

+

This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.

+

The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.

+

The port type of a link is reported as a port state “subtype” event when defined at port initialization.

+

Grafana Fabric Errors Dashboard

+

+

This dashboard reports error counters in a tabular format in three panels.

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

Unlike other dashboards, the locations presented are all locations in the system rather than only those having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.

+

Grafana Fabric Port State Dashboard

+

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.

+

The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.

+

Grafana Fabric RFC3635 Dashboard

+

+

For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.

+

Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/dashboards/sat_kibana_dashboards/index.html b/en-21/dashboards/sat_kibana_dashboards/index.html new file mode 100644 index 0000000000..c038d205d6 --- /dev/null +++ b/en-21/dashboards/sat_kibana_dashboards/index.html @@ -0,0 +1,830 @@ + + + + + + + + + + + + SAT Kibana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Kibana Dashboards

+

Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.

+

Kibana can be accessed via web browser at the following URL:

+
    +
  • https://sma-kibana.<system_name>.<system_domain>
  • +
+

For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.

+

Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DashboardShort DescriptionLong DescriptionKibana Visualization and Search Name
sat-aerAER correctedCorrected Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-corrected Search: sat-aer-corrected
sat-aerAER fatalFatal Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-fatal Search: sat-aer-fatal
sat-atomATOM failuresApplication Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged.sat-atom-failed
sat-atomATOM admindownApplication Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch.sat-atom-admindown
sat-heartbeatHeartbeat loss eventsHeartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system.sat-heartbeat
sat-kernelKernel assertionsThe kernel software performs a failed assertion when some condition represents a serious fault. The node goes down.sat-kassertions
sat-kernelKernel panicsThe kernel panics when something is seriously wrong. The node goes down.sat-kernel-panic
sat-kernelLustre bugs (LBUGs)The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down.sat-lbug
sat-kernelCPU stallsCPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric.sat-cpu-stall
sat-kernelOut of memoryAn Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided.sat-oom
sat-mceMCEMachine Check Exceptions (MCE) are errors detected at the processor level.sat-mce
sat-rasdaemonrasdaemon errorsErrors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future.sat-rasdaemon-error
sat-rasdaemonrasdaemon messagesAll messages from the rasdaemon service on nodes.sat-rasdaemon
+

Disable Search Highlighting in Kibana Dashboard

+

By default, search highlighting is enabled. This procedure describes how to disable search highlighting.

+

The Kibana Dashboard should be open on your system.

+
    +
  1. +

    Navigate to Management

    +
  2. +
  3. +

    Navigate to Advanced Settings in the Kibana section, below the Elasticsearch section

    +
  4. +
  5. +

    Scroll down to the Discover section

    +
  6. +
  7. +

    Change Highlight results from on to off

    +
  8. +
  9. +

    Click Save to save changes

    +
  10. +
+

AER Kibana Dashboard

+

The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.

+

View the AER Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-aer dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.

    +
  8. +
+

ATOM Kibana Dashboard

+

The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even when a node is not marked +admindown and does not otherwise fail. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.

+

View the ATOM Kibana Dashboard

+

HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-atom dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Heartbeat Kibana Dashboard

+

The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.

+

View the Heartbeat Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-heartbeat dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.

    +
  8. +
+

Kernel Kibana Dashboard

+

The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.

+

View the Kernel Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-kernel dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.

    +
  8. +
+

MCE Kibana Dashboard

+

The MCE Dashboard displays CPU-detected processor-level hardware errors.

+

View the MCE Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-mce dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Rasdaemon Kibana Dashboard

+

The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon +service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including +PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages +presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one +for only messages of severity “emerg” or “err” and another for all messages from rasdaemon.

+

View the Rasdaemon Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-rasdaemon dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.

    +
  8. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/img/Fabric_PortState_Locations_UI.png b/en-21/img/Fabric_PortState_Locations_UI.png new file mode 100644 index 0000000000..704511ebce Binary files /dev/null and b/en-21/img/Fabric_PortState_Locations_UI.png differ diff --git a/en-21/img/Grafana_Fabric_Congestion.png b/en-21/img/Grafana_Fabric_Congestion.png new file mode 100644 index 0000000000..dbf481d94c Binary files /dev/null and b/en-21/img/Grafana_Fabric_Congestion.png differ diff --git a/en-21/img/Grafana_HSN_Errors.png b/en-21/img/Grafana_HSN_Errors.png new file mode 100644 index 0000000000..f43b7d02a6 Binary files /dev/null and b/en-21/img/Grafana_HSN_Errors.png differ diff --git a/en-21/img/Grafana_rfc3635.png b/en-21/img/Grafana_rfc3635.png new file mode 100644 index 0000000000..dff176c82d Binary files /dev/null and b/en-21/img/Grafana_rfc3635.png differ diff --git a/en-21/img/SAT_Grafana_Fabric_Vars.png b/en-21/img/SAT_Grafana_Fabric_Vars.png new file mode 100644 index 0000000000..194d75b124 Binary files /dev/null and b/en-21/img/SAT_Grafana_Fabric_Vars.png differ diff --git a/en-21/index.html b/en-21/index.html new file mode 100644 index 0000000000..84c943bf80 --- /dev/null +++ b/en-21/index.html @@ -0,0 +1,537 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ + + + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/index.json b/en-21/index.json new file mode 100644 index 0000000000..ba760ac7e0 --- /dev/null +++ b/en-21/index.json @@ -0,0 +1,64 @@ +[ +{ + "uri": "/docs-sat/en-21/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide Introduction to SAT About System Admin Toolkit (SAT) System Admin Toolkit Command Overview Command Prompt Conventions in SAT SAT Dependencies SAT Installation Install SAT Install the System Admin Toolkit Product Stream Perform NCN Personalization SAT Setup SAT Authentication Generate SAT S3 Credentials Run sat setrev to Set System Information SAT Post-Upgrade Optional: Remove old versions after an upgrade Remove obsolete configuration file sections SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-21/dashboards/", + "title": "SAT Dashboards", + "tags": [], + "description": "", + "content": "SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-21/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream.\nPrerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.1.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. To upgrade SAT, execute the pre-installation, installation, and post-installation procedures for a newer distribution. The newly installed version will become the default. 
Pre-Installation Procedure Start a typescript.\nThe typescript will record the commands and the output from this installation.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Installation Procedure Copy the release distribution gzipped tar file to ncn-m001.\nUnzip and extract the release distribution, 2.1.x.\nncn-m001# tar -xvzf sat-2.1.x.tar.gz Change directory to the extracted release distribution directory.\nncn-m001# cd sat-2.1.x Run the installer: install.sh.\nThe script produces a lot of output. The last several lines are included below for reference.\nncn-m001# ./install.sh ... ConfigMap data updates exist; Exiting. + clean-install-deps + for image in \u0026#34;${vendor_images[@]}\u0026#34; + podman rmi -f docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d Untagged: docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d Deleted: 2c196c0c6364d9a1699d83dc98550880dc491cc3433a015d35f6cab1987dd6da + for image in \u0026#34;${vendor_images[@]}\u0026#34; + podman rmi -f docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d Untagged: docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d Deleted: 1b38b7600f146503e246e753cd9df801e18409a176b3dbb07b0564e6bc27144c Check the return code of the installer. Zero indicates a successful installation.\nncn-m001# echo $? 0 Check the progress of the SAT configuration import Kubernetes job, which is initiated by install.sh.\nIf the \u0026ldquo;Pods Statuses\u0026rdquo; appear as \u0026ldquo;Succeeded\u0026rdquo;, the job has completed successfully. The job usually takes between 30 seconds and 2 minutes.\nncn-m001# kubectl describe job sat-config-import-2.1.x -n services ... Pods Statuses: 0 Running / 1 Succeeded / 0 Failed ... The job\u0026rsquo;s progress may be monitored using kubectl logs. 
The example below includes the final log lines from a successful configuration import Kubernetes job.\nncn-m001# kubectl logs -f -n services --selector \\ job-name=sat-config-import-2.1.x --all-containers ... ConfigMap update attempt=1 Resting 1s before reading ConfigMap ConfigMap data updates exist; Exiting. 2021-08-04T21:50:10.275886Z info Agent has successfully terminated 2021-08-04T21:50:10.276118Z warning envoy main caught SIGTERM # Completed on Wed Aug 4 21:49:44 2021 The following error may appear in this log, but it can be ignored.\nerror accept tcp [::]:15020: use of closed network connection Post-Installation Procedure Optional: Remove the SAT release distribution tar file and extracted directory.\nncn-m001# rm sat-2.1.x.tar.gz ncn-m001# rm -rf sat-2.1.x/ Upgrade only: Ensure that the environment variable SAT_TAG is not set in the ~/.bashrc file on any of the management NCNs.\nNOTE: This step should only be required when updating from Shasta 1.4.1 or Shasta 1.4.2.\nThe following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, and shows output from a system in which no further action is needed.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m003: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. Remove that line from the ~/.bashrc file on ncn-m002.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) ncn-m002: export SAT_TAG=3.5.0 ncn-m003: source \u0026lt;(kubectl completion bash) Stop the typescript.\nNOTE: This step can be skipped if you wish to use the same typescript for the remainder of the SAT install. 
See Next Steps.\nncn-m001# exit SAT version 2.1.x is now installed/upgraded, meaning the SAT 2.1.x release has been loaded into the system software repository.\nSAT configuration content for this release has been uploaded to VCS. SAT content for this release has been uploaded to the CSM product catalog. SAT content for this release has been uploaded to Nexus repositories. The sat command won\u0026rsquo;t be available until the NCN Personalization procedure has been executed. Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the sections listed below.\nNOTE: The NCN Personalization procedure is required when upgrading SAT. The setup procedures in SAT Setup, however, are not required when upgrading SAT. They should have been executed during the first installation of SAT.\nExecute the NCN Personalization procedure:\nPerform NCN Personalization If performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the upgrade procedures:\nOptional: Remove old versions after an upgrade Remove obsolete configuration file sections Perform NCN Personalization Describes how to perform NCN personalization using CFS. This personalization process will configure the System Admin Toolkit (SAT) product stream.\nPrerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. Notes on the Procedure Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.1.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. 
If upgrading SAT, the existing configuration will likely include other Cray EX product entries. Update the SAT entry as described in this procedure. The HPE Cray EX System Software Getting Started Guide provides guidance on how and when to update the entries for the other products. Procedure Start a typescript if not already using one.\nThe typescript will capture the commands and the output from this installation procedure.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Get the git commit ID for the branch with a version number matching the version of SAT.\nThis represents a revision of Ansible configuration content stored in VCS.\nGet and store the VCS password (required to access the remote VCS repo).\nncn-m001# VCS_PASS=$(kubectl get secret -n services vcs-user-credentials \\ --template={{.data.vcs_password}} | base64 --decode) In this example, the git commit ID is 82537e59c24dd5607d5f5d6f92cdff971bd9c615, and the version number is 2.1.x.\nncn-m001# git ls-remote \\ https://crayvcs:$VCS_PASS@api-gw-service-nmn.local/vcs/cray/sat-config-management.git \\ refs/heads/cray/sat/* ... 82537e59c24dd5607d5f5d6f92cdff971bd9c615 refs/heads/cray/sat/2.1.x Add a sat layer to the CFS configuration(s) associated with the manager NCNs.\nGet the name(s) of the CFS configuration(s).\nNOTE: Each manager NCN uses a single CFS configuration. 
An individual CFS configuration may be used by any number of manager NCNs, i.e., three manager NCNs might use one, two, or three CFS configurations.\nIn the following example, all three manager NCNs use the same CFS configuration – ncn-personalization.\nncn-m001:~ # for component in $(cray hsm state components list \\ --role Management --subrole Master --format json | jq -r \\ \u0026#39;.Components | .[].ID\u0026#39;); do cray cfs components describe $component \\ --format json | jq -r \u0026#39;.desiredConfig\u0026#39;; done ncn-personalization ncn-personalization ncn-personalization In the following example, the three manager NCNs all use different configurations, each with a unique name.\nncn-personalization-m001 ncn-personalization-m002 ncn-personalization-m003 Execute the following sub-steps (3.2 through 3.5) once for each unique CFS configuration name.\nNOTE: Examples in the following sub-steps assume that all manager NCNs use the CFS configuration ncn-personalization.\nGet the current configuration layers for each CFS configuration, and save the data to a local JSON file.\nThe JSON file created in this sub-step will serve as a template for updating an existing CFS configuration, or creating a new one.\nncn-m001# cray cfs configurations describe ncn-personalization --format \\ json | jq \u0026#39;{ layers }\u0026#39; \u0026gt; ncn-personalization.json If the configuration does not exist yet, you may see the following error. 
In this case, create a new JSON file for that CFS configuration, e.g., ncn-personalization.json.\nError: Configuration could not found.: Configuration ncn-personalization could not be found NOTE: For more on CFS configuration management, refer to \u0026ldquo;Manage a Configuration with CFS\u0026rdquo; in the CSM product documentation.\nAppend a sat layer to the end of the JSON file\u0026rsquo;s list of layers.\nIf the file already contains a sat layer entry, update it.\nIf the configuration data could not be found in the previous sub-step, the JSON file will be empty. In this case, copy the ncn-personalization.json example below, paste it into the JSON file, delete the ellipsis, and make appropriate changes to the sat layer entry.\nUse the git commit ID from step 8, e.g. 82537e59c24dd5607d5f5d6f92cdff971bd9c615.\nNOTE: The name value in the example below may be changed, but the installation procedure uses the example value, sat-ncn. If an alternate value is used, some of the following examples must be updated accordingly before they are executed.\nncn-m001# vim ncn-personalization.json ... ncn-m001# cat ncn-personalization.json { \u0026#34;layers\u0026#34;: [ ... { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;82537e59c24dd5607d5f5d6f92cdff971bd9c615\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;sat-ncn\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;sat-ncn.yml\u0026#34; } ] } Update the existing CFS configuration, or create a new one.\nThe command should output a JSON-formatted representation of the CFS configuration, which will look like the JSON file, but with lastUpdated and name fields.\nncn-m001# cray cfs configurations update ncn-personalization --file \\ ncn-personalization.json --format json { \u0026#34;lastUpdated\u0026#34;: \u0026#34;2021-08-05T16:38:53Z\u0026#34;, \u0026#34;layers\u0026#34;: { ... 
}, \u0026#34;name\u0026#34;: \u0026#34;ncn-personalization\u0026#34; } Optional: Delete the JSON file.\nNOTE: There is no reason to keep the file. If you keep it, verify that it is up-to-date with the actual CFS configuration before using it again.\nncn-m001# rm ncn-personalization.json Invoke the CFS configurations that you created or updated in the previous step.\nThis step will create a CFS session based on the given configuration and install SAT on the associated manager NCNs.\nThe --configuration-limit option causes only the sat-ncn layer of the configuration, ncn-personalization, to run.\nCAUTION: In this example, the session --name is sat-session. That value is only an example. Declare a unique name for each configuration session.\nYou should see a representation of the CFS session in the output.\nncn-m001# cray cfs sessions create --name sat-session --configuration-name \\ ncn-personalization --configuration-limit sat-ncn name=\u0026#34;sat-session\u0026#34; [ansible] ... Execute this step once for each unique CFS configuration that you created or updated in the previous step.\nMonitor the progress of each CFS session.\nFirst, list all containers associated with the CFS session:\nncn-m001# kubectl get pod -n services --selector=cfsession=sat-session \\ -o json | jq \u0026#39;.items[0].spec.containers[] | .name\u0026#39; \u0026#34;inventory\u0026#34; \u0026#34;ansible-1\u0026#34; \u0026#34;istio-proxy\u0026#34; Next, get the logs for the ansible-1 container.\nNOTE: the trailing digit might differ from \u0026ldquo;1\u0026rdquo;. It is the zero-based index of the sat-ncn layer within the configuration\u0026rsquo;s layers.\nncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \\ --selector=cfsession=sat-session Ansible plays, which are run by the CFS session, will install SAT on all the manager NCNs on the system. Successful results for all of the manager NCN xnames can be found at the end of the container log. For example:\n... 
PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Execute this step for each unique CFS configuration.\nNOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version sat 3.7.0 NOTE: Upon first running sat, you may see additional output while the sat container image is downloaded. This will occur the first time sat is run on each manager NCN. For example, if you run sat for the first time on ncn-m001 and then for the first time on ncn-m002, you will see this additional output both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... sat 3.7.0 Stop the typescript.\nncn-m001# exit SAT version 2.1.x is now configured:\nThe SAT RPM package is installed on the associated NCNs. 
Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.\nIf performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the SAT Post-Upgrade procedures:\nOptional: Remove old versions after an upgrade Remove obsolete configuration file sections SAT Authentication Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in later steps of the install process. The admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. For additional information on SAT authentication, see System Security and Authentication in the CSM documentation.\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nDescription of SAT Command Authentication Types Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. 
This must be done on every Kubernetes manager node where SAT commands are run.\nBelow is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bootsys Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat k8s Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. sat-k8s Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node xnames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. 
sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat status Requires authentication to the API gateway. sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. sat-xname2nid Translate node and node BMC xnames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml. The name of currently logged in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, and will use the token for that username if it has been obtained and saved by sat auth.\nPrerequisites The sat CLI has been installed following Install The System Admin Toolkit Product Stream. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:\nGenerate a default SAT configuration file, if one does not exist.\nncn-m001# sat init Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. 
Note: If the config file already exists, it will print out an error:\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:\nusername = \u0026#34;crayadmin\u0026#34; Run sat auth. Enter your password when prompted. E.g.:\nncn-m001# sat auth Password for crayadmin: Succeeded! Other sat commands are now authenticated to make requests to the API gateway. E.g.:\nncn-m001# sat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes master node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see: Run Sat Setrev to Set System Information).\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites The sat CLI has been installed following Install The System Admin Toolkit Product Stream. The sat configuration file has been created (See SAT Authentication). CSM has been installed and verified. 
Procedure Ensure the files are readable only by root.\nncn-m001# touch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key ncn-m001# chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key ncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\nGet the SAT configuration file\u0026rsquo;s endpoint value.\nNOTE: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\nncn-m001# grep endpoint ~/.config/sat/sat.toml # endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs https://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, modify the SAT configuration file\u0026rsquo;s endpoint value to match the secret\u0026rsquo;s.\nCopy SAT configurations to every manager node on the system.\nncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. 
Therefore, the list of hosts above is ncn-m002 and ncn-m003.\nRun sat setrev to Set System Information NOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites S3 credentials have been generated. See Generate SAT S3 Credentials. SAT authentication has been set up. See SAT Authentication. Procedure Run sat setrev to set System Revision Information. Follow the on-screen prompts.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Run sat showrev to verify System Revision Information. 
The following tables contain example information.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | Shasta | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... | | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ Optional: Remove old versions after an upgrade Prerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. 
The Perform NCN Personalization procedure has been successfully completed. Procedure After upgrading from a previous version of SAT, the old version of the cray/cray-sat container image will remain in the registry on the system. It is not removed automatically, but it will not be the default version.\nThe admin can remove the older version of the cray/cray-sat container image.\nThe cray-product-catalog Kubernetes configuration map will also show all versions of SAT that are installed. The command sat showrev --products will display these versions. See the example:\nncn-m001# sat showrev --products ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+--------------------+-----------------------+ ... | sat | 2.1.3 | - | - | | sat | 2.0.4 | - | - | ... +--------------+-----------------+--------------------+-----------------------+ Remove obsolete configuration file sections Prerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. The Perform NCN Personalization procedure has been successfully completed. Procedure After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. 
Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\n" +}, +{ + "uri": "/docs-sat/en-21/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.\nSix Kibana Dashboards are included with SAT. They provide organized output for system health information.\nAER Kibana Dashboard ATOM Kibana Dashboard Heartbeat Kibana Dashboard Kernel Kibana Dashboard MCE Kibana Dashboard Rasdaemon Kibana Dashboard Four Grafana Dashboards are included with SAT. 
They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.\nGrafana Fabric Congestion Dashboard Grafana Fabric Errors Dashboard Grafana Fabric Port State Dashboard Grafana Fabric RFC3635 Dashboard SAT is installed as a separate product as part of the HPE Cray EX System base installation.\nSystem Admin Toolkit Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes manager nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. Non-interactive: Running a sat command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. 
The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. The following two examples show the same action, checking the system status, using interactive and non-interactive modes.\nInteractive ncn-m001# sat bash (CONTAINER-ID)sat-container# sat status Non-interactive ncn-m001# sat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, then use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following example.\nncn-m001# sat-man status A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed either with man sat or man sat-podman from the manager node.\nncn-m001# man sat ncn-m001# man sat-podman Command Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.\nThe root or super-user account always has the # character at the end of the prompt and has the host name of the host in the prompt. Any non-root account is indicated with account@hostname\u0026gt;. A user account that is neither root nor crayadm is referred to as user. 
The command prompt inside the SAT container environment is indicated with the string as follows. It also has the \u0026ldquo;#\u0026rdquo; character at the end of the prompt. Command Prompt Meaning ncn-m001# Run on one of the Kubernetes Manager servers. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) Examples of the sat status command used by an administrator:\nncn-m001# sat status ncn-m001# sat bash (CONTAINER_ID) sat-container# sat status SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX (Shasta) software stack. The following list shows these dependencies for each subcommand. Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Compute Rolling Upgrade Service (CRUS) Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 COS Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diag Fox sat firmware CSM Firmware Action Service (FAS) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat status CSM Hardware State Manager (HSM) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, +{ + "uri": "/docs-sat/en-21/dashboards/sat_grafana_dashboards/", + "title": "SAT Grafana Dashboards", + "tags": [], + "description": "", + "content": "SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. 
The messages are displayed based on severity.\nGrafana can be accessed via web browser at the following URL:\nhttps://sma-grafana.\u0026lt;system_name\u0026gt;.\u0026lt;system_domain\u0026gt; For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the SMA product documentation.\nFor more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry Kafka Topics in the SMA product documentation.\nNavigate SAT Grafana Dashboards There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display telemetry in a tabular format.\nDashboard Name Display Type Fabric Congestion Chart Panels Fabric RFC3635 Chart Panels Fabric Errors Tabular Format Fabric Port State Tabular Format The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. The interval setting is not used for tabular dashboards.\nSAT Grafana Interval and Locations Options Shows the Interval and Locations Options for the available telemetry.\nThe value of the Interval option sets the time resolution of the received telemetry. This works a bit like a histogram, with the available telemetry in an interval of time going into a \u0026ldquo;bucket\u0026rdquo; and averaging out to a single point on the chart or table. The special value auto will choose an interval based on the time range selected.\nFor additional information, refer to Grafana Templates and Variables.\nThe Locations option allows restriction of the telemetry shown by locations, either individual links or all links in a switch. 
The selection presented updates dynamically according to time range, except for the errors dashboard, which always has entries for all links and switches, although the errors shown are restricted to the selected time range.\nThe chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart\u0026rsquo;s legend or the trace on the chart.\nGrafana Fabric Congestion Dashboard SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in the system and assess the past and present health of the high-speed network. It also allows the ability to drill down to view data for specific ports on specific switches.\nThis dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, local, and global and correspond to the link\u0026rsquo;s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to \u0026ldquo;all\u0026rdquo;) selected.\nThe metric values for links of a given port type are similar in value to each other but very distinct from the values of other types. If the values for different port types are all plotted together, the values for links with lower values are indistinguishable from zero when plotted.\nThe port type of a link is reported as a port state \u0026ldquo;subtype\u0026rdquo; event when defined at port initialization.\nGrafana Fabric Errors Dashboard This dashboard reports error counters in a tabular format in three panels.\nThere is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nUnlike other dashboards, the locations presented are all locations in the system rather than having telemetry within the time range selected. 
However, the values are taken from telemetry within the time range.\nGrafana Fabric Port State Dashboard There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nThe Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours results in all states for all links in the system being shown.\nThe three columns named, group, switch, and port are not port state events, but extra information included with all port state events.\nGrafana Fabric RFC3635 Dashboard For additional information on performance counters, refer to Definitions of Managed Objects for the Ethernet-like Interface Types, an Internet standards document.\nBecause these metrics are counters that only increase over time, the values plotted are the change in the counter\u0026rsquo;s value over the interval setting.\n" +}, +{ + "uri": "/docs-sat/en-21/dashboards/sat_kibana_dashboards/", + "title": "SAT Kibana Dashboards", + "tags": [], + "description": "", + "content": "SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information.\nKibana can be accessed via web browser at the following URL:\nhttps://sma-kibana.\u0026lt;system_name\u0026gt;.\u0026lt;system_domain\u0026gt; For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product documentation.\nAdditional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this table.\nDashboard Short Description Long Description Kibana Visualization and Search Name sat-aer AER corrected Corrected Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-corrected Search: sat-aer-corrected sat-aer AER fatal Fatal Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-fatal Search: sat-aer-fatal sat-atom ATOM failures Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. sat-atom-failed sat-atom ATOM admindown Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. sat-atom-admindown sat-heartbeat Heartbeat loss events Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. sat-heartbeat sat-kernel Kernel assertions The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. sat-kassertions sat-kernel Kernel panics The kernel panics when something is seriously wrong. The node goes down. sat-kernel-panic sat-kernel Lustre bugs (LBUGs) The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. 
sat-lbug sat-kernel CPU stalls CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. sat-cpu-stall sat-kernel Out of memory An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. sat-oom sat-mce MCE Machine Check Exceptions (MCE) are errors detected at the processor level. sat-mce sat-rasdaemon rasdaemon errors Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. sat-rasdaemon-error sat-rasdaemon rasdaemon messages All messages from the rasdaemon service on nodes. sat-rasdaemon Disable Search Highlighting in Kibana Dashboard By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.\nThe Kibana Dashboard should be open on your system.\nNavigate to Management\nNavigate to Advanced Settings in the Kibana section, below the Elastic search section\nScroll down to the Discover section\nChange Highlight results from on to off\nClick Save to save changes\nAER Kibana Dashboard The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. 
These errors are split up into separate visualizations depending on whether they are fatal or corrected errors.\nView the AER Kibana Dashboard Go to the dashboard section.\nSelect sat-aer dashboard.\nChoose the time range of interest.\nView the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nATOM Kibana Dashboard The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health checks and application test failures. Some test failures are of possible interest even though a node is not marked admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide clues if a node otherwise fails. They might also show application problems.\nView the ATOM Kibana Dashboard HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.\nGo to the dashboard section.\nSelect sat-atom dashboard.\nChoose the time range of interest.\nView any nodes marked admindown and any ATOM test failures. These failures occur during health checks and application test failures. Test failures marked admindown are important to note. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nHeartbeat Kibana Dashboard The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. 
The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss.\nView the Heartbeat Kibana Dashboard Go to the dashboard section.\nSelect sat-heartbeat dashboard.\nChoose the time range of interest.\nView the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.\nKernel Kibana Dashboard The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using too much memory.\nView the Kernel Kibana Dashboard Go to the dashboard section.\nSelect sat-kernel dashboard.\nChoose the time range of interest.\nView the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nMCE Kibana Dashboard The MCE Dashboard displays CPU detected processor-level hardware errors.\nView the MCE Kibana Dashboard Go to the dashboard section.\nSelect sat-mce dashboard.\nChoose the time range of interest.\nView the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nRasdaemon Kibana Dashboard The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one for only messages of severity \u0026ldquo;emerg\u0026rdquo; or \u0026ldquo;err\u0026rdquo; and another for all messages from rasdaemon.\nView the Rasdaemon Kibana Dashboard Go to the dashboard section.\nSelect sat-rasdaemon dashboard.\nChoose the time range of interest.\nView the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\n" +}, +{ + "uri": "/docs-sat/en-21/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Summary of SAT changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. 
The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named \u0026ldquo;ncn-personalization\u0026rdquo;.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-diags product. The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic executables on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and config file options have been removed. 
For further instructions, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes masters and workers.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. Summary of SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. 
Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. Summary of SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes management cluster (i.e., workers and masters). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. 
The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Config File Location Change The default location of the SAT config file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own config files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a config file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers option. The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. 
Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. For detailed instructions, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API. Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. 
This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. A command-line option, --bos-templates, and a corresponding config-file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding config file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log. This change simplifies mounting this file into the sat container running under Podman.\nSummary of SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. 
Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\nSummary of SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch See the System Admin Toolkit Command Overview and the table of commands in the SAT Authentication section of this document for more details on each of these commands.\n" +}, +{ + "uri": "/docs-sat/en-21/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-21/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-21/index.xml b/en-21/index.xml new file mode 100644 index 0000000000..e20c89353b --- /dev/null +++ b/en-21/index.xml @@ -0,0 +1,47 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-21/ + Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-21 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Installation + /docs-sat/en-21/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/install/ + SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream. Prerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. 
In the examples below, replace 2.1.x with the version of the SAT product stream being installed. + + + Introduction to SAT + /docs-sat/en-21/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview. + + + SAT Grafana Dashboards + /docs-sat/en-21/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.&lt;system_name&gt;.&lt;system_domain&gt; For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the SMA product documentation. For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry Kafka Topics in the SMA product documentation. + + + SAT Kibana Dashboards + /docs-sat/en-21/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. 
It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. + + + SAT Release Notes + /docs-sat/en-21/release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-21/release_notes/ + SAT Release Notes Summary of SAT changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5. This version of the SAT product included: Version 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component: Version 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release. + + + diff --git a/en-21/install/index.html b/en-21/install/index.html new file mode 100644 index 0000000000..def52dbf4f --- /dev/null +++ b/en-21/install/index.html @@ -0,0 +1,1333 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install the System Admin Toolkit Product Stream

+

Describes how to install the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+
    +
  • CSM is installed and verified.
  • +
  • cray-product-catalog is running.
  • +
  • There must be at least 2 gigabytes of free space on the manager NCN on which the +procedure is run.
  • +
+

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.1.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • To upgrade SAT, execute the pre-installation, installation, and post-installation +procedures for a newer distribution. The newly installed version will become +the default.
  • +
+

Pre-Installation Procedure

+
    +
  1. +

    Start a typescript.

    +

    The typescript will record the commands and the output from this installation.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Installation Procedure

+
    +
  1. +

    Copy the release distribution gzipped tar file to ncn-m001.

    +
  2. +
  3. +

    Unzip and extract the release distribution, 2.1.x.

    +
    ncn-m001# tar -xvzf sat-2.1.x.tar.gz
    +
  4. +
  5. +

    Change directory to the extracted release distribution directory.

    +
    ncn-m001# cd sat-2.1.x
    +
  6. +
  7. +

    Run the installer: install.sh.

    +

    The script produces a lot of output. The last several lines are included +below for reference.

    +
    ncn-m001# ./install.sh
    +...
    +ConfigMap data updates exist; Exiting.
    ++ clean-install-deps
    ++ for image in "${vendor_images[@]}"
    ++ podman rmi -f docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d
    +Untagged: docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d
    +Deleted: 2c196c0c6364d9a1699d83dc98550880dc491cc3433a015d35f6cab1987dd6da
    ++ for image in "${vendor_images[@]}"
    ++ podman rmi -f docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d
    +Untagged: docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d
    +Deleted: 1b38b7600f146503e246e753cd9df801e18409a176b3dbb07b0564e6bc27144c
    +
  8. +
  9. +

    Check the return code of the installer. Zero indicates a successful installation.

    +
    ncn-m001# echo $?
    +0
    +
  10. +
  11. +

    Check the progress of the SAT configuration import Kubernetes job, which is +initiated by install.sh.

    +

    If the “Pods Statuses” appear as “Succeeded”, the job has completed +successfully. The job usually takes between 30 seconds and 2 minutes.

    +
    ncn-m001# kubectl describe job sat-config-import-2.1.x -n services
    +...
    +Pods Statuses:  0 Running / 1 Succeeded / 0 Failed
    +...
    +

    The job’s progress may be monitored using kubectl logs. The example below includes +the final log lines from a successful configuration import Kubernetes job.

    +
    ncn-m001# kubectl logs -f -n services --selector \
    +    job-name=sat-config-import-2.1.x --all-containers
    +...
    +ConfigMap update attempt=1
    +Resting 1s before reading ConfigMap
    +ConfigMap data updates exist; Exiting.
    +2021-08-04T21:50:10.275886Z  info    Agent has successfully terminated
    +2021-08-04T21:50:10.276118Z  warning envoy main  caught SIGTERM
    +# Completed on Wed Aug  4 21:49:44 2021
    +

    The following error may appear in this log, but it can be ignored.

    +
    error accept tcp [::]:15020: use of closed network connection
    +
  12. +
+

Post-Installation Procedure

+
    +
  1. +

    Optional: Remove the SAT release distribution tar file and extracted directory.

    +
    ncn-m001# rm sat-2.1.x.tar.gz
    +ncn-m001# rm -rf sat-2.1.x/
    +
  2. +
  3. +

    Upgrade only: Ensure that the environment variable SAT_TAG is not set +in the ~/.bashrc file on any of the management NCNs.

    +

    NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.

    +

    The following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, +and shows output from a system in which no further action is needed.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m003: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +

    The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. +Remove that line from the ~/.bashrc file on ncn-m002.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +ncn-m002: export SAT_TAG=3.5.0
    +ncn-m003: source <(kubectl completion bash)
    +
  4. +
  5. +

    Stop the typescript.

    +

    NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.

    +
    ncn-m001# exit
    +
  6. +
+

SAT version 2.1.x is now installed/upgraded, meaning the SAT 2.1.x release +has been loaded into the system software repository.

+
    +
  • SAT configuration content for this release has been uploaded to VCS.
  • +
  • SAT content for this release has been uploaded to the CSM product catalog.
  • +
  • SAT content for this release has been uploaded to Nexus repositories.
  • +
  • The sat command won’t be available until the NCN Personalization +procedure has been executed.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.

+

NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.

+

Execute the NCN Personalization procedure:

+ +

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the upgrade procedures:

+ +

Perform NCN Personalization

+

Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+ +

Notes on the Procedure

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.1.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • If upgrading SAT, the existing configuration will likely include other Cray EX product +entries. Update the SAT entry as described in this procedure. The HPE Cray EX System +Software Getting Started Guide provides guidance on how and when to update the +entries for the other products.
  • +
+

Procedure

+
    +
  1. +

    Start a typescript if not already using one.

    +

    The typescript will capture the commands and the output from this installation procedure.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
  3. +

    Get the git commit ID for the branch with a version number matching the version of SAT.

    +

    This represents a revision of Ansible configuration content stored in VCS.

    +

    Get and store the VCS password (required to access the remote VCS repo).

    +
    ncn-m001# VCS_PASS=$(kubectl get secret -n services vcs-user-credentials \
    +    --template={{.data.vcs_password}} | base64 --decode)
    +

    In this example, the git commit ID is 82537e59c24dd5607d5f5d6f92cdff971bd9c615, +and the version number is 2.1.x.

    +
    ncn-m001# git ls-remote \
    +    https://crayvcs:$VCS_PASS@api-gw-service-nmn.local/vcs/cray/sat-config-management.git \
    +    refs/heads/cray/sat/*
    +...
    +82537e59c24dd5607d5f5d6f92cdff971bd9c615 refs/heads/cray/sat/2.1.x
    +
  4. +
  5. +

    Add a sat layer to the CFS configuration(s) associated with the manager NCNs.

    +
      +
    1. +

      Get the name(s) of the CFS configuration(s).

      +

      NOTE: Each manager NCN uses a single CFS configuration. An individual CFS configuration +may be used by any number of manager NCNs, i.e., three manager NCNs might use one, +two, or three CFS configurations.

      +

      In the following example, all three manager NCNs use the same CFS configuration – ncn-personalization.

      +
      ncn-m001:~ # for component in $(cray hsm state components list \
      +    --role Management --subrole Master --format json | jq -r \
      +    '.Components | .[].ID'); do cray cfs components describe $component \
      +    --format json | jq -r '.desiredConfig'; done
      +ncn-personalization
      +ncn-personalization
      +ncn-personalization
      +

      In the following example, the three manager NCNs all use different configurations, +each with a unique name.

      +
      ncn-personalization-m001
      +ncn-personalization-m002
      +ncn-personalization-m003
      +

      Execute the following sub-steps (3.2 through 3.5) once for each unique CFS +configuration name.

      +

      NOTE: Examples in the following sub-steps assume that all manager NCNs use the +CFS configuration ncn-personalization.

      +
    2. +
    3. +

      Get the current configuration layers for each CFS configuration, and save the +data to a local JSON file.

      +

      The JSON file created in this sub-step will serve as a template for updating +an existing CFS configuration, or creating a new one.

      +
      ncn-m001# cray cfs configurations describe ncn-personalization --format \
      +    json | jq '{ layers }' > ncn-personalization.json
      +

      If the configuration does not exist yet, you may see the following error. +In this case, create a new JSON file for that CFS configuration, e.g., ncn-personalization.json.

      +
      Error: Configuration could not found.: Configuration ncn-personalization could not be found
      +

      NOTE: For more on CFS configuration management, refer to “Manage a Configuration +with CFS” in the CSM product documentation.

      +
    4. +
    5. +

      Append a sat layer to the end of the JSON file’s list of layers.

      +

      If the file already contains a sat layer entry, update it.

      +

      If the configuration data could not be found in the previous sub-step, the JSON file +will be empty. In this case, copy the ncn-personalization.json example below, +paste it into the JSON file, delete the ellipsis, and make appropriate changes to +the sat layer entry.

      +

      Use the git commit ID from step 2, e.g. 82537e59c24dd5607d5f5d6f92cdff971bd9c615.

      +

      NOTE: The name value in the example below may be changed, but the installation +procedure uses the example value, sat-ncn. If an alternate value is used, some +of the following examples must be updated accordingly before they are executed.

      +
      ncn-m001# vim ncn-personalization.json
      +...
      +ncn-m001# cat ncn-personalization.json
      +{
      +    "layers": [
      +        ...
      +        {
      +            "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
      +            "commit": "82537e59c24dd5607d5f5d6f92cdff971bd9c615",
      +            "name": "sat-ncn",
      +            "playbook": "sat-ncn.yml"
      +        }
      +    ]
      +}
      +
    6. +
    7. +

      Update the existing CFS configuration, or create a new one.

      +

      The command should output a JSON-formatted representation of the CFS configuration, +which will look like the JSON file, but with lastUpdated and name fields.

      +
      ncn-m001# cray cfs configurations update ncn-personalization --file \
      +    ncn-personalization.json --format json
      +{
      +    "lastUpdated": "2021-08-05T16:38:53Z",
      +    "layers": [
      +        ...
      +    ],
      +    "name": "ncn-personalization"
      +}
      +
    8. +
    9. +

      Optional: Delete the JSON file.

      +

      NOTE: There is no reason to keep the file. If you keep it, verify that +it is up-to-date with the actual CFS configuration before using it again.

      +
      ncn-m001# rm ncn-personalization.json
      +
    10. +
    +
  6. +
  7. +

    Invoke the CFS configurations that you created or updated in the previous step.

    +

    This step will create a CFS session based on the given configuration and install +SAT on the associated manager NCNs.

    +

    The --configuration-limit option causes only the sat-ncn layer of the configuration, +ncn-personalization, to run.

    +

    CAUTION: In this example, the session --name is sat-session. That value +is only an example. Declare a unique name for each configuration session.

    +

    You should see a representation of the CFS session in the output.

    +
    ncn-m001# cray cfs sessions create --name sat-session --configuration-name \
    +    ncn-personalization --configuration-limit sat-ncn
    +name="sat-session"
    +
    +[ansible]
    +...
    +

    Execute this step once for each unique CFS configuration that you created or +updated in the previous step.

    +
  8. +
  9. +

    Monitor the progress of each CFS session.

    +

    First, list all containers associated with the CFS session:

    +
    ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session \
    +    -o json | jq '.items[0].spec.containers[] | .name'
    +"inventory"
    +"ansible-1"
    +"istio-proxy"
    +

    Next, get the logs for the ansible-1 container.

    +

    NOTE: The trailing digit might differ from “1”. It is the zero-based +index of the sat-ncn layer within the configuration’s layers.

    +
    ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
    +    --selector=cfsession=sat-session
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Execute this step for each unique CFS configuration.

    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  10. +
  11. +

    Verify that SAT was successfully configured.

    +

    If sat is configured, the --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +sat 3.7.0
    +

    NOTE: Upon first running sat, you may see additional output while the sat +container image is downloaded. This will occur the first time sat is run on +each manager NCN. For example, if you run sat for the first time on ncn-m001 +and then for the first time on ncn-m002, you will see this additional output +both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  12. +
  13. +

    Stop the typescript.

    +
    ncn-m001# exit
    +
  14. +
+

SAT version 2.1.x is now configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.

+

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the SAT Post-Upgrade procedures:

+ +

SAT Authentication

+

Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in +later steps of the install process. The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see +Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. +For additional information on SAT authentication, see System Security and Authentication in the CSM +documentation.

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Description of SAT Command Authentication Types

+

Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to +the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, +the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be +done on every Kubernetes manager node where SAT commands are run.

+

Below is a table describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bootsysRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shut down the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat k8sRequires Kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install.sat-k8sReport on Kubernetes replicasets that have co-located replicas (i.e. replicas on the same node).
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node xnames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC xnames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password +on the command line. The username value is obtained from the following locations, in order of higher precedence to lower +precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to +~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, +and will use the token for that username if it has been obtained and saved by sat auth.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:

+
    +
  1. +

    Generate a default SAT configuration file, if one does not exist.

    +
    ncn-m001# sat init
    +Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the config file already exists, it will print out an error:

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:

    +
    username = "crayadmin"
    +
  4. +
  5. +

    Run sat auth. Enter your password when prompted. E.g.:

    +
    ncn-m001# sat auth
    +Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    Other sat commands are now authenticated to make requests to the API gateway. E.g.:

    +
    ncn-m001# sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret key and write them to a local file. +This must be done on every Kubernetes manager node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev +(see: Run sat setrev to Set System Information).

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Ensure the files are readable only by root.

    +
    ncn-m001# touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    Write the credentials to local files using kubectl.

    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      Get the SAT configuration file’s endpoint value.

      +

      NOTE: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      ncn-m001# grep endpoint ~/.config/sat/sat.toml
      +# endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      Get the sat-s3-credentials secret’s endpoint value.

      +
      ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, modify the SAT configuration file’s endpoint value to match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    Copy SAT configurations to every manager node on the system.

    +
    ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.

    +
  8. +
+

Run sat setrev to Set System Information

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Run sat setrev to set System Revision Information. Follow the on-screen prompts.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Run sat showrev to verify System Revision Information. The following tables contain example information.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | Shasta        |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+

Optional: Remove old versions after an upgrade

+

Prerequisites

+ +

Procedure

+

After upgrading from a previous version of SAT, the old version of the cray/cray-sat +container image will remain in the registry on the system. It is not removed +automatically, but it will not be the default version.

+

The admin can remove the older version of the cray/cray-sat container image.

+

The cray-product-catalog Kubernetes configuration map will also show all versions +of SAT that are installed. The command sat showrev --products will display these +versions. See the example:

+
ncn-m001# sat showrev --products
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------------------+-----------------------+
+| product_name | product_version | images             | image_recipes         |
++--------------+-----------------+--------------------+-----------------------+
+...
+| sat          | 2.1.3           | -                  | -                     |
+| sat          | 2.0.4           | -                  | -                     |
+...
++--------------+-----------------+--------------------+-----------------------+
+

Remove obsolete configuration file sections

+

Prerequisites

+ +

Procedure

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. +In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/introduction/index.html b/en-21/introduction/index.html new file mode 100644 index 0000000000..30b4a7e651 --- /dev/null +++ b/en-21/introduction/index.html @@ -0,0 +1,773 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.

+

Six Kibana Dashboards are included with SAT. They provide organized output for system health information.

+ +

Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.

+ +

SAT is installed as a separate product as part of the HPE Cray EX System base installation.

+

System Admin Toolkit Command Overview

+

Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes +manager nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes manager node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using interactive and non-interactive modes.

+

Interactive

+
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+

Non-interactive

+
ncn-m001# sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, then use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following +example.

+
ncn-m001# sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+
ncn-m001# man sat
+
ncn-m001# man sat-podman
+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.

+
    +
  • The root or super-user account always has the # character at the end of the prompt and has the host name of the +host in the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A user account that is neither root nor crayadm is +referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the string as follows. It also has the “#” +character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + +
Command PromptMeaning
ncn-m001#Run on one of the Kubernetes Manager servers. (Non-interactive)
(CONTAINER_ID) sat-container#Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

Examples of the sat status command used by an administrator:

+
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX (Shasta) software stack. The following list shows these dependencies +for each subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Compute Rolling Upgrade Service (CRUS)
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

COS

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diag

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat status

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/release_notes/index.html b/en-21/release_notes/index.html new file mode 100644 index 0000000000..558774dd07 --- /dev/null +++ b/en-21/release_notes/index.html @@ -0,0 +1,849 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Release Notes

+

Summary of SAT Changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named “ncn-personalization”.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +executables on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and config file options +have been removed. For further instructions, see Remove Obsolete Configuration +File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation has been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes masters and workers.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+

Summary of SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+

Summary of SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes management cluster (i.e., workers +and masters). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Config File Location Change

+

The default location of the SAT config file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own config files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment variable.

+

Additionally, if a config file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies option.
  • +
  • List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of +node accelerators is also included for each node.
  • +
  • List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers +option. The count of node accelerator risers is also included for each node.
  • +
  • List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics +option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For detailed +instructions, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding config-file +option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding config file options were +deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+

Summary of SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command which supported operations for replacing a switch has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+

Summary of SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

See the System Admin Toolkit Command Overview +and the table of commands in the SAT Authentication section +of this document for more details on each of these commands.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-21/sitemap.xml b/en-21/sitemap.xml new file mode 100644 index 0000000000..46819937ce --- /dev/null +++ b/en-21/sitemap.xml @@ -0,0 +1,280 @@ + + + + /docs-sat/en-21/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-21/dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-21/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-21/introduction/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-21/dashboards/sat_grafana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-21/dashboards/sat_kibana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-21/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-21/categories/ + + + + + + + + /docs-sat/en-21/tags/ + + + + + + + + diff --git a/en-21/tags/index.html b/en-21/tags/index.html new file mode 100644 index 0000000000..4a33de38d1 --- /dev/null +++ b/en-21/tags/index.html @@ -0,0 +1,560 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-21/tags/index.xml b/en-21/tags/index.xml new file mode 100644 index 0000000000..06178855b0 --- /dev/null +++ b/en-21/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-21/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-21 + + + diff --git a/en-22/404.html b/en-22/404.html new file mode 100644 index 0000000000..a2e7c01d2e --- /dev/null +++ b/en-22/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-22/categories/index.html b/en-22/categories/index.html new file mode 100644 index 0000000000..5ee4b4144d --- /dev/null +++ b/en-22/categories/index.html @@ -0,0 +1,579 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-22/categories/index.xml b/en-22/categories/index.xml new file mode 100644 index 0000000000..70c743e115 --- /dev/null +++ b/en-22/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-22/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-22 + + + diff --git a/en-22/dashboards/index.html b/en-22/dashboards/index.html new file mode 100644 index 0000000000..1008aa8c10 --- /dev/null +++ b/en-22/dashboards/index.html @@ -0,0 +1,561 @@ + + + + + + + + + + + + SAT Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Dashboards

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-22/dashboards/index.xml b/en-22/dashboards/index.xml new file mode 100644 index 0000000000..3990163936 --- /dev/null +++ b/en-22/dashboards/index.xml @@ -0,0 +1,26 @@ + + + + SAT Dashboards on System Admin Toolkit (SAT) + /docs-sat/en-22/dashboards/ + Recent content in SAT Dashboards on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-22 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Grafana Dashboards + /docs-sat/en-22/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-22/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. 
+ + + diff --git a/en-22/dashboards/sat_grafana_dashboards/index.html b/en-22/dashboards/sat_grafana_dashboards/index.html new file mode 100644 index 0000000000..f38b5b7d4c --- /dev/null +++ b/en-22/dashboards/sat_grafana_dashboards/index.html @@ -0,0 +1,666 @@ + + + + + + + + + + + + SAT Grafana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Grafana Dashboards

+

The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.

+

Grafana can be accessed via web browser at the following URL:

+
    +
  • https://sma-grafana.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Grafana being https://sma-grafana.EXAMPLE_DOMAIN.com

+

For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.

+

For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.

+ +

There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Dashboard NameDisplay Type
Fabric CongestionChart Panels
Fabric RFC3635Chart Panels
Fabric ErrorsTabular Format
Fabric Port StateTabular Format
+

The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.

+

SAT Grafana Interval and Locations Options

+

Shows the Interval and Locations Options for the available telemetry.

+

+

The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.

+

For additional information, refer to Grafana Templates and Variables.

+

The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.

+

The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.

+

Grafana Fabric Congestion Dashboard

+

+

SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.

+

This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.

+

The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.

+

The port type of a link is reported as a port state “subtype” event when defined at port initialization.

+

Grafana Fabric Errors Dashboard

+

+

This dashboard reports error counters in a tabular format in three panels.

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

Unlike other dashboards, the locations presented are all locations in the system rather than only those having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.

+

Grafana Fabric Port State Dashboard

+

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.

+

The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.

+

Grafana Fabric RFC3635 Dashboard

+

+

For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.

+

Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/dashboards/sat_kibana_dashboards/index.html b/en-22/dashboards/sat_kibana_dashboards/index.html new file mode 100644 index 0000000000..9523a6f4fa --- /dev/null +++ b/en-22/dashboards/sat_kibana_dashboards/index.html @@ -0,0 +1,855 @@ + + + + + + + + + + + + SAT Kibana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Kibana Dashboards

+

Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.

+

Kibana can be accessed via web browser at the following URL:

+
    +
  • https://sma-kibana.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Kibana being https://sma-kibana.EXAMPLE_DOMAIN.com

+

For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.

+

Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DashboardShort DescriptionLong DescriptionKibana Visualization and Search Name
sat-aerAER correctedCorrected Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-corrected Search: sat-aer-corrected
sat-aerAER fatalFatal Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-fatal Search: sat-aer-fatal
sat-atomATOM failuresApplication Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged.sat-atom-failed
sat-atomATOM admindownApplication Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch.sat-atom-admindown
sat-heartbeatHeartbeat loss eventsHeartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system.sat-heartbeat
sat-kernelKernel assertionsThe kernel software performs a failed assertion when some condition represents a serious fault. The node goes down.sat-kassertions
sat-kernelKernel panicsThe kernel panics when something is seriously wrong. The node goes down.sat-kernel-panic
sat-kernelLustre bugs (LBUGs)The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down.sat-lbug
sat-kernelCPU stallsCPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric.sat-cpu-stall
sat-kernelOut of memoryAn Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided.sat-oom
sat-mceMCEMachine Check Exceptions (MCE) are errors detected at the processor level.sat-mce
sat-rasdaemonrasdaemon errorsErrors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future.sat-rasdaemon-error
sat-rasdaemonrasdaemon messagesAll messages from the rasdaemon service on nodes.sat-rasdaemon
+

Disable Search Highlighting in Kibana Dashboard

+

By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.

+

The Kibana Dashboard should be open on your system.

+
    +
  1. +

    Navigate to Management

    +
  2. +
  3. +

    Navigate to Advanced Settings in the Kibana section, below the Elasticsearch section

    +
  4. +
  5. +

    Scroll down to the Discover section

    +
  6. +
  7. +

    Change Highlight results from on to off

    +
  8. +
  9. +

    Click Save to save changes

    +
  10. +
+

AER Kibana Dashboard

+

The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.

+

View the AER Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-aer dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.

    +
  8. +
+

ATOM Kibana Dashboard

+

The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.

+

View the ATOM Kibana Dashboard

+

HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-atom dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Heartbeat Kibana Dashboard

+

The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.

+

View the Heartbeat Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-heartbeat dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.

    +
  8. +
+

Kernel Kibana Dashboard

+

The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.

+

View the Kernel Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-kernel dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.

    +
  8. +
+

MCE Kibana Dashboard

+

The MCE Dashboard displays CPU-detected processor-level hardware errors.

+

View the MCE Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-mce dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Rasdaemon Kibana Dashboard

+

The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon +service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including +PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages +presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one +for only messages of severity “emerg” or “err” and another for all messages from rasdaemon.

+

View the Rasdaemon Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-rasdaemon dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.

    +
  8. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/img/Fabric_PortState_Locations_UI.png b/en-22/img/Fabric_PortState_Locations_UI.png new file mode 100644 index 0000000000..704511ebce Binary files /dev/null and b/en-22/img/Fabric_PortState_Locations_UI.png differ diff --git a/en-22/img/Grafana_Fabric_Congestion.png b/en-22/img/Grafana_Fabric_Congestion.png new file mode 100644 index 0000000000..dbf481d94c Binary files /dev/null and b/en-22/img/Grafana_Fabric_Congestion.png differ diff --git a/en-22/img/Grafana_HSN_Errors.png b/en-22/img/Grafana_HSN_Errors.png new file mode 100644 index 0000000000..f43b7d02a6 Binary files /dev/null and b/en-22/img/Grafana_HSN_Errors.png differ diff --git a/en-22/img/Grafana_rfc3635.png b/en-22/img/Grafana_rfc3635.png new file mode 100644 index 0000000000..dff176c82d Binary files /dev/null and b/en-22/img/Grafana_rfc3635.png differ diff --git a/en-22/img/SAT_Grafana_Fabric_Vars.png b/en-22/img/SAT_Grafana_Fabric_Vars.png new file mode 100644 index 0000000000..194d75b124 Binary files /dev/null and b/en-22/img/SAT_Grafana_Fabric_Vars.png differ diff --git a/en-22/index.html b/en-22/index.html new file mode 100644 index 0000000000..8b0c2fb7f6 --- /dev/null +++ b/en-22/index.html @@ -0,0 +1,574 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ + + + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/index.json b/en-22/index.json new file mode 100644 index 0000000000..9776932efe --- /dev/null +++ b/en-22/index.json @@ -0,0 +1,71 @@ +[ +{ + "uri": "/docs-sat/en-22/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide Introduction to SAT About System Admin Toolkit (SAT) System Admin Toolkit Command Overview Command Prompt Conventions in SAT SAT Dependencies SAT Installation Install SAT Install the System Admin Toolkit Product Stream Perform NCN Personalization SAT Setup SAT Authentication Generate SAT S3 Credentials Run sat setrev to Set System Information SAT Post-Upgrade Remove obsolete configuration file sections SAT Logging SAT Uninstall and Downgrade Uninstall: Removing a Version of SAT Activate: Switching Between Versions SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards SAT Usage SAT Bootprep SAT Release Notes Summary of changes in SAT 2.2 Summary of SAT changes in Shasta v1.5 Summary of SAT Changes in Shasta v1.4.1 Summary of SAT Changes in Shasta v1.4 Summary of SAT Changes in Shasta v1.3.2 Summary of SAT Changes in Shasta v1.3 " +}, +{ + "uri": "/docs-sat/en-22/dashboards/", + "title": "SAT Dashboards", + "tags": [], + "description": "", + "content": "SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-22/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream.\nPrerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. 
In the examples below, replace 2.2.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. To upgrade SAT, execute the pre-installation, installation, and post-installation procedures for a newer distribution. The newly installed version will become the default. Pre-Installation Procedure Start a typescript.\nThe typescript will record the commands and the output from this installation.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Installation Procedure Copy the release distribution gzipped tar file to ncn-m001.\nUnzip and extract the release distribution, 2.2.x.\nncn-m001# tar -xvzf sat-2.2.x.tar.gz Change directory to the extracted release distribution directory.\nncn-m001# cd sat-2.2.x Run the installer: install.sh.\nThe script produces a lot of output. A successful install ends with \u0026ldquo;SAT version 2.2.x has been installed\u0026rdquo;.\nncn-m001# ./install.sh ... ====\u0026gt; Updating active CFS configurations ... ====\u0026gt; SAT version 2.2.x has been installed. Upgrade only: Record the names of the CFS configuration or configurations modified by install.sh.\nThe install.sh script attempts to modify any CFS configurations that apply to the master management NCNs. During an upgrade, install.sh will log messages indicating the CFS configuration or configurations that were modified. 
For example, if there are three master nodes all using the same CFS configuration named \u0026ldquo;ncn-personalization\u0026rdquo;, the output would look like this:\n====\u0026gt; Updating active CFS configurations INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s1b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s3b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s5b0n0 INFO: Updating CFS configuration \u0026#34;ncn-personalization\u0026#34; INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration \u0026#34;ncn-personalization\u0026#34;. INFO: Key \u0026#34;name\u0026#34; in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16 INFO: Successfully updated layers in configuration \u0026#34;ncn-personalization\u0026#34; Save the name of each CFS configuration updated by the installer. In the previous example, a single configuration named \u0026ldquo;ncn-personalization\u0026rdquo; was updated, so that name is saved to a temporary file.\nncn-m001# echo ncn-personalization \u0026gt;\u0026gt; /tmp/sat-ncn-cfs-configurations.txt Repeat the previous command for each CFS configuration that was updated.\nUpgrade only: Save the new name of the SAT CFS configuration layer.\nIn the example install.sh output above, the new layer name is sat-2.2.16. Save this value to a file to be used later.\nncn-m001# echo sat-2.2.16 \u0026gt; /tmp/sat-layer-name.txt Fresh install only: Save the CFS configuration layer for SAT to a file for later use.\nThe install.sh script attempts to modify any CFS configurations that apply to the master management NCNs. 
During a fresh install, no such CFS configurations will be found, and it will instead log the SAT configuration layer that must be added to the CFS configuration that will be created. Here is an example of the output in that case:\n====\u0026gt; Updating active CFS configurations INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0 WARNING: No CFS configurations found that apply to components with role Management and subrole Master. INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master. { \u0026#34;name\u0026#34;: \u0026#34;sat-2.2.15\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;9a74b8f5ba499af6fbcecfd2518a40e081312933\u0026#34;, \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;sat-ncn.yml\u0026#34; } Save the JSON output to a file for later use. For example:\nncn-m001# cat \u0026gt; /tmp/sat-layer.json \u0026lt;\u0026lt;EOF \u0026gt; { \u0026gt; \u0026#34;name\u0026#34;: \u0026#34;sat-2.2.15\u0026#34;, \u0026gt; \u0026#34;commit\u0026#34;: \u0026#34;9a74b8f5ba499af6fbcecfd2518a40e081312933\u0026#34;, \u0026gt; \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git\u0026#34;, \u0026gt; \u0026#34;playbook\u0026#34;: \u0026#34;sat-ncn.yml\u0026#34; \u0026gt; } \u0026gt; EOF Do not copy the previous command verbatim. 
Use the JSON output from the install.sh script.\nPost-Installation Procedure Optional: Remove the SAT release distribution tar file and extracted directory.\nncn-m001# rm sat-2.2.x.tar.gz ncn-m001# rm -rf sat-2.2.x/ Upgrade only: Ensure that the environment variable SAT_TAG is not set in the ~/.bashrc file on any of the management NCNs.\nNOTE: This step should only be required when updating from Shasta 1.4.1 or Shasta 1.4.2.\nThe following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, and shows output from a system in which no further action is needed.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m003: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. Remove that line from the ~/.bashrc file on ncn-m002.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) ncn-m002: export SAT_TAG=3.5.0 ncn-m003: source \u0026lt;(kubectl completion bash) Stop the typescript.\nNOTE: This step can be skipped if you wish to use the same typescript for the remainder of the SAT install. See Next Steps.\nncn-m001# exit SAT version 2.2.x is now installed/upgraded, meaning the SAT 2.2.x release has been loaded into the system software repository.\nSAT configuration content for this release has been uploaded to VCS. SAT content for this release has been uploaded to the CSM product catalog. SAT content for this release has been uploaded to Nexus repositories. The sat command won\u0026rsquo;t be available until the NCN Personalization procedure has been executed. 
Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the sections listed below.\nNOTE: The NCN Personalization procedure is required when upgrading SAT. The setup procedures in SAT Setup, however, are not required when upgrading SAT. They should have been executed during the first installation of SAT.\nExecute the NCN Personalization procedure:\nPerform NCN Personalization If performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the upgrade procedures:\nRemove obsolete configuration file sections SAT Logging Perform NCN Personalization Describes how to perform NCN personalization using CFS. This personalization process will configure the System Admin Toolkit (SAT) product stream.\nPrerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. If upgrading, the names of the CFS configurations updated during installation were saved to the file /tmp/sat-ncn-cfs-configurations.txt. If upgrading, the name of the new SAT CFS configuration layer was saved to the file /tmp/sat-layer-name.txt. If performing a fresh install, the SAT CFS configuration layer was saved to the file /tmp/sat-layer.json. Notes on the Procedure Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.2.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. If upgrading SAT, the existing configuration will likely include other Cray EX product entries. Update the SAT entry as described in this procedure. 
The HPE Cray EX System Software Getting Started Guide provides guidance on how and when to update the entries for the other products. Procedure Start a typescript if not already using one.\nThe typescript will capture the commands and the output from this installation procedure.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Fresh install only: Add the SAT layer to the NCN personalization JSON file.\nIf the SAT install script, install.sh, did not identify and modify the CFS configurations that apply to each master management NCN, it will have printed the SAT CFS configuration layer in JSON format. This layer must be added to the JSON file being used to construct the CFS configuration. For example, if the file being used is named ncn-personalization.json, and the SAT layer was saved to the file /tmp/sat-layer.json as described in the install instructions, the following jq command will append the SAT layer and save the result in a new file named ncn-personalization.new.json.\nncn-m001# jq -s \u0026#39;{layers: (.[0].layers + [.[1]])}\u0026#39; ncn-personalization.json \\ /tmp/sat-layer.json \u0026gt; ncn-personalization.new.json For instructions on how to create a CFS configuration from the previous file and how to apply it to the management NCNs, refer to \u0026ldquo;Perform NCN Personalization\u0026rdquo; in the HPE Cray System Management Documentation. 
After the CFS configuration has been created and applied, return to this procedure.\nUpgrade only: Invoke each CFS configuration that was updated during the upgrade.\nIf the SAT install script, install.sh, identified CFS configurations that apply to the master management NCNs and modified them in place, invoke each CFS configuration that was created or updated during installation.\nThis step will create a CFS session for each given configuration and install SAT on the associated manager NCNs.\nThe --configuration-limit option limits the configuration session to run only the SAT layer of the configuration.\nYou should see a representation of the CFS session in the output.\nncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt); do cray cfs sessions create --name \u0026#34;sat-session-${cfs_configuration}\u0026#34; --configuration-name \\ \u0026#34;${cfs_configuration}\u0026#34; --configuration-limit $(cat /tmp/sat-layer-name.txt); done name=\u0026#34;sat-session-ncn-personalization\u0026#34; [ansible] ... Upgrade only: Monitor the progress of each CFS session.\nThis step assumes a single session named sat-session-ncn-personalization was created in the previous step.\nFirst, list all containers associated with the CFS session:\nncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \\ -o json | jq \u0026#39;.items[0].spec.containers[] | .name\u0026#39; \u0026#34;inventory\u0026#34; \u0026#34;ansible-1\u0026#34; \u0026#34;istio-proxy\u0026#34; Next, get the logs for the ansible-1 container.\nNOTE: the trailing digit might differ from \u0026ldquo;1\u0026rdquo;. It is the zero-based index of the sat-ncn layer within the configuration\u0026rsquo;s layers.\nncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \\ --selector=cfsession=sat-session-ncn-personalization Ansible plays, which are run by the CFS session, will install SAT on all the manager NCNs on the system. 
Successful results for all of the manager NCN xnames can be found at the end of the container log. For example:\n... PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Execute this step for each unique CFS configuration.\nNOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version sat 3.7.0 NOTE: Upon first running sat, you may see additional output while the sat container image is downloaded. This will occur the first time sat is run on each manager NCN. For example, if you run sat for the first time on ncn-m001 and then for the first time on ncn-m002, you will see this additional output both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... sat 3.7.0 Stop the typescript.\nncn-m001# exit SAT version 2.2.x is now configured:\nThe SAT RPM package is installed on the associated NCNs. 
Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.\nIf performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the SAT Post-Upgrade procedures:\nRemove obsolete configuration file sections SAT Logging SAT Authentication Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in later steps of the install process. The admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. For additional information on SAT authentication, see System Security and Authentication in the CSM documentation.\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nDescription of SAT Command Authentication Types Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. 
This must be done on every Kubernetes manager node where SAT commands are run.\nBelow is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bmccreds Requires authentication to the API gateway. sat-bmccreds Set BMC passwords. sat bootprep Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install. sat-bootprep Prepare to boot nodes with images and configurations. sat bootsys Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwhist Requires authentication to the API gateway. sat-hwhist Report hardware component history. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat k8s Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. sat-k8s Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). 
sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node xnames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat slscheck Requires authentication to the API gateway. sat-slscheck Perform a cross-check between SLS and HSM. sat status Requires authentication to the API gateway. sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. sat-xname2nid Translate node and node BMC xnames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml. The name of the currently logged-in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. 
Subsequent sat commands will determine the username the same way as sat auth described above, and will use the token for that username if it has been obtained and saved by sat auth.\nPrerequisites The sat CLI has been installed following Install The System Admin Toolkit Product Stream. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:\nGenerate a default SAT configuration file, if one does not exist.\nncn-m001# sat init Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. Note: If the config file already exists, it will print out an error:\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:\nusername = \u0026#34;crayadmin\u0026#34; Run sat auth. Enter your password when prompted. E.g.:\nncn-m001# sat auth Password for crayadmin: Succeeded! Other sat commands are now authenticated to make requests to the API gateway. E.g.:\nncn-m001# sat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes master node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see: Run Sat Setrev to Set System Information).\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites The SAT CLI has been installed following Install The System Admin Toolkit Product Stream The SAT configuration file has been created (See SAT Authentication). CSM has been installed and verified. 
Procedure Ensure the files are readable only by root.\nncn-m001# touch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key ncn-m001# chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key ncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\nGet the SAT configuration file\u0026rsquo;s endpoint value.\nNOTE: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\nncn-m001# grep endpoint ~/.config/sat/sat.toml # endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs https://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, change the SAT configuration file\u0026rsquo;s endpoint value to match the secret\u0026rsquo;s.\nCopy SAT configurations to each manager node on the system.\nncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. 
Therefore, the list of hosts above is ncn-m002 and ncn-m003.\nRun sat setrev to Set System Information NOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites S3 credentials have been generated. See Generate SAT S3 Credentials. SAT authentication has been set up. See SAT Authentication. Procedure Run sat setrev to set System Revision Information. Follow the on-screen prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date TIP: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. I.e., \u0026ldquo;System type\u0026rdquo; is EX-1C.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Run sat showrev to verify System Revision Information. 
The following tables contain example information.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... | | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ Remove obsolete configuration file sections Prerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. 
The Perform NCN Personalization procedure has been successfully completed. Procedure After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nSAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO level are now logged at the DEBUG level.\nThese changes take effect automatically. However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\nncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml [logging] ... stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. 
However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap Uninstall: Removing a Version of SAT Prerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. Older versions must be uninstalled manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nNOTE: It is not recommended to uninstall a version designated as \u0026ldquo;active\u0026rdquo;. If the active version is uninstalled, then the activate procedure must be executed on a remaining version.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+-------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+-------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+-------------------+-----------------------+ Use prodmgr to uninstall a version of SAT.\nThis command will do three things:\nRemove all hosted-type package repositories associated with the given version of SAT. Group-type repositories are not removed. Remove all container images associated with the given version of SAT. 
Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up in the output of sat showrev. ncn-m001# prodmgr uninstall sat 2.2.10 Repository sat-2.2.10-sle-15sp2 has been removed. Removed Docker image cray/cray-sat:3.9.0 Removed Docker image cray/sat-cfs-install:1.0.2 Removed Docker image cray/sat-install-utility:1.4.0 Deleted sat-2.2.10 from product catalog. Activate: Switching Between Versions This procedure can be used to downgrade the active version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Use prodmgr to activate a different version of SAT.\nThis command will do three things:\nFor all hosted-type package repositories associated with this version of SAT, set them as the sole member of their corresponding group-type repository. For example, activating SAT version 2.2.10 sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group. Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of sat showrev. 
Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are associated with NCNs with the role \u0026ldquo;Management\u0026rdquo; and subrole \u0026ldquo;Master\u0026rdquo; (for example, the CFS configuration ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS configuration content associated with the version of SAT being activated. ncn-m001# prodmgr activate sat 2.2.10 Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2. Set sat-2.2.10 as active in product catalog. Updated CFS configurations: [ncn-personalization] Verify that the chosen version is marked as active.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | False | - | - | | sat | 2.2.10 | True | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Run NCN Personalization.\nAt this point, the command has modified Nexus package repositories to set a particular package repository as active, but no packages on the NCNs have been changed. In order to complete the activation process, NCN Personalization must be executed to change the cray-sat-podman package version on the manager NCNs.\nNOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated, which may not necessarily be just ncn-personalization. If multiple configurations were updated in step 2, then a cray cfs sessions create command should be run for each of them. 
This example assumes a single configuration named ncn-personalization was updated. If multiple were updated, set cfs_configurations to a space-separated list below.\nncn-m001# cfs_configurations=\u0026#34;ncn-personalization\u0026#34; ncn-m001# for cfs_configuration in ${cfs_configurations} do cray cfs sessions create --name \u0026#34;sat-session-${cfs_configuration}\u0026#34; --configuration-name \\ \u0026#34;${cfs_configuration}\u0026#34; --configuration-limit sat-ncn; done Monitor the progress of each CFS session.\nThis step assumes a single session named sat-session-ncn-personalization was created in the previous step.\nFirst, list all containers associated with the CFS session:\nncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \\ -o json | jq \u0026#39;.items[0].spec.containers[] | .name\u0026#39; \u0026#34;inventory\u0026#34; \u0026#34;ansible-1\u0026#34; \u0026#34;istio-proxy\u0026#34; Next, get the logs for the ansible-1 container.\nNOTE: the trailing digit might differ from \u0026ldquo;1\u0026rdquo;. It is the zero-based index of the sat-ncn layer within the configuration\u0026rsquo;s layers.\nncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \\ --selector=cfsession=sat-session-ncn-personalization Ansible plays, which are run by the CFS session, will install SAT on all the manager NCNs on the system. Successful results for all of the manager NCN xnames can be found at the end of the container log. For example:\n... 
PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Execute this step for each unique CFS configuration.\nNOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify the new version of the SAT CLI.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the SAT Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version 3.9.0 " +}, +{ + "uri": "/docs-sat/en-22/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.\nSix Kibana Dashboards are included with SAT. They provide organized output for system health information.\nAER Kibana Dashboard ATOM Kibana Dashboard Heartbeat Kibana Dashboard Kernel Kibana Dashboard MCE Kibana Dashboard Rasdaemon Kibana Dashboard Four Grafana Dashboards are included with SAT. 
They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.\nGrafana Fabric Congestion Dashboard Grafana Fabric Errors Dashboard Grafana Fabric Port State Dashboard Grafana Fabric RFC3635 Dashboard SAT is installed as a separate product as part of the HPE Cray EX System base installation.\nSystem Admin Toolkit Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes manager nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. Non-interactive: Running a sat command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. 
The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. The following two examples show the same action, checking the system status, using interactive and non-interactive modes.\nInteractive ncn-m001# sat bash (CONTAINER-ID)sat-container# sat status Non-interactive ncn-m001# sat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, then use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following example.\nncn-m001# sat-man status A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed either with man sat or man sat-podman from the manager node.\nncn-m001# man sat ncn-m001# man sat-podman Command Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.\nThe root or super-user account always has the # character at the end of the prompt and has the host name of the host in the prompt. Any non-root account is indicated with account@hostname\u0026gt;. A user account that is neither root nor crayadm is referred to as user. 
The command prompt inside the SAT container environment is indicated with the string as follows. It also has the \u0026ldquo;#\u0026rdquo; character at the end of the prompt. Command Prompt Meaning ncn-m001# Run on one of the Kubernetes Manager servers. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) Examples of the sat status command used by an administrator:\nncn-m001# sat status ncn-m001# sat bash (CONTAINER_ID) sat-container# sat status SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX (Shasta) software stack. The following list shows these dependencies for each subcommand. Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Compute Rolling Upgrade Service (CRUS) Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 COS Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diag Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat slscheck CSM Hardware State Manager (HSM) Kubernetes S3 sat status CSM Hardware State Manager (HSM) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, 
+{ + "uri": "/docs-sat/en-22/dashboards/sat_grafana_dashboards/", + "title": "SAT Grafana Dashboards", + "tags": [], + "description": "", + "content": "SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity.\nGrafana can be accessed via web browser at the following URL:\nhttps://sma-grafana.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Grafana being https://sma-grafana.EXAMPLE_DOMAIN.com\nFor additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the SMA product documentation.\nFor more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry Kafka Topics in the SMA product documentation.\nNavigate SAT Grafana Dashboards There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display telemetry in a tabular format.\nDashboard Name Display Type Fabric Congestion Chart Panels Fabric RFC3635 Chart Panels Fabric Errors Tabular Format Fabric Port State Tabular Format The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. 
The interval setting is not used for tabular dashboards.\nSAT Grafana Interval and Locations Options Shows the Interval and Locations Options for the available telemetry.\nThe value of the Interval option sets the time resolution of the received telemetry. This works a bit like a histogram, with the available telemetry in an interval of time going into a \u0026ldquo;bucket\u0026rdquo; and averaging out to a single point on the chart or table. The special value auto will choose an interval based on the time range selected.\nFor additional information, refer to Grafana Templates and Variables.\nThe Locations option allows restriction of the telemetry shown by locations, either individual links or all links in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, which always has entries for all links and switches, although the errors shown are restricted to the selected time range.\nThe chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart\u0026rsquo;s legend or the trace on the chart.\nGrafana Fabric Congestion Dashboard SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in the system and assess the past and present health of the high-speed network. It also allows the ability to drill down to view data for specific ports on specific switches.\nThis dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, local, and global and correspond to the link\u0026rsquo;s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to \u0026ldquo;all\u0026rdquo;) selected.\nThe metric values for links of a given port type are similar in value to each other but very distinct from the values of other types. 
If the values for different port types are all plotted together, the values for links with lower values are indistinguishable from zero when plotted.\nThe port type of a link is reported as a port state \u0026ldquo;subtype\u0026rdquo; event when defined at port initialization.\nGrafana Fabric Errors Dashboard This dashboard reports error counters in a tabular format in three panels.\nThere is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nUnlike other dashboards, the locations presented are all locations in the system rather than having telemetry within the time range selected. However, the values are taken from telemetry within the time range.\nGrafana Fabric Port State Dashboard There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nThe Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a long time range may be necessary to obtain any values. 
Port State is refreshed daily, so a time range of 24 hours results in all states for all links in the system being shown.\nThe three columns named group, switch, and port are not port state events, but extra information included with all port state events.\nGrafana Fabric RFC3635 Dashboard For additional information on performance counters, refer to Definitions of Managed Objects for the Ethernet-like Interface Types, an Internet standards document.\nBecause these metrics are counters that only increase over time, the values plotted are the change in the counter\u0026rsquo;s value over the interval setting.\n" +}, +{ + "uri": "/docs-sat/en-22/dashboards/sat_kibana_dashboards/", + "title": "SAT Kibana Dashboards", + "tags": [], + "description": "", + "content": "SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information.\nKibana can be accessed via web browser at the following URL:\nhttps://sma-kibana.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Kibana being https://sma-kibana.EXAMPLE_DOMAIN.com\nFor additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product documentation.\nAdditional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this table.\nDashboard Short Description Long Description Kibana Visualization and Search Name sat-aer AER corrected Corrected Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-corrected Search: sat-aer-corrected sat-aer AER fatal Fatal Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-fatal Search: sat-aer-fatal sat-atom ATOM failures Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. sat-atom-failed sat-atom ATOM admindown Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. sat-atom-admindown sat-heartbeat Heartbeat loss events Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. sat-heartbeat sat-kernel Kernel assertions The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. 
sat-kassertions sat-kernel Kernel panics The kernel panics when something is seriously wrong. The node goes down. sat-kernel-panic sat-kernel Lustre bugs (LBUGs) The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. sat-lbug sat-kernel CPU stalls CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. sat-cpu-stall sat-kernel Out of memory An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. sat-oom sat-mce MCE Machine Check Exceptions (MCE) are errors detected at the processor level. sat-mce sat-rasdaemon rasdaemon errors Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. sat-rasdaemon-error sat-rasdaemon rasdaemon messages All messages from the rasdaemon service on nodes. sat-rasdaemon Disable Search Highlighting in Kibana Dashboard By default, search highlighting is enabled. 
This procedure instructs how to disable search highlighting.\nThe Kibana Dashboard should be open on your system.\nNavigate to Management\nNavigate to Advanced Settings in the Kibana section, below the Elastic search section\nScroll down to the Discover section\nChange Highlight results from on to off\nClick Save to save changes\nAER Kibana Dashboard The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors are split up into separate visualizations depending on whether they are fatal or corrected errors.\nView the AER Kibana Dashboard Go to the dashboard section.\nSelect sat-aer dashboard.\nChoose the time range of interest.\nView the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nATOM Kibana Dashboard The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health checks and application test failures. Some test failures are of possible interest even though a node is not marked admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide clues if a node otherwise fails. They might also show application problems.\nView the ATOM Kibana Dashboard HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.\nGo to the dashboard section.\nSelect sat-atom dashboard.\nChoose the time range of interest.\nView any nodes marked admindown and any ATOM test failures. These failures occur during health checks and application test failures. Test failures marked admindown are important to note. 
View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nHeartbeat Kibana Dashboard The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss.\nView the Heartbeat Kibana Dashboard Go to the dashboard section.\nSelect sat-heartbeat dashboard.\nChoose the time range of interest.\nView the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.\nKernel Kibana Dashboard The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using too much memory.\nView the Kernel Kibana Dashboard Go to the dashboard section.\nSelect sat-kernel dashboard.\nChoose the time range of interest.\nView the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nMCE Kibana Dashboard The MCE Dashboard displays CPU detected processor-level hardware errors.\nView the MCE Kibana Dashboard Go to the dashboard section.\nSelect sat-mce dashboard.\nChoose the time range of interest.\nView the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nRasdaemon Kibana Dashboard The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one for only messages of severity \u0026ldquo;emerg\u0026rdquo; or \u0026ldquo;err\u0026rdquo; and another for all messages from rasdaemon.\nView the Rasdaemon Kibana Dashboard Go to the dashboard section.\nSelect sat-rasdaemon dashboard.\nChoose the time range of interest.\nView the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\n" +}, +{ + "uri": "/docs-sat/en-22/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Summary of changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022.\nThis version of the SAT product included:\nVersion 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components:\nVersion 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release.\nKnown issues in SAT 2.2 sat command unavailable in sat bash shell After launching a shell within the SAT container with sat bash, the sat command will not be found. For example:\n(CONTAINER-ID) sat-container:~ # sat status bash: sat: command not found This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the $PATH environment variable:\n(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH (CONTAINER-ID) sat-container:~ # sat status Or, the file /sat/venv/bin/activate may be sourced:\n(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate (CONTAINER-ID) sat-container:~ # sat status Tab completion unavailable in sat bash shell After launching a shell within the SAT container with sat bash, tab completion for sat commands does not work.\nThis can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:\nsource /etc/bash_completion.d/sat-completion.bash OCI runtime permission error when running sat in root directory sat commands will not work if the current directory is /. 
For example:\nncn-m001:/ # sat --help Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error To resolve, run sat in another directory.\nDuplicate mount error when running sat in config directory sat commands will not work if the current directory is ~/.config/sat. For example:\nncn-m001:~/.config/sat # sat --help Error: /root/.config/sat: duplicate mount destination To resolve, run sat in another directory.\nNew sat commands sat bootprep automates the creation of CFS configurations, the build and customization of IMS images, and the creation of BOS session templates. See SAT Bootprep for details. sat slscheck performs a check for consistency between the System Layout Service (SLS) and the Hardware State Manager (HSM). sat bmccreds provides a simple interface for interacting with the System Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist displays hardware component history by xname (location) or by its Field-Replaceable Unit ID (FRUID). This command queries the Hardware State Manager (HSM) API to obtain this information. Since the sat hwhist command supports querying for the history of a component by its FRUID, the FRUID of components has been added to the output of sat hwinv. Additional Install Automation The following automation has been added to the install script, install.sh:\nWait for the completion of the sat-config-import Kubernetes job, which is started when the sat-cfs-install Helm chart is deployed. Automate the modification of the CFS configuration, which applies to master management NCNs (e.g. \u0026ldquo;ncn-personalization\u0026rdquo;). 
Changes to Product Catalog Data Schema The SAT product uploads additional information to the cray-product-catalog Kubernetes ConfigMap detailing the components it provides, including container (Docker) images, Helm charts, RPMs, and package repositories.\nThis information is used to support uninstall and activation of SAT product versions moving forward.\nSupport for Uninstall and Activation of SAT Versions Beginning with the 2.2 release, SAT now provides partial support for the uninstall and activation of the SAT product stream.\nSee Uninstall: Removing a Version of SAT and Activate: Switching Between Versions for details.\nImprovements to sat status A Subrole column has been added to the output of sat status. This allows you to easily differentiate between master, worker, and storage nodes in the management role, for example.\nHostname information from SLS has been added to sat status output.\nAdded Support for JSON Output Support for JSON-formatted output has been added to commands which currently support the --format option, such as hwinv, status, and showrev.\nUsability Improvements Many usability improvements have been made to multiple sat commands, mostly related to filtering command output. The following are some highlights:\nAdded --fields option to display only specific fields for subcommands which display tabular reports. Added ability to filter on exact matches of a field name. Improved handling of multiple matches of a field name in --filter queries so that the first match is used, similar to --sort-by. Added support for --filter, --fields, and --reverse for summaries displayed by sat hwinv. Added borders to summary tables generated by sat hwinv. Improved documentation in the man pages. Default Log Level Changed The default log level for stderr has been changed from \u0026ldquo;WARNING\u0026rdquo; to \u0026ldquo;INFO\u0026rdquo;. 
For details, see SAT Logging.\nMore Granular Log Level Configuration Options With the command-line options --loglevel-stderr and --loglevel-file, the log level can now be configured separately for stderr and the log file.\nThe existing --loglevel option is now an alias for the --loglevel-stderr option.\nPodman Wrapper Script Improvements The Podman wrapper script is the script installed at /usr/bin/sat on the master management NCNs by the cray-sat-podman RPM that runs the cray-sat container in podman. The following subsections detail improvements that were made to the wrapper script in this release.\nMounting of $HOME and Current Directories in cray-sat Container The Podman wrapper script that launches the cray-sat container with podman has been modified to mount the user\u0026rsquo;s current directory and home directory into the cray-sat container to provide access to local files in the container.\nPodman Wrapper Script Documentation Improvements The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:\nEnvironment variables that affect execution of the wrapper script Host files and directories mounted in the container Fixes to Podman Wrapper Script Output Redirection Fixed issues with redirecting stdout and stderr, and piping output to commands, such as awk, less, and more.\nConfigurable HTTP Timeout A new sat option has been added to configure the HTTP timeout length for requests to the API gateway. See sat-man sat for details.\nsat bootsys Improvements Many improvements and fixes have been made to sat bootsys. The following are some highlights:\nAdded the --excluded-ncns option, which can be used to omit NCNs from the platform-services and ncn-power stages in case they are inaccessible. Disruptive shutdown stages in sat bootsys shutdown now prompt the user to continue before proceeding. A new option, --disruptive, will bypass this. 
Improvements to Ceph service health checks and restart during the platform-services stage of sat bootsys boot. sat xname2nid Improvements sat xname2nid can now recursively expand slot, chassis, and cabinet xnames to a list of nids in those locations.\nA new --format option has been added to sat xname2nid. It sets the output format to either \u0026ldquo;range\u0026rdquo; (the default) or \u0026ldquo;nid\u0026rdquo;. The \u0026ldquo;range\u0026rdquo; format displays nids in a compressed range format suitable for use with a workload manager like Slurm.\nUsage of v2 HSM API The commands which interact with HSM (e.g., sat status and sat hwinv) now use the v2 HSM API.\nsat diag Limited to HSN Switches sat diag will now only operate against HSN switches by default. These are the only controllers that support running diagnostics with HMJTD.\nsat showrev Enhancements A column has been added to the output of sat showrev that indicates whether a product version is \u0026ldquo;active\u0026rdquo;. The definition of \u0026ldquo;active\u0026rdquo; varies across products, and not all products may set an \u0026ldquo;active\u0026rdquo; version.\nFor SAT, the active version is the one with its hosted-type package repository in Nexus set as the member of the group-type package repository in Nexus, meaning that it will be used when installing the cray-sat-podman RPM.\ncray-sat Container Image Size Reduction The size of the cray-sat container image has been approximately cut in half by leveraging multi-stage builds. This also improved the repeatability of the unit tests by running them in the container.\nBug Fixes Minor bug fixes were made in cray-sat and in cray-sat-podman. 
For full change lists, see each repository\u0026rsquo;s CHANGELOG.md file.\nSummary of SAT changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named \u0026ldquo;ncn-personalization\u0026rdquo;.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. 
Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-diags product. The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic executables on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and config file options have been removed. For further instructions, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes masters and workers.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. 
Summary of SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. Summary of SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. 
This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes management cluster (i.e., workers and masters). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Config File Location Change The default location of the SAT config file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own config files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a config file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers option. 
The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. For detailed instructions, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API. 
Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. A command-line option, --bos-templates, and a corresponding config-file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding config file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log. 
This change simplifies mounting this file into the sat container running under Podman.\nSummary of SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\nSummary of SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch See the System Admin Toolkit Command Overview and the table of commands in the SAT Authentication section of this document for more details on each of these commands.\n" +}, +{ + "uri": "/docs-sat/en-22/usage/", + "title": "SAT Usage", + "tags": [], + "description": "", + "content": "SAT Usage SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created.\nThis automated process centers around the sat bootprep command. 
Man page documentation for sat bootprep can be viewed similarly to other SAT commands.\nncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.\nsat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.\nEditing a bootprep input file The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. An input file is composed of three main sections, one each for configurations, images, and session templates. These sections may be specified in any order, and any of the sections may be omitted if desired.\nCreating CFS configurations The configurations section begins with a configurations: key.\n--- configurations: Under this key, the user can list one or more configurations to create. For each configuration, a name should be given, in addition to the list of layers which comprise the configuration. Each layer can be defined by a product name and optionally a version number, or commit hash or branch in the product\u0026rsquo;s configuration repository. Alternatively, a layer can be defined by a Git repository URL directly, along with an associated branch or commit hash.\nWhen a configuration layer is specified in terms of a product name, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. 
A version may be supplied, but if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.\n--- configurations: - name: example-configuration layers: - name: example product playbook: example.yml product: name: example version: 1.2.3 Alternatively, a configuration layer may be specified by explicitly referencing the desired configuration repository, along with the branch containing the intended version of the Ansible playbooks. A commit hash may be specified by replacing branch with commit.\n... - name: another example product playbook: another-example.yml git: url: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34; branch: main ... When sat bootprep is run against an input file, a CFS configuration will be created corresponding to each configuration in the configurations section. For example, the configuration created from an input file with the layers listed above might look something like the following:\n{ \u0026#34;lastUpdated\u0026#34;: \u0026#34;2022-02-07T21:47:49Z\u0026#34;, \u0026#34;layers\u0026#34;: [ { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;example.yml\u0026#34; }, { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;another example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;another-example.yml\u0026#34; } ], \u0026#34;name\u0026#34;: \u0026#34;example-configuration\u0026#34; } Creating IMS images After specifying configurations, the user may add images to the input file which are to be built by IMS. 
To add an images section, the user should add an images key.\n--- configurations: ... (omitted for brevity) images: Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name, as well as an ims section containing a definition of the image to be built and/or configured. Images may be defined by an image recipe, or by a pre-built image. Recipes and pre-built images are referred to by their names or IDs in IMS. The ims section should also contain an is_recipe property, which indicates whether the name or ID refers to an image recipe or a pre-built image. Images may also optionally provide a text description of the image. This description is not stored or used by sat bootprep or any CSM services, but is useful for documenting images in the input file.\n--- configurations: ... (omitted for brevity) images: - name: example-compute-image description: \u0026gt; An example compute node image for illustrative purposes. ims: name: example-compute-image-recipe is_recipe: true - name: another-example-compute-image description: \u0026gt; Another example compute node image. ims: id: \u0026lt;IMS image UUID\u0026gt; is_recipe: false Images may also contain a configuration property in their definition, which specifies a configuration with which to customize the built image prior to booting. If a configuration is specified, then configuration groups must also be specified using the configuration_group_names property.\n--- configurations: ... (omitted for brevity) images: - name: example-compute-image description: \u0026gt; An example compute node image for illustrative purposes. 
ims: name: example-compute-image-recipe is_recipe: true configuration: example configuration configuration_group_names: - Compute Creating BOS session templates BOS session templates are the final section of the input file, and are defined under the session_templates key.\n--- configurations: ... (omitted for brevity) images: ... (omitted for brevity) session_templates: Each session template is defined in terms of its name, an image, a configuration, and a set of parameters which can be used to configure the session. The name, image, and configuration are specified with their respective name, image, and configuration keys. bos_parameters may also be specified; currently, the only setting under bos_parameters that is supported is boot_sets, which can be used to define boot sets in the BOS session template. Each boot set is defined under its own property under boot_sets, and the value of each boot set can contain the following properties, all of which are optional:\nkernel_parameters: the parameters passed to the kernel on the command line network: the network over which the nodes will boot node_list: nodes to add to the boot set node_roles_groups: HSM roles to add to the boot set node_groups: HSM groups to add to the boot set rootfs_provider: the root file system provider rootfs_provider_passthrough: parameters to add to the rootfs= kernel parameter The properties listed previously are the same as the parameters that can be specified directly through BOS boot sets. More information can be found in the CSM documentation on session templates. Additional properties not listed are passed through to the BOS session template as written.\nAn example session template might look like the following:\nconfigurations: ... (omitted for brevity) images: ... 
(omitted for brevity) session_templates: - name: example-session-template image: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_list: [] rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 Example bootprep input files Putting together all of the previous input file sections, an example bootprep input file might look something like the following.\n--- configurations: - name: cos-config layers: - name: cos-integration-2.2.87 playbook: site.yml product: name: cos version: 2.2.87 branch: integration - name: cpe-integration-21.12.3 playbook: pe_deploy.yml product: name: cpe version: 21.12.3 branch: integration - name: slurm-master-1.1.1 playbook: site.yml product: name: slurm version: 1.1.1 branch: master images: - name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 ims: is_recipe: true name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 configuration: cos-config configuration_group_names: - Compute session_templates: - name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 image: cray-shasta-compute-sles15sp3.x86_64-2.2.35 configuration: cos-config bos_parameters: boot_sets: compute: kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN} node_roles_groups: - Compute Creating a pre-populated example bootprep input file It is possible to create an example bootprep input file using values from the system\u0026rsquo;s product catalog using the sat bootprep generate-example command.\nncn-m001# sat bootprep generate-example INFO: Using latest version (2.3.24-20220113160653) of product cos INFO: Using latest version (21.11.4) of product cpe INFO: Using latest version (1.0.7) of product slurm INFO: Using latest version (1.1.24) of product analytics INFO: Using latest version (2.1.5) of product uan INFO: Using latest version (21.11.4) of product cpe INFO: Using latest version (1.0.7) of product slurm INFO: Using latest version (1.1.24) of product 
analytics INFO: Using latest version (2.3.24-20220113160653) of product cos INFO: Using latest version (2.1.5) of product uan INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml. This file should be reviewed and edited to match the desired parameters of the configurations, images, and session templates.\nViewing built-in generated documentation The contents of the YAML input files described above must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named \u0026ldquo;JSON Schema\u0026rdquo;, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.\nViewing the exact schema specification To view the exact schema specification, run sat bootprep view-schema.\nncn-m001# sat bootprep view-schema --- $schema: \u0026#34;https://json-schema.org/draft-07/schema\u0026#34; title: Bootprep Input File description: \u0026gt; A description of the set of CFS configurations to create, the set of IMS images to create and optionally customize with the defined CFS configurations, and the set of BOS session templates to create that reference the defined images and configurations. type: object additionalProperties: false properties: ... Generating user-friendly documentation The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature was included which can generate user-friendly HTML documentation for the input file schema which can be browsed with the user\u0026rsquo;s preferred web browser.\nCreate a documentation tarball using sat bootprep.\nncn-m001# sat bootprep generate-docs INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz An alternate output directory can be specified with the --output-dir option. 
The generated tarball is always named bootprep-schema-docs.tar.gz.\nncn-m001# sat bootprep generate-docs --output-dir /tmp INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz From another machine, copy the tarball to a local directory.\nanother-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz . Extract the contents of the tarball and open the contained index.html.\nanother-machine$ tar xzvf bootprep-schema-docs.tar.gz x bootprep-schema-docs/ x bootprep-schema-docs/index.html x bootprep-schema-docs/schema_doc.css x bootprep-schema-docs/schema_doc.min.js another-machine$ open bootprep-schema-docs/index.html " +}, +{ + "uri": "/docs-sat/en-22/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-22/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-22/index.xml b/en-22/index.xml new file mode 100644 index 0000000000..652e1f455b --- /dev/null +++ b/en-22/index.xml @@ -0,0 +1,54 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-22/ + Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-22 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Installation + /docs-sat/en-22/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/install/ + SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream. Prerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.2.x with the version of the SAT product stream being installed. 
+ + + Introduction to SAT + /docs-sat/en-22/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview. + + + SAT Grafana Dashboards + /docs-sat/en-22/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-22/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. + + + SAT Release Notes + /docs-sat/en-22/release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/release_notes/ + SAT Release Notes Summary of changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022. This version of the SAT product included: Version 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components: Version 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release. + + + SAT Usage + /docs-sat/en-22/usage/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-22/usage/ + SAT Usage SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similarly to other SAT commands. ncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session. + + + diff --git a/en-22/install/index.html b/en-22/install/index.html new file mode 100644 index 0000000000..3ebd1f5820 --- /dev/null +++ b/en-22/install/index.html @@ -0,0 +1,1505 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install the System Admin Toolkit Product Stream

+

Describes how to install the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+
    +
  • CSM is installed and verified.
  • +
  • cray-product-catalog is running.
  • +
  • There must be at least 2 gigabytes of free space on the manager NCN on which the +procedure is run.
  • +
+

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.2.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • To upgrade SAT, execute the pre-installation, installation, and post-installation +procedures for a newer distribution. The newly installed version will become +the default.
  • +
+

Pre-Installation Procedure

+
    +
  1. +

    Start a typescript.

    +

    The typescript will record the commands and the output from this installation.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Installation Procedure

+
    +
  1. +

    Copy the release distribution gzipped tar file to ncn-m001.

    +
  2. +
  3. +

    Unzip and extract the release distribution, 2.2.x.

    +
    ncn-m001# tar -xvzf sat-2.2.x.tar.gz
    +
  4. +
  5. +

    Change directory to the extracted release distribution directory.

    +
    ncn-m001# cd sat-2.2.x
    +
  6. +
  7. +

    Run the installer: install.sh.

    +

    The script produces a lot of output. A successful install ends with “SAT +version 2.2.x has been installed”.

    +
    ncn-m001# ./install.sh
    +...
    +====> Updating active CFS configurations
    +...
    +====> SAT version 2.2.x has been installed.
    +
  8. +
  9. +

    Upgrade only: Record the names of the CFS configuration or +configurations modified by install.sh.

    +

    The install.sh script attempts to modify any CFS configurations that apply +to the master management NCNs. During an upgrade, install.sh will log +messages indicating the CFS configuration or configurations that were +modified. For example, if there are three master nodes all using the same +CFS configuration named “ncn-personalization”, the output would look like +this:

    +
    ====> Updating active CFS configurations
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s3b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s5b0n0
    +INFO: Updating CFS configuration "ncn-personalization"
    +INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration "ncn-personalization".
    +INFO: Key "name" in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16
    +INFO: Successfully updated layers in configuration "ncn-personalization"
    +

    Save the name of each CFS configuration updated by the installer. In the +previous example, a single configuration named “ncn-personalization” was +updated, so that name is saved to a temporary file.

    +
    ncn-m001# echo ncn-personalization >> /tmp/sat-ncn-cfs-configurations.txt
    +

    Repeat the previous command for each CFS configuration that was updated.

    +
  10. +
  11. +

    Upgrade only: Save the new name of the SAT CFS configuration layer.

    +

    In the example install.sh output above, the new layer name is +sat-2.2.16. Save this value to a file to be used later.

    +
    ncn-m001# echo sat-2.2.16 > /tmp/sat-layer-name.txt
    +
  12. +
  13. +

    Fresh install only: Save the CFS configuration layer for SAT to a file +for later use.

    +

    The install.sh script attempts to modify any CFS configurations that apply +to the master management NCNs. During a fresh install, no such CFS +configurations will be found, and it will instead log the SAT configuration +layer that must be added to the CFS configuration that will be created. Here +is an example of the output in that case:

    +
    ====> Updating active CFS configurations
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
    +WARNING: No CFS configurations found that apply to components with role Management and subrole Master.
    +INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master.
    +{
    +    "name": "sat-2.2.15",
    +    "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
    +    "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
    +    "playbook": "sat-ncn.yml"
    +}
    +

    Save the JSON output to a file for later use. For example:

    +
    ncn-m001# cat > /tmp/sat-layer.json <<EOF
    +> {
    +>     "name": "sat-2.2.15",
    +>     "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
    +>     "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
    +>     "playbook": "sat-ncn.yml"
    +> }
    +> EOF
    +

    Do not copy the previous command verbatim. Use the JSON output from the +install.sh script.

    +
  14. +
+

Post-Installation Procedure

+
    +
  1. +

    Optional: Remove the SAT release distribution tar file and extracted directory.

    +
    ncn-m001# rm sat-2.2.x.tar.gz
    +ncn-m001# rm -rf sat-2.2.x/
    +
  2. +
  3. +

    Upgrade only: Ensure that the environment variable SAT_TAG is not set +in the ~/.bashrc file on any of the management NCNs.

    +

    NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.

    +

    The following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, +and shows output from a system in which no further action is needed.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m003: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +

    The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. +Remove that line from the ~/.bashrc file on ncn-m002.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +ncn-m002: export SAT_TAG=3.5.0
    +ncn-m003: source <(kubectl completion bash)
    +
  4. +
  5. +

    Stop the typescript.

    +

    NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.

    +
    ncn-m001# exit
    +
  6. +
+

SAT version 2.2.x is now installed/upgraded, meaning the SAT 2.2.x release +has been loaded into the system software repository.

+
    +
  • SAT configuration content for this release has been uploaded to VCS.
  • +
  • SAT content for this release has been uploaded to the CSM product catalog.
  • +
  • SAT content for this release has been uploaded to Nexus repositories.
  • +
  • The sat command won’t be available until the NCN Personalization +procedure has been executed.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.

+

NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.

+

Execute the NCN Personalization procedure:

+ +

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the upgrade procedures:

+ +

Perform NCN Personalization

+

Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+
    +
  • The Install the System Admin Toolkit Product Stream +procedure has been successfully completed.
  • +
  • If upgrading, the names of the CFS configurations updated during installation +were saved to the file /tmp/sat-ncn-cfs-configurations.txt.
  • +
  • If upgrading, the name of the new SAT CFS configuration layer was saved to +the file /tmp/sat-layer-name.txt.
  • +
  • If performing a fresh install, the SAT CFS configuration layer was saved to +the file /tmp/sat-layer.json.
  • +
+

Notes on the Procedure

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.2.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • If upgrading SAT, the existing configuration will likely include other Cray EX product +entries. Update the SAT entry as described in this procedure. The HPE Cray EX System +Software Getting Started Guide provides guidance on how and when to update the +entries for the other products.
  • +
+

Procedure

+
    +
  1. +

    Start a typescript if not already using one.

    +

    The typescript will capture the commands and the output from this installation procedure.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
  3. +

    Fresh install only: Add the SAT layer to the NCN personalization JSON file.

    +

    If the SAT install script, install.sh, did not identify and modify the CFS +configurations that apply to each master management NCN, it will have printed +the SAT CFS configuration layer in JSON format. This layer must be added to +the JSON file being used to construct the CFS configuration. For example, +if the file being used is named ncn-personalization.json, and the SAT +layer was saved to the file /tmp/sat-layer.json as described in the +install instructions, the following jq command will append the SAT layer +and save the result in a new file named ncn-personalization.new.json.

    +
    ncn-m001# jq -s '{layers: (.[0].layers + [.[1]])}' ncn-personalization.json \
    +    /tmp/sat-layer.json > ncn-personalization.new.json
    +

    For instructions on how to create a CFS configuration from the previous +file and how to apply it to the management NCNs, refer to “Perform NCN +Personalization” in the HPE Cray System Management Documentation. After +the CFS configuration has been created and applied, return to this +procedure.

    +
  4. +
  5. +

    Upgrade only: Invoke each CFS configuration that was updated during the +upgrade.

    +

    If the SAT install script, install.sh, identified CFS configurations that +apply to the master management NCNs and modified them in place, invoke each +CFS configuration that was created or updated during installation.

    +

    This step will create a CFS session for each given configuration and install +SAT on the associated manager NCNs.

    +

    The --configuration-limit option limits the configuration session to run +only the SAT layer of the configuration.

    +

    You should see a representation of the CFS session in the output.

    +
    ncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt);
    +do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
    +    "${cfs_configuration}" --configuration-limit $(cat /tmp/sat-layer-name.txt);
    +done
    +
    +name="sat-session-ncn-personalization"
    +
    +[ansible]
    +...
    +
  6. +
  7. +

    Upgrade only: Monitor the progress of each CFS session.

    +

    This step assumes a single session named sat-session-ncn-personalization was created in the previous step.

    +

    First, list all containers associated with the CFS session:

    +
    ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
    +    -o json | jq '.items[0].spec.containers[] | .name'
    +"inventory"
    +"ansible-1"
    +"istio-proxy"
    +

    Next, get the logs for the ansible-1 container.

    +

    NOTE: The trailing digit might differ from “1”. It is the zero-based +index of the sat-ncn layer within the configuration’s layers.

    +
    ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-ncn-personalization
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Execute this step for each unique CFS configuration.

    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  8. +
  9. +

    Verify that SAT was successfully configured.

    +

    If sat is configured, the --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +sat 3.7.0
    +

    NOTE: Upon first running sat, you may see additional output while the sat +container image is downloaded. This will occur the first time sat is run on +each manager NCN. For example, if you run sat for the first time on ncn-m001 +and then for the first time on ncn-m002, you will see this additional output +both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  10. +
  11. +

    Stop the typescript.

    +
    ncn-m001# exit
    +
  12. +
+

SAT version 2.2.x is now configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.

+

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the SAT Post-Upgrade procedures:

+ +

SAT Authentication

+

Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in +later steps of the install process. The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see +Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. +For additional information on SAT authentication, see System Security and Authentication in the CSM +documentation.

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Description of SAT Command Authentication Types

+

Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to +the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, +the System Administrator must generate the S3 access key and secret key and write them to a local file. This must be +done on every Kubernetes manager node where SAT commands are run.

+

Below is a table describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bmccredsRequires authentication to the API gateway.sat-bmccredsSet BMC passwords.
sat bootprepRequires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install.sat-bootprepPrepare to boot nodes with images and configurations.
sat bootsysRequires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwhistRequires authentication to the API gateway.sat-hwhistReport hardware component history.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat k8sRequires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install.sat-k8sReport on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node).
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node xnames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat slscheckRequires authentication to the API gateway.sat-slscheckPerform a cross-check between SLS and HSM.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC xnames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password +on the command line. The username value is obtained from the following locations, in order of higher precedence to lower +precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to +~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, +and will use the token for that username if it has been obtained and saved by sat auth.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:

+
    +
  1. +

    Generate a default SAT configuration file, if one does not exist.

    +
    ncn-m001# sat init
    +Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the config file already exists, it will print out an error:

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:

    +
    username = "crayadmin"
    +
  4. +
  5. +

    Run sat auth. Enter your password when prompted. E.g.:

    +
    ncn-m001# sat auth
    +Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    Other sat commands are now authenticated to make requests to the API gateway. E.g.:

    +
    ncn-m001# sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret key and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev +(see: Run Sat Setrev to Set System Information).

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Ensure the files are readable only by root.

    +
    ncn-m001# touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    Write the credentials to local files using kubectl.

    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      Get the SAT configuration file’s endpoint value.

      +

      NOTE: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      ncn-m001# grep endpoint ~/.config/sat/sat.toml
      +# endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      Get the sat-s3-credentials secret’s endpoint value.

      +
      ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    Copy SAT configurations to each manager node on the system.

    +
    ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.

    +
  8. +
+

Run sat setrev to Set System Information

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Run sat setrev to set System Revision Information. Follow the on-screen prompts to set +the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    TIP: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. I.e., “System type” is EX-1C.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Run sat showrev to verify System Revision Information. The following tables contain example information.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+

Remove obsolete configuration file sections

+

Prerequisites

+ +

Procedure

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. +In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+

SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO level +are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
sat bootsys --stage shutdown --stage session-checks
+sat sensors
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
sat nid2xname
+sat xname2nid
+sat swap
+

Uninstall: Removing a Version of SAT

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. Older versions must be uninstalled manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +

    NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| product_name | product_version | active | images            | image_recipes         |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                 | -                     |
    +| sat          | 2.2.10          | False  | -                 | -                     |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to uninstall a version of SAT.

    +

    This command will do three things:

    +
      +
    • Remove all hosted-type package repositories associated with the given version of SAT. Group-type +repositories are not removed.
    • +
    • Remove all container images associated with the given version of SAT.
    • +
    • Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up +in the output of sat showrev.
    • +
    +
    ncn-m001# prodmgr uninstall sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 has been removed.
    +Removed Docker image cray/cray-sat:3.9.0
    +Removed Docker image cray/sat-cfs-install:1.0.2
    +Removed Docker image cray/sat-install-utility:1.4.0
    +Deleted sat-2.2.10 from product catalog.
    +
  4. +
+

Activate: Switching Between Versions

+

This procedure can be used to downgrade the active version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                  | -                     |
    +| sat          | 2.2.10          | False  | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to activate a different version of SAT.

    +

    This command will do three things:

    +
      +
    • For all hosted-type package repositories associated with this version of SAT, set them as the sole member +of their corresponding group-type repository. For example, activating SAT version 2.2.10 +sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group.
    • +
    • Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of +sat showrev.
    • +
    • Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are +associated with NCNs with the role “Management” and subrole “Master” (for example, the CFS configuration +ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS +configuration content associated with the version of SAT being activated.
    • +
    +
    ncn-m001# prodmgr activate sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
    +Set sat-2.2.10 as active in product catalog.
    +Updated CFS configurations: [ncn-personalization]
    +
  4. +
  5. +

    Verify that the chosen version is marked as active.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | False  | -                  | -                     |
    +| sat          | 2.2.10          | True   | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  6. +
  7. +

    Run NCN Personalization.

    +

    At this point, the command has modified Nexus package repositories to set a particular package repository +as active, but no packages on the NCNs have been changed. In order to complete the activation process, +NCN Personalization must be executed to change the cray-sat-podman package version on the manager NCNs.

    +

    NOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated, +which may not necessarily be just ncn-personalization. If multiple configurations were updated in step 2, then +a cray cfs sessions create command should be run for each of them. This example assumes a single configuration +named ncn-personalization was updated. If multiple were updated, set cfs_configurations to a space-separated +list below.

    +
    ncn-m001# cfs_configurations="ncn-personalization"
    +ncn-m001# for cfs_configuration in ${cfs_configurations}
    +do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
    +    "${cfs_configuration}" --configuration-limit sat-ncn;
    +done
    +
  8. +
  9. +

    Monitor the progress of each CFS session.

    +

    This step assumes a single session named sat-session-ncn-personalization was created in the previous step.

    +

    First, list all containers associated with the CFS session:

    +
    ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
    +    -o json | jq '.items[0].spec.containers[] | .name'
    +"inventory"
    +"ansible-1"
    +"istio-proxy"
    +

    Next, get the logs for the ansible-1 container.

    +

    NOTE: The trailing digit might differ from “1”. It is the zero-based +index of the sat-ncn layer within the configuration’s layers.

    +
    ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-ncn-personalization
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Execute this step for each unique CFS configuration.

    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  10. +
  11. +

    Verify the new version of the SAT CLI.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the SAT Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +3.9.0
    +
  12. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/introduction/index.html b/en-22/introduction/index.html new file mode 100644 index 0000000000..48b0df9f74 --- /dev/null +++ b/en-22/introduction/index.html @@ -0,0 +1,823 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.

+

Six Kibana Dashboards are included with SAT. They provide organized output for system health information.

+ +

Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.

+ +

SAT is installed as a separate product as part of the HPE Cray EX System base installation.

+

System Admin Toolkit Command Overview

+

Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes +manager nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes manager node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using interactive and non-interactive modes.

+

Interactive

+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

Non-interactive

+
ncn-m001# sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, then use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful when calling sat from a script, or when running a single sat command as part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following +example.

+
ncn-m001# sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+
ncn-m001# man sat
+
ncn-m001# man sat-podman
+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.

+
    +
  • The root or super-user account always has the # character at the end of the prompt and has the host name of the +host in the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A user account that is neither root nor crayadm is +referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the string shown in the table below. It also has the “#” +character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + +
Command PromptMeaning
ncn-m001#Run on one of the Kubernetes Manager servers. (Non-interactive)
(CONTAINER_ID) sat-container#Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

Examples of the sat status command used by an administrator:

+
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX (Shasta) software stack. The following list shows these dependencies +for each subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bmccreds

+

CSM

+
    +
  • System Configuration Service (SCSD)
  • +
+

sat bootprep

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Image Management Service (IMS)
  • +
  • Version Control Service (VCS)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Compute Rolling Upgrade Service (CRUS)
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

COS

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diag

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwhist

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat slscheck

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat status

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/release_notes/index.html b/en-22/release_notes/index.html new file mode 100644 index 0000000000..4655a88537 --- /dev/null +++ b/en-22/release_notes/index.html @@ -0,0 +1,1056 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Release Notes

+

Summary of changes in SAT 2.2

+

SAT 2.2.16 was released on February 25th, 2022.

+

This version of the SAT product included:

+
    +
  • Version 3.14.0 of the sat python package and CLI
  • +
  • Version 1.6.4 of the sat-podman wrapper script
  • +
  • Version 1.0.4 of the sat-cfs-install container image and Helm chart
  • +
+

It also added the following new components:

+
    +
  • Version 1.4.3 of the sat-install-utility container image
  • +
  • Version 2.0.2 of the cfs-config-util container image
  • +
+

The following sections detail the changes in this release.

+

Known issues in SAT 2.2

+

sat command unavailable in sat bash shell

+

After launching a shell within the SAT container with sat bash, the sat command will not +be found. For example:

+
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+

This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the +$PATH environment variable:

+
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+

Or, the file /sat/venv/bin/activate may be sourced:

+
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+

Tab completion unavailable in sat bash shell

+

After launching a shell within the SAT container with sat bash, tab completion for sat +commands does not work.

+

This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:

+
source /etc/bash_completion.d/sat-completion.bash
+

OCI runtime permission error when running sat in root directory

+

sat commands will not work if the current directory is /. For example:

+
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+

To resolve, run sat in another directory.

+

Duplicate mount error when running sat in config directory

+

sat commands will not work if the current directory is ~/.config/sat. For example:

+
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+

To resolve, run sat in another directory.

+

New sat commands

+
    +
  • sat bootprep automates the creation of CFS configurations, the build and +customization of IMS images, and the creation of BOS session templates. See +SAT Bootprep for details.
  • +
  • sat slscheck performs a check for consistency between the System Layout +Service (SLS) and the Hardware State Manager (HSM).
  • +
  • sat bmccreds provides a simple interface for interacting with the System +Configuration Service (SCSD) to set BMC Redfish credentials.
  • +
  • sat hwhist displays hardware component history by xname (location) or by +its Field-Replaceable Unit ID (FRUID). This command queries the Hardware +State Manager (HSM) API to obtain this information. Since the sat hwhist +command supports querying for the history of a component by its FRUID, the +FRUID of components has been added to the output of sat hwinv.
  • +
+

Additional Install Automation

+

The following automation has been added to the install script, install.sh:

+
    +
  • Wait for the completion of the sat-config-import Kubernetes job, which is +started when the sat-cfs-install Helm chart is deployed.
  • +
  • Automate the modification of the CFS configuration, which applies to master +management NCNs (e.g. “ncn-personalization”).
  • +
+

Changes to Product Catalog Data Schema

+

The SAT product uploads additional information to the cray-product-catalog +Kubernetes ConfigMap detailing the components it provides, including container +(Docker) images, Helm charts, RPMs, and package repositories.

+

This information is used to support uninstall and activation of SAT product +versions moving forward.

+

Support for Uninstall and Activation of SAT Versions

+

Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.

+

See Uninstall: Removing a Version of SAT +and Activate: Switching Between Versions +for details.

+

Improvements to sat status

+

A Subrole column has been added to the output of sat status. This allows you +to easily differentiate between master, worker, and storage nodes in the +management role, for example.

+

Hostname information from SLS has been added to sat status output.

+

Added Support for JSON Output

+

Support for JSON-formatted output has been added to commands which currently +support the --format option, such as hwinv, status, and showrev.

+

Usability Improvements

+

Many usability improvements have been made to multiple sat commands, +mostly related to filtering command output. The following are some highlights:

+
    +
  • Added --fields option to display only specific fields for subcommands which +display tabular reports.
  • +
  • Added ability to filter on exact matches of a field name.
  • +
  • Improved handling of multiple matches of a field name in --filter queries +so that the first match is used, similar to --sort-by.
  • +
  • Added support for --filter, --fields, and --reverse for summaries +displayed by sat hwinv.
  • +
  • Added borders to summary tables generated by sat hwinv.
  • +
  • Improved documentation in the man pages.
  • +
+

Default Log Level Changed

+

The default log level for stderr has been changed from “WARNING” to “INFO”. For +details, see SAT Logging.

+

More Granular Log Level Configuration Options

+

With the command-line options --loglevel-stderr and --loglevel-file, the log level +can now be configured separately for stderr and the log file.

+

The existing --loglevel option is now an alias for the --loglevel-stderr option.

+

Podman Wrapper Script Improvements

+

The Podman wrapper script is the script installed at /usr/bin/sat on the +master management NCNs by the cray-sat-podman RPM that runs the cray-sat +container in podman. The following subsections detail improvements that were +made to the wrapper script in this release.

+

Mounting of $HOME and Current Directories in cray-sat Container

+

The Podman wrapper script that launches the cray-sat container with podman +has been modified to mount the user’s current directory and home directory into +the cray-sat container to provide access to local files in the container.

+

Podman Wrapper Script Documentation Improvements

+

The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:

+
    +
  • Environment variables that affect execution of the wrapper script
  • +
  • Host files and directories mounted in the container
  • +
+

Fixes to Podman Wrapper Script Output Redirection

+

Fixed issues with redirecting stdout and stderr, and piping output to commands, +such as awk, less, and more.

+

Configurable HTTP Timeout

+

A new sat option has been added to configure the HTTP timeout length for +requests to the API gateway. See sat-man sat for details.

+

sat bootsys Improvements

+

Many improvements and fixes have been made to sat bootsys. The following are some +highlights:

+
    +
  • Added the --excluded-ncns option, which can be used to omit NCNs +from the platform-services and ncn-power stages in case they are +inaccessible.
  • +
  • Disruptive shutdown stages in sat bootsys shutdown now prompt the user to +continue before proceeding. A new option, --disruptive, will bypass this.
  • +
  • Improved Ceph service health checks and restart during the platform-services +stage of sat bootsys boot.
  • +
+

sat xname2nid Improvements

+

sat xname2nid can now recursively expand slot, chassis, and cabinet xnames to +a list of nids in those locations.

+

A new --format option has been added to sat xname2nid. It sets the output format to +either “range” (the default) or “nid”. The “range” format displays nids in a +compressed range format suitable for use with a workload manager like Slurm.

+

Usage of v2 HSM API

+

The commands which interact with HSM (e.g., sat status and sat hwinv) now +use the v2 HSM API.

+

sat diag Limited to HSN Switches

+

sat diag will now only operate against HSN switches by default. These are the +only controllers that support running diagnostics with HMJTD.

+

sat showrev Enhancements

+

A column has been added to the output of sat showrev that indicates whether a +product version is “active”. The definition of “active” varies across products, +and not all products may set an “active” version.

+

For SAT, the active version is the one with its hosted-type package repository in +Nexus set as the member of the group-type package repository in Nexus, +meaning that it will be used when installing the cray-sat-podman RPM.

+

cray-sat Container Image Size Reduction

+

The size of the cray-sat container image has been approximately cut in half by +leveraging multi-stage builds. This also improved the repeatability of the unit +tests by running them in the container.

+

Bug Fixes

+

Minor bug fixes were made in cray-sat and in cray-sat-podman. For full change lists, +see each repository’s CHANGELOG.md file.

+

Summary of SAT changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named “ncn-personalization”.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +executables on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and config file options +have been removed. For further instructions, see Remove Obsolete Configuration +File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation has been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes masters and workers.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+

Summary of SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+

Summary of SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes management cluster (i.e., workers +and masters). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Config File Location Change

+

The default location of the SAT config file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own config files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment variable.

+

Additionally, if a config file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies option.
  • +
  • List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of +node accelerators is also included for each node.
  • +
  • List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers +option. The count of node accelerator risers is also included for each node.
  • +
  • List node High Speed Network Interface Cards (HSN NICs) with the --list-node-hsn-nics +option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For detailed +instructions, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding config-file +option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding config file options were +deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+

Summary of SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command which supported operations for replacing a switch has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+

Summary of SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

See the System Admin Toolkit Command Overview +and the table of commands in the SAT Authentication section +of this document for more details on each of these commands.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-22/sitemap.xml b/en-22/sitemap.xml new file mode 100644 index 0000000000..67f6c0fd28 --- /dev/null +++ b/en-22/sitemap.xml @@ -0,0 +1,308 @@ + + + + /docs-sat/en-22/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-22/dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-22/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-22/introduction/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-22/dashboards/sat_grafana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-22/dashboards/sat_kibana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-22/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-22/usage/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-22/categories/ + + + + + + + + /docs-sat/en-22/tags/ + + + + + + + + diff --git a/en-22/tags/index.html b/en-22/tags/index.html new file mode 100644 index 0000000000..e2cbadb624 --- /dev/null +++ b/en-22/tags/index.html @@ -0,0 +1,579 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-22/tags/index.xml b/en-22/tags/index.xml new file mode 100644 index 0000000000..2e359dd74f --- /dev/null +++ b/en-22/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-22/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-22 + + + diff --git a/en-22/usage/index.html b/en-22/usage/index.html new file mode 100644 index 0000000000..32ab30a76b --- /dev/null +++ b/en-22/usage/index.html @@ -0,0 +1,858 @@ + + + + + + + + + + + + SAT Usage :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Usage

+

SAT Bootprep

+

SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.

+

This automated process centers around the sat bootprep command. Man page +documentation for sat bootprep can be viewed similarly to other SAT commands.

+
ncn-m001# sat-man sat-bootprep
+

SAT Bootprep vs SAT Bootsys

+

sat bootprep is used to create CFS configurations, build and +rename IMS images, and create BOS session templates which tie the +configurations and images together during a BOS session.

+

sat bootsys automates several portions of the boot and shutdown processes, +including (but not limited to) performing BOS operations (such as creating BOS +sessions), powering on and off cabinets, and checking the state of the system +prior to shutdown.

+

Editing a bootprep input file

+

The input file provided to sat bootprep is a YAML-formatted file containing +information which CFS, IMS, and BOS use to create configurations, images, and +BOS session templates respectively. Writing and modifying these input files is +the main task associated with using sat bootprep. An input file is composed of +three main sections, one each for configurations, images, and session templates. +These sections may be specified in any order, and any of the sections may be +omitted if desired.

+

Creating CFS configurations

+

The configurations section begins with a configurations: key.

+
---
+configurations:
+

Under this key, the user can list one or more configurations to create. For +each configuration, a name should be given, in addition to the list of layers +which comprise the configuration. Each layer can be defined by a product name +and optionally a version number, or commit hash or branch in the product’s +configuration repository. Alternatively, a layer can be defined by a Git +repository URL directly, along with an associated branch or commit hash.

+

When a configuration layer is specified in terms of a product name, the layer +is created in CFS by looking up relevant configuration information (including +the configuration repository and commit information) from the +cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied, but if it is absent, the version is assumed to be the latest version +found in the cray-product-catalog.

+
---
+configurations:
+- name: example-configuration
+  layers:
+  - name: example product
+    playbook: example.yml
+    product:
+      name: example
+      version: 1.2.3
+

Alternatively, a configuration layer may be specified by explicitly referencing +the desired configuration repository, along with the branch containing the +intended version of the Ansible playbooks. A commit hash may be specified by replacing +branch with commit.

+
  ...
+  - name: another example product
+    playbook: another-example.yml
+    git:
+      url: "https://vcs.local/vcs/another-example-config-management.git"
+      branch: main
+  ...
+

When sat bootprep is run against an input file, a CFS configuration will be +created corresponding to each configuration in the configurations section. For +example, the configuration created from an input file with the layers listed +above might look something like the following:

+
{
+    "lastUpdated": "2022-02-07T21:47:49Z",
+    "layers": [
+        {
+            "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "example product",
+            "playbook": "example.yml"
+        },
+        {
+            "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "another example product",
+            "playbook": "another-example.yml"
+        }
+    ],
+    "name": "example-configuration"
+}
+

Creating IMS images

+

After specifying configurations, the user may add images to the input file +which are to be built by IMS. To add an images section, the user should add +an images key.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+

Under the images key, the user may define one or more images to be created in +a list. Each element of the list defines a separate IMS image to be built and/or +configured. Images must contain a name, as well as an ims section containing a +definition of the image to be built and/or configured. Images may be defined by +an image recipe, or by a pre-built image. Recipes and pre-built images are +referred to by their names or IDs in IMS. The ims section should also contain +an is_recipe property, which indicates whether the name or ID refers to an +image recipe or a pre-built image. Images may also optionally provide a text +description of the image. This description is not stored or used by sat bootprep or any CSM services, but is useful for documenting images in the input +file.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+- name: example-compute-image
+  description: >
+    An example compute node image for illustrative purposes.
+  ims:
+    name: example-compute-image-recipe
+    is_recipe: true
+- name: another-example-compute-image
+  description: >
+    Another example compute node image.
+  ims:
+    id: <IMS image UUID>
+    is_recipe: false
+

Images may also contain a configuration property in their definition, which +specifies a configuration with which to customize the built image prior to +booting. If a configuration is specified, then configuration groups must also +be specified using the configuration_group_names property.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+- name: example-compute-image
+  description: >
+    An example compute node image for illustrative purposes.
+  ims:
+    name: example-compute-image-recipe
+    is_recipe: true
+  configuration: example configuration
+  configuration_group_names:
+  - Compute
+

Creating BOS session templates

+

BOS session templates are the final section of the input file, and are defined +under the session_templates key.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+  ... (omitted for brevity)
+session_templates:
+

Each session template is defined in terms of its name, an image, a +configuration, and a set of parameters which can be used to configure the +session. The name, image, and configuration are specified with their respective +name, image, and configuration keys. bos_parameters may also be +specified; currently, the only setting under bos_parameters that is supported +is boot_sets, which can be used to define boot sets in the BOS session +template. Each boot set is defined under its own property under boot_sets, and +the value of each boot set can contain the following properties, all of +which are optional:

+
    +
  • kernel_parameters: the parameters passed to the kernel on the command line
  • +
  • network: the network over which the nodes will boot
  • +
  • node_list: nodes to add to the boot set
  • +
  • node_roles_groups: HSM roles to add to the boot set
  • +
  • node_groups: HSM groups to add to the boot set
  • +
  • rootfs_provider: the root file system provider
  • +
  • rootfs_provider_passthrough: parameters to add to the rootfs= kernel +parameter
  • +
+

The properties listed previously are the same as the parameters that can be +specified directly through BOS boot sets. More information can be found in the +CSM documentation on session +templates. +Additional properties not listed are passed through to the BOS session template +as written.

+

An example session template might look like the following:

+
configurations:
+  ... (omitted for brevity)
+images:
+  ... (omitted for brevity)
+session_templates:
+- name: example-session-template
+  image: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_list: []
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

Example bootprep input files

+

Putting together all of the previous input file sections, an example bootprep input +file might look something like the following.

+
---
+configurations:
+- name: cos-config
+  layers:
+  - name: cos-integration-2.2.87
+    playbook: site.yml
+    product:
+      name: cos
+      version: 2.2.87
+      branch: integration
+  - name: cpe-integration-21.12.3
+    playbook: pe_deploy.yml
+    product:
+      name: cpe
+      version: 21.12.3
+      branch: integration
+  - name: slurm-master-1.1.1
+    playbook: site.yml
+    product:
+      name: slurm
+      version: 1.1.1
+      branch: master
+images:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  ims:
+    is_recipe: true
+    name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  configuration: cos-config
+  configuration_group_names:
+  - Compute
+session_templates:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  image: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  configuration: cos-config
+  bos_parameters:
+    boot_sets:
+      compute:
+        kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+        node_roles_groups:
+        - Compute
+

Creating a pre-populated example bootprep input file

+

It is possible to create an example bootprep input file using values from the +system’s product catalog using the sat bootprep generate-example command.

+
ncn-m001# sat bootprep generate-example
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.1.5) of product uan
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (2.1.5) of product uan
+INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml.
+

This file should be reviewed and edited to match the desired parameters of the +configurations, images, and session templates.

+

Viewing built-in generated documentation

+

The contents of the YAML input files described above must conform to a schema +which defines the structure of the data. The schema definition is written using +the JSON Schema format. (Although the format is named “JSON Schema”, the schema +itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.

+

Viewing the exact schema specification

+

To view the exact schema specification, run sat bootprep view-schema.

+
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft-07/schema"
+title: Bootprep Input File
+description: >
+  A description of the set of CFS configurations to create, the set of IMS
+  images to create and optionally customize with the defined CFS configurations,
+  and the set of BOS session templates to create that reference the defined
+  images and configurations.
+type: object
+additionalProperties: false
+properties:
+  ...
+

Generating user-friendly documentation

+

The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature was included +which can generate user-friendly HTML documentation for the input file schema +which can be browsed with the user’s preferred web browser.

+
    +
  1. +

    Create a documentation tarball using sat bootprep.

    +
    ncn-m001# sat bootprep generate-docs
    +INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
    +

    An alternate output directory can be specified with the --output-dir +option. The generated tarball is always named bootprep-schema-docs.tar.gz.

    +
    ncn-m001# sat bootprep generate-docs --output-dir /tmp
    +INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
    +
  2. +
  3. +

    From another machine, copy the tarball to a local directory.

    +
    another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
    +
  4. +
  5. +

    Extract the contents of the tarball and open the contained index.html.

    +
    another-machine$ tar xzvf bootprep-schema-docs.tar.gz
    +x bootprep-schema-docs/
    +x bootprep-schema-docs/index.html
    +x bootprep-schema-docs/schema_doc.css
    +x bootprep-schema-docs/schema_doc.min.js
    +another-machine$ open bootprep-schema-docs/index.html
    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/404.html b/en-23/404.html new file mode 100644 index 0000000000..9e96fa3080 --- /dev/null +++ b/en-23/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-23/categories/index.html b/en-23/categories/index.html new file mode 100644 index 0000000000..0d9b6915c7 --- /dev/null +++ b/en-23/categories/index.html @@ -0,0 +1,579 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-23/categories/index.xml b/en-23/categories/index.xml new file mode 100644 index 0000000000..ee0b244dca --- /dev/null +++ b/en-23/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-23/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-23 + + + diff --git a/en-23/dashboards/index.html b/en-23/dashboards/index.html new file mode 100644 index 0000000000..3d8a9308c2 --- /dev/null +++ b/en-23/dashboards/index.html @@ -0,0 +1,561 @@ + + + + + + + + + + + + SAT Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Dashboards

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-23/dashboards/index.xml b/en-23/dashboards/index.xml new file mode 100644 index 0000000000..56d7a3ca5d --- /dev/null +++ b/en-23/dashboards/index.xml @@ -0,0 +1,26 @@ + + + + SAT Dashboards on System Admin Toolkit (SAT) + /docs-sat/en-23/dashboards/ + Recent content in SAT Dashboards on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-23 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Grafana Dashboards + /docs-sat/en-23/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-23/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. 
+ + + diff --git a/en-23/dashboards/sat_grafana_dashboards/index.html b/en-23/dashboards/sat_grafana_dashboards/index.html new file mode 100644 index 0000000000..bbc472b260 --- /dev/null +++ b/en-23/dashboards/sat_grafana_dashboards/index.html @@ -0,0 +1,666 @@ + + + + + + + + + + + + SAT Grafana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Grafana Dashboards

+

The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.

+

Grafana can be accessed via web browser at the following URL:

+
    +
  • https://sma-grafana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com

+

For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.

+

For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.

+ +

There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Dashboard NameDisplay Type
Fabric CongestionChart Panels
Fabric RFC3635Chart Panels
Fabric ErrorsTabular Format
Fabric Port StateTabular Format
+

The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.

+

SAT Grafana Interval and Locations Options

+

Shows the Interval and Locations Options for the available telemetry.

+

+

The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.

+

For additional information, refer to Grafana Templates and Variables.

+

The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.

+

The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.

+

Grafana Fabric Congestion Dashboard

+

+

SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. They also provide the ability to drill down +to view data for specific ports on specific switches.

+

This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.

+

The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.

+

The port type of a link is reported as a port state “subtype” event when defined at port initialization.

+

Grafana Fabric Errors Dashboard

+

+

This dashboard reports error counters in a tabular format in three panels.

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

Unlike other dashboards, the locations presented are all locations in the system rather than only those having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.

+

Grafana Fabric Port State Dashboard

+

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.

+

The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.

+

Grafana Fabric RFC3635 Dashboard

+

+

For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.

+

Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/dashboards/sat_kibana_dashboards/index.html b/en-23/dashboards/sat_kibana_dashboards/index.html new file mode 100644 index 0000000000..4267be40c1 --- /dev/null +++ b/en-23/dashboards/sat_kibana_dashboards/index.html @@ -0,0 +1,855 @@ + + + + + + + + + + + + SAT Kibana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Kibana Dashboards

+

Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.

+

Kibana can be accessed via web browser at the following URL:

+
    +
  • https://sma-kibana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com

+

For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.

+

Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DashboardShort DescriptionLong DescriptionKibana Visualization and Search Name
sat-aerAER correctedCorrected Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-corrected Search: sat-aer-corrected
sat-aerAER fatalFatal Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-fatal Search: sat-aer-fatal
sat-atomATOM failuresApplication Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged.sat-atom-failed
sat-atomATOM admindownApplication Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch.sat-atom-admindown
sat-heartbeatHeartbeat loss eventsHeartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system.sat-heartbeat
sat-kernelKernel assertionsThe kernel software performs a failed assertion when some condition represents a serious fault. The node goes down.sat-kassertions
sat-kernelKernel panicsThe kernel panics when something is seriously wrong. The node goes down.sat-kernel-panic
sat-kernelLustre bugs (LBUGs)The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down.sat-lbug
sat-kernelCPU stallsCPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric.sat-cpu-stall
sat-kernelOut of memoryAn Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided.sat-oom
sat-mceMCEMachine Check Exceptions (MCE) are errors detected at the processor level.sat-mce
sat-rasdaemonrasdaemon errorsErrors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future.sat-rasdaemon-error
sat-rasdaemonrasdaemon messagesAll messages from the rasdaemon service on nodes.sat-rasdaemon
+

Disable Search Highlighting in Kibana Dashboard

+

By default, search highlighting is enabled. This procedure describes how to disable search highlighting.

+

The Kibana Dashboard should be open on your system.

+
    +
  1. +

    Navigate to Management

    +
  2. +
  3. +

    Navigate to Advanced Settings in the Kibana section, below the Elasticsearch section

    +
  4. +
  5. +

    Scroll down to the Discover section

    +
  6. +
  7. +

    Change Highlight results from on to off

    +
  8. +
  9. +

    Click Save to save changes

    +
  10. +
+

AER Kibana Dashboard

+

The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.

+

View the AER Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-aer dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.

    +
  8. +
+

ATOM Kibana Dashboard

+

The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.

+

View the ATOM Kibana Dashboard

+

HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-atom dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Heartbeat Kibana Dashboard

+

The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.

+

View the Heartbeat Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-heartbeat dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.

    +
  8. +
+

Kernel Kibana Dashboard

+

The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.

+

View the Kernel Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-kernel dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.

    +
  8. +
+

MCE Kibana Dashboard

+

The MCE Dashboard displays CPU detected processor-level hardware errors.

+

View the MCE Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-mce dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Rasdaemon Kibana Dashboard

+

The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon +service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including +PCI and MCE errors. As a result, there may be some duplication between the messages presented here and the messages +presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one +for only messages of severity “emerg” or “err” and another for all messages from rasdaemon.

+

View the Rasdaemon Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-rasdaemon dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.

    +
  8. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/img/Fabric_PortState_Locations_UI.png b/en-23/img/Fabric_PortState_Locations_UI.png new file mode 100644 index 0000000000..704511ebce Binary files /dev/null and b/en-23/img/Fabric_PortState_Locations_UI.png differ diff --git a/en-23/img/Grafana_Fabric_Congestion.png b/en-23/img/Grafana_Fabric_Congestion.png new file mode 100644 index 0000000000..dbf481d94c Binary files /dev/null and b/en-23/img/Grafana_Fabric_Congestion.png differ diff --git a/en-23/img/Grafana_HSN_Errors.png b/en-23/img/Grafana_HSN_Errors.png new file mode 100644 index 0000000000..f43b7d02a6 Binary files /dev/null and b/en-23/img/Grafana_HSN_Errors.png differ diff --git a/en-23/img/Grafana_rfc3635.png b/en-23/img/Grafana_rfc3635.png new file mode 100644 index 0000000000..dff176c82d Binary files /dev/null and b/en-23/img/Grafana_rfc3635.png differ diff --git a/en-23/img/SAT_Grafana_Fabric_Vars.png b/en-23/img/SAT_Grafana_Fabric_Vars.png new file mode 100644 index 0000000000..194d75b124 Binary files /dev/null and b/en-23/img/SAT_Grafana_Fabric_Vars.png differ diff --git a/en-23/index.html b/en-23/index.html new file mode 100644 index 0000000000..160bd92c7f --- /dev/null +++ b/en-23/index.html @@ -0,0 +1,575 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ + + + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/index.json b/en-23/index.json new file mode 100644 index 0000000000..70b4e168d5 --- /dev/null +++ b/en-23/index.json @@ -0,0 +1,71 @@ +[ +{ + "uri": "/docs-sat/en-23/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide Introduction to SAT About System Admin Toolkit (SAT) System Admin Toolkit Command Overview Command Prompt Conventions in SAT SAT Dependencies SAT Installation Install SAT Install the System Admin Toolkit Product Stream Perform NCN Personalization SAT Setup SAT Authentication Generate SAT S3 Credentials Run sat setrev to Set System Information SAT Post-Upgrade Remove obsolete configuration file sections SAT Logging SAT Uninstall and Downgrade Uninstall: Removing a Version of SAT Activate: Switching Between Versions SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards SAT Usage SAT Bootprep SAT Release Notes Summary of changes in SAT 2.3 Summary of changes in SAT 2.2 Summary of SAT changes in Shasta v1.5 Summary of SAT Changes in Shasta v1.4.1 Summary of SAT Changes in Shasta v1.4 Summary of SAT Changes in Shasta v1.3.2 Summary of SAT Changes in Shasta v1.3 " +}, +{ + "uri": "/docs-sat/en-23/dashboards/", + "title": "SAT Dashboards", + "tags": [], + "description": "", + "content": "SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-23/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream.\nPrerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. 
In the examples below, replace 2.2.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. To upgrade SAT, execute the pre-installation, installation, and post-installation procedures for a newer distribution. The newly installed version will become the default. Pre-Installation Procedure Start a typescript.\nThe typescript will record the commands and the output from this installation.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Installation Procedure Copy the release distribution gzipped tar file to ncn-m001.\nUnzip and extract the release distribution, 2.2.x.\nncn-m001# tar -xvzf sat-2.2.x.tar.gz Change directory to the extracted release distribution directory.\nncn-m001# cd sat-2.2.x Run the installer: install.sh.\nThe script produces a lot of output. A successful install ends with \u0026ldquo;SAT version 2.2.x has been installed\u0026rdquo;.\nncn-m001# ./install.sh ... ====\u0026gt; Updating active CFS configurations ... ====\u0026gt; SAT version 2.2.x has been installed. Upgrade only: Record the names of the CFS configuration or configurations modified by install.sh.\nThe install.sh script attempts to modify any CFS configurations that apply to the master management NCNs. During an upgrade, install.sh will log messages indicating the CFS configuration or configurations that were modified. 
For example, if there are three master nodes all using the same CFS configuration named \u0026ldquo;ncn-personalization\u0026rdquo;, the output would look like this:\n====\u0026gt; Updating active CFS configurations INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s1b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s3b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s5b0n0 INFO: Updating CFS configuration \u0026#34;ncn-personalization\u0026#34; INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration \u0026#34;ncn-personalization\u0026#34;. INFO: Key \u0026#34;name\u0026#34; in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16 INFO: Successfully updated layers in configuration \u0026#34;ncn-personalization\u0026#34; Save the name of each CFS configuration updated by the installer. In the previous example, a single configuration named \u0026ldquo;ncn-personalization\u0026rdquo; was updated, so that name is saved to a temporary file.\nncn-m001# echo ncn-personalization \u0026gt;\u0026gt; /tmp/sat-ncn-cfs-configurations.txt Repeat the previous command for each CFS configuration that was updated.\nUpgrade only: Save the new name of the SAT CFS configuration layer.\nIn the example install.sh output above, the new layer name is sat-2.2.16. Save this value to a file to be used later.\nncn-m001# echo sat-2.2.16 \u0026gt; /tmp/sat-layer-name.txt Fresh install only: Save the CFS configuration layer for SAT to a file for later use.\nThe install.sh script attempts to modify any CFS configurations that apply to the master management NCNs. 
During a fresh install, no such CFS configurations will be found, and it will instead log the SAT configuration layer that must be added to the CFS configuration that will be created. Here is an example of the output in that case:\n====\u0026gt; Updating active CFS configurations INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0 WARNING: No CFS configurations found that apply to components with role Management and subrole Master. INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master. { \u0026#34;name\u0026#34;: \u0026#34;sat-2.2.15\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;9a74b8f5ba499af6fbcecfd2518a40e081312933\u0026#34;, \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;sat-ncn.yml\u0026#34; } Save the JSON output to a file for later use. For example:\nncn-m001# cat \u0026gt; /tmp/sat-layer.json \u0026lt;\u0026lt;EOF \u0026gt; { \u0026gt; \u0026#34;name\u0026#34;: \u0026#34;sat-2.2.15\u0026#34;, \u0026gt; \u0026#34;commit\u0026#34;: \u0026#34;9a74b8f5ba499af6fbcecfd2518a40e081312933\u0026#34;, \u0026gt; \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git\u0026#34;, \u0026gt; \u0026#34;playbook\u0026#34;: \u0026#34;sat-ncn.yml\u0026#34; \u0026gt; } \u0026gt; EOF Do not copy the previous command verbatim. 
Use the JSON output from the install.sh script.\nPost-Installation Procedure Optional: Remove the SAT release distribution tar file and extracted directory.\nncn-m001# rm sat-2.2.x.tar.gz ncn-m001# rm -rf sat-2.2.x/ Upgrade only: Ensure that the environment variable SAT_TAG is not set in the ~/.bashrc file on any of the management NCNs.\nNOTE: This step should only be required when updating from Shasta 1.4.1 or Shasta 1.4.2.\nThe following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, and shows output from a system in which no further action is needed.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m003: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. Remove that line from the ~/.bashrc file on ncn-m002.\nncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc ncn-m001: source \u0026lt;(kubectl completion bash) ncn-m002: source \u0026lt;(kubectl completion bash) ncn-m002: export SAT_TAG=3.5.0 ncn-m003: source \u0026lt;(kubectl completion bash) Stop the typescript.\nNOTE: This step can be skipped if you wish to use the same typescript for the remainder of the SAT install. See Next Steps.\nncn-m001# exit SAT version 2.2.x is now installed/upgraded, meaning the SAT 2.2.x release has been loaded into the system software repository.\nSAT configuration content for this release has been uploaded to VCS. SAT content for this release has been uploaded to the CSM product catalog. SAT content for this release has been uploaded to Nexus repositories. The sat command won\u0026rsquo;t be available until the NCN Personalization procedure has been executed. 
Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the sections listed below.\nNOTE: The NCN Personalization procedure is required when upgrading SAT. The setup procedures in SAT Setup, however, are not required when upgrading SAT. They should have been executed during the first installation of SAT.\nExecute the NCN Personalization procedure:\nPerform NCN Personalization If performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the upgrade procedures:\nRemove obsolete configuration file sections SAT Logging Perform NCN Personalization Describes how to perform NCN personalization using CFS. This personalization process will configure the System Admin Toolkit (SAT) product stream.\nPrerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. If upgrading, the names of the CFS configurations updated during installation were saved to the file /tmp/sat-ncn-cfs-configurations.txt. If upgrading, the name of the new SAT CFS configuration layer was saved to the file /tmp/sat-layer-name.txt. If performing a fresh install, the SAT CFS configuration layer was saved to the file /tmp/sat-layer.json. Notes on the Procedure Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.2.x with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. If upgrading SAT, the existing configuration will likely include other Cray EX product entries. Update the SAT entry as described in this procedure. 
The HPE Cray EX System Software Getting Started Guide provides guidance on how and when to update the entries for the other products. Procedure Start a typescript if not already using one.\nThe typescript will capture the commands and the output from this installation procedure.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Fresh install only: Add the SAT layer to the NCN personalization JSON file.\nIf the SAT install script, install.sh, did not identify and modify the CFS configurations that apply to each master management NCN, it will have printed the SAT CFS configuration layer in JSON format. This layer must be added to the JSON file being used to construct the CFS configuration. For example, if the file being used is named ncn-personalization.json, and the SAT layer was saved to the file /tmp/sat-layer.json as described in the install instructions, the following jq command will append the SAT layer and save the result in a new file named ncn-personalization.new.json.\nncn-m001# jq -s \u0026#39;{layers: (.[0].layers + [.[1]])}\u0026#39; ncn-personalization.json \\ /tmp/sat-layer.json \u0026gt; ncn-personalization.new.json For instructions on how to create a CFS configuration from the previous file and how to apply it to the management NCNs, refer to \u0026ldquo;Perform NCN Personalization\u0026rdquo; in the HPE Cray System Management Documentation. 
After the CFS configuration has been created and applied, return to this procedure.\nUpgrade only: Invoke each CFS configuration that was updated during the upgrade.\nIf the SAT install script, install.sh, identified CFS configurations that apply to the master management NCNs and modified them in place, invoke each CFS configuration that was created or updated during installation.\nThis step will create a CFS session for each given configuration and install SAT on the associated manager NCNs.\nThe --configuration-limit option limits the configuration session to run only the SAT layer of the configuration.\nYou should see a representation of the CFS session in the output.\nncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt); do cray cfs sessions create --name \u0026#34;sat-session-${cfs_configuration}\u0026#34; --configuration-name \\ \u0026#34;${cfs_configuration}\u0026#34; --configuration-limit $(cat /tmp/sat-layer-name.txt); done name=\u0026#34;sat-session-ncn-personalization\u0026#34; [ansible] ... Upgrade only: Monitor the progress of each CFS session.\nThis step assumes a single session named sat-session-ncn-personalization was created in the previous step.\nFirst, list all containers associated with the CFS session:\nncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \\ -o json | jq \u0026#39;.items[0].spec.containers[] | .name\u0026#39; \u0026#34;inventory\u0026#34; \u0026#34;ansible-1\u0026#34; \u0026#34;istio-proxy\u0026#34; Next, get the logs for the ansible-1 container.\nNOTE: the trailing digit might differ from \u0026ldquo;1\u0026rdquo;. It is the zero-based index of the sat-ncn layer within the configuration\u0026rsquo;s layers.\nncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \\ --selector=cfsession=sat-session-ncn-personalization Ansible plays, which are run by the CFS session, will install SAT on all the manager NCNs on the system. 
Successful results for all of the manager NCN xnames can be found at the end of the container log. For example:\n... PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Execute this step for each unique CFS configuration.\nNOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version sat 3.7.0 NOTE: Upon first running sat, you may see additional output while the sat container image is downloaded. This will occur the first time sat is run on each manager NCN. For example, if you run sat for the first time on ncn-m001 and then for the first time on ncn-m002, you will see this additional output both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... sat 3.7.0 Stop the typescript.\nncn-m001# exit SAT version 2.2.x is now configured:\nThe SAT RPM package is installed on the associated NCNs. 
Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.\nIf performing a fresh install, execute the SAT Setup procedures:\nSAT Authentication Generate SAT S3 Credentials Run Sat Setrev to Set System Information If performing an upgrade, execute the SAT Post-Upgrade procedures:\nRemove obsolete configuration file sections SAT Logging SAT Authentication Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in later steps of the install process. The admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. For additional information on SAT authentication, see System Security and Authentication in the CSM documentation.\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nDescription of SAT Command Authentication Types Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. 
This must be done on every Kubernetes manager node where SAT commands are run.\nBelow is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bmccreds Requires authentication to the API gateway. sat-bmccreds Set BMC passwords. sat bootprep Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install. sat-bootprep Prepare to boot nodes with images and configurations. sat bootsys Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwhist Requires authentication to the API gateway. sat-hwhist Report hardware component history. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat k8s Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. sat-k8s Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). 
sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node xnames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat slscheck Requires authentication to the API gateway. sat-slscheck Perform a cross-check between SLS and HSM. sat status Requires authentication to the API gateway. sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. sat-xname2nid Translate node and node BMC xnames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml. The name of the currently logged in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. 
Subsequent sat commands will determine the username the same way as sat auth described above, and will use the token for that username if it has been obtained and saved by sat auth.\nPrerequisites The sat CLI has been installed following Install The System Admin Toolkit Product Stream. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:\nGenerate a default SAT configuration file, if one does not exist.\nncn-m001# sat init Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. Note: If the config file already exists, it will print out an error:\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:\nusername = \u0026#34;crayadmin\u0026#34; Run sat auth. Enter your password when prompted. E.g.:\nncn-m001# sat auth Password for crayadmin: Succeeded! Other sat commands are now authenticated to make requests to the API gateway. E.g.:\nncn-m001# sat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes master node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see: Run Sat Setrev to Set System Information).\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites The SAT CLI has been installed following Install The System Admin Toolkit Product Stream The SAT configuration file has been created (See SAT Authentication). CSM has been installed and verified. 
Procedure Ensure the files are readable only by root.\nncn-m001# touch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key ncn-m001# chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key ncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\nGet the SAT configuration file\u0026rsquo;s endpoint value.\nNOTE: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\nncn-m001# grep endpoint ~/.config/sat/sat.toml # endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs https://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, change the SAT configuration file\u0026rsquo;s endpoint value to match the secret\u0026rsquo;s.\nCopy SAT configurations to each manager node on the system.\nncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. 
Therefore, the list of hosts above is ncn-m002 and ncn-m003.\nRun sat setrev to Set System Information NOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites S3 credentials have been generated. See Generate SAT S3 Credentials. SAT authentication has been set up. See SAT Authentication. Procedure Run sat setrev to set System Revision Information. Follow the on-screen prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date TIP: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. I.e., \u0026ldquo;System type\u0026rdquo; is EX-1C.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Run sat showrev to verify System Revision Information. 
The following tables contain example information.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... | | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ Remove obsolete configuration file sections Prerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. 
The Perform NCN Personalization procedure has been successfully completed. Procedure After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nSAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO level are now logged at the DEBUG level.\nThese changes take effect automatically. However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\nncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml [logging] ... stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. 
However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap Uninstall: Removing a Version of SAT Prerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nNOTE: It is not recommended to uninstall a version designated as \u0026ldquo;active\u0026rdquo;. If the active version is uninstalled, then the activate procedure must be executed on a remaining version.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+-------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+-------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+-------------------+-----------------------+ Use prodmgr to uninstall a version of SAT.\nThis command will do three things:\nRemove all hosted-type package repositories associated with the given version of SAT. Group-type repositories are not removed. Remove all container images associated with the given version of SAT. Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up in the output of sat showrev. 
ncn-m001# prodmgr uninstall sat 2.2.10 Repository sat-2.2.10-sle-15sp2 has been removed. Removed Docker image cray/cray-sat:3.9.0 Removed Docker image cray/sat-cfs-install:1.0.2 Removed Docker image cray/sat-install-utility:1.4.0 Deleted sat-2.2.10 from product catalog. Activate: Switching Between Versions This procedure can be used to downgrade the active version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Use prodmgr to activate a different version of SAT.\nThis command will do three things:\nFor all hosted-type package repositories associated with this version of SAT, set them as the sole member of their corresponding group-type repository. For example, activating SAT version 2.2.10 sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group. Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of sat showrev. 
Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are associated with NCNs with the role \u0026ldquo;Management\u0026rdquo; and subrole \u0026ldquo;Master\u0026rdquo; (for example, the CFS configuration ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS configuration content associated with the version of SAT being activated. ncn-m001# prodmgr activate sat 2.2.10 Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2. Set sat-2.2.10 as active in product catalog. Updated CFS configurations: [ncn-personalization] Verify that the chosen version is marked as active.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | False | - | - | | sat | 2.2.10 | True | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Run NCN Personalization.\nAt this point, the command has modified Nexus package repositories to set a particular package repository as active, but no packages on the NCNs have been changed. In order to complete the activation process, NCN Personalization must be executed to change the cray-sat-podman package version on the manager NCNs.\nNOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated, which may not necessarily be just ncn-personalization. If multiple configurations were updated in step 2, then a cray cfs sessions create command should be run for each of them. 
This example assumes a single configuration named ncn-personalization was updated. If multiple were updated, set cfs_configurations to a space-separated list below.\nncn-m001# cfs_configurations=\u0026#34;ncn-personalization\u0026#34; ncn-m001# for cfs_configuration in ${cfs_configurations} do cray cfs sessions create --name \u0026#34;sat-session-${cfs_configuration}\u0026#34; --configuration-name \\ \u0026#34;${cfs_configuration}\u0026#34; --configuration-limit sat-ncn; done Monitor the progress of each CFS session.\nThis step assumes a single session named sat-session-ncn-personalization was created in the previous step.\nFirst, list all containers associated with the CFS session:\nncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \\ -o json | jq \u0026#39;.items[0].spec.containers[] | .name\u0026#39; \u0026#34;inventory\u0026#34; \u0026#34;ansible-1\u0026#34; \u0026#34;istio-proxy\u0026#34; Next, get the logs for the ansible-1 container.\nNOTE: the trailing digit might differ from \u0026ldquo;1\u0026rdquo;. It is the zero-based index of the sat-ncn layer within the configuration\u0026rsquo;s layers.\nncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \\ --selector=cfsession=sat-session-ncn-personalization Ansible plays, which are run by the CFS session, will install SAT on all the manager NCNs on the system. Successful results for all of the manager NCN xnames can be found at the end of the container log. For example:\n... 
PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Execute this step for each unique CFS configuration.\nNOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify the new version of the SAT CLI.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the SAT Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version 3.9.0 " +}, +{ + "uri": "/docs-sat/en-23/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.\nSix Kibana Dashboards are included with SAT. They provide organized output for system health information.\nAER Kibana Dashboard ATOM Kibana Dashboard Heartbeat Kibana Dashboard Kernel Kibana Dashboard MCE Kibana Dashboard Rasdaemon Kibana Dashboard Four Grafana Dashboards are included with SAT. 
They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.\nGrafana Fabric Congestion Dashboard Grafana Fabric Errors Dashboard Grafana Fabric Port State Dashboard Grafana Fabric RFC3635 Dashboard SAT is installed as a separate product as part of the HPE Cray EX System base installation.\nSystem Admin Toolkit Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes manager nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. Non-interactive: Running a sat command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. 
The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. The following two examples show the same action, checking the system status, using interactive and non-interactive modes.\nInteractive ncn-m001# sat bash (CONTAINER-ID)sat-container# sat status Non-interactive ncn-m001# sat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, then use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following example.\nncn-m001# sat-man status A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed either with man sat or man sat-podman from the manager node.\nncn-m001# man sat ncn-m001# man sat-podman Command Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.\nThe root or super-user account always has the # character at the end of the prompt and has the host name of the host in the prompt. Any non-root account is indicated with account@hostname\u0026gt;. A user account that is neither root nor crayadm is referred to as user. 
The command prompt inside the SAT container environment is indicated with the string as follows. It also has the \u0026ldquo;#\u0026rdquo; character at the end of the prompt. Command Prompt Meaning ncn-m001# Run on one of the Kubernetes Manager servers. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) Examples of the sat status command used by an administrator:\nncn-m001# sat status ncn-m001# sat bash (CONTAINER_ID) sat-container# sat status SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX (Shasta) software stack. The following list shows these dependencies for each subcommand. Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Compute Rolling Upgrade Service (CRUS) Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 COS Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diag Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat slscheck CSM Hardware State Manager (HSM) Kubernetes S3 sat status CSM Hardware State Manager (HSM) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, 
+{ + "uri": "/docs-sat/en-23/dashboards/sat_grafana_dashboards/", + "title": "SAT Grafana Dashboards", + "tags": [], + "description": "", + "content": "SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity.\nGrafana can be accessed via web browser at the following URL:\nhttps://sma-grafana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com\nFor additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the SMA product documentation.\nFor more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry Kafka Topics in the SMA product documentation.\nNavigate SAT Grafana Dashboards There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display telemetry in a tabular format.\nDashboard Name Display Type Fabric Congestion Chart Panels Fabric RFC3635 Chart Panels Fabric Errors Tabular Format Fabric Port State Tabular Format The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. 
The interval setting is not used for tabular dashboards.\nSAT Grafana Interval and Locations Options Shows the Interval and Locations Options for the available telemetry.\nThe value of the Interval option sets the time resolution of the received telemetry. This works a bit like a histogram, with the available telemetry in an interval of time going into a \u0026ldquo;bucket\u0026rdquo; and averaging out to a single point on the chart or table. The special value auto will choose an interval based on the time range selected.\nFor additional information, refer to Grafana Templates and Variables.\nThe Locations option allows restriction of the telemetry shown by locations, either individual links or all links in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, which always has entries for all links and switches, although the errors shown are restricted to the selected time range.\nThe chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart\u0026rsquo;s legend or the trace on the chart.\nGrafana Fabric Congestion Dashboard SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in the system and assess the past and present health of the high-speed network. It also allows the ability to drill down to view data for specific ports on specific switches.\nThis dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, local, and global and correspond to the link\u0026rsquo;s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to \u0026ldquo;all\u0026rdquo;) selected.\nThe metric values for links of a given port type are similar in value to each other but very distinct from the values of other types. 
If the values for different port types are all plotted together, the values for links with lower values are indistinguishable from zero when plotted.\nThe port type of a link is reported as a port state \u0026ldquo;subtype\u0026rdquo; event when defined at port initialization.\nGrafana Fabric Errors Dashboard This dashboard reports error counters in a tabular format in three panels.\nThere is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nUnlike other dashboards, the locations presented are all locations in the system rather than having telemetry within the time range selected. However, the values are taken from telemetry within the time range.\nGrafana Fabric Port State Dashboard There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nThe Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a long time range may be necessary to obtain any values. 
Port State is refreshed daily, so a time range of 24 hours results in all states for all links in the system being shown.\nThe three columns named, group, switch, and port are not port state events, but extra information included with all port state events.\nGrafana Fabric RFC3635 Dashboard For additional information on performance counters, refer to Definitions of Managed Objects for the Ethernet-like Interface Types, an Internet standards document.\nBecause these metrics are counters that only increase over time, the values plotted are the change in the counter\u0026rsquo;s value over the interval setting.\n" +}, +{ + "uri": "/docs-sat/en-23/dashboards/sat_kibana_dashboards/", + "title": "SAT Kibana Dashboards", + "tags": [], + "description": "", + "content": "SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information.\nKibana can be accessed via web browser at the following URL:\nhttps://sma-kibana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com\nFor additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product documentation.\nAdditional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this table.\nDashboard Short Description Long Description Kibana Visualization and Search Name sat-aer AER corrected Corrected Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-corrected Search: sat-aer-corrected sat-aer AER fatal Fatal Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-fatal Search: sat-aer-fatal sat-atom ATOM failures Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. sat-atom-failed sat-atom ATOM admindown Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. sat-atom-admindown sat-heartbeat Heartbeat loss events Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. sat-heartbeat sat-kernel Kernel assertions The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. 
sat-kassertions sat-kernel Kernel panics The kernel panics when something is seriously wrong. The node goes down. sat-kernel-panic sat-kernel Lustre bugs (LBUGs) The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. sat-lbug sat-kernel CPU stalls CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. sat-cpu-stall sat-kernel Out of memory An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. sat-oom sat-mce MCE Machine Check Exceptions (MCE) are errors detected at the processor level. sat-mce sat-rasdaemon rasdaemon errors Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. sat-rasdaemon-error sat-rasdaemon rasdaemon messages All messages from the rasdaemon service on nodes. sat-rasdaemon Disable Search Highlighting in Kibana Dashboard By default, search highlighting is enabled. 
This procedure instructs how to disable search highlighting.\nThe Kibana Dashboard should be open on your system.\nNavigate to Management\nNavigate to Advanced Settings in the Kibana section, below the Elastic search section\nScroll down to the Discover section\nChange Highlight results from on to off\nClick Save to save changes\nAER Kibana Dashboard The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors are split up into separate visualizations depending on whether they are fatal or corrected errors.\nView the AER Kibana Dashboard Go to the dashboard section.\nSelect sat-aer dashboard.\nChoose the time range of interest.\nView the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nATOM Kibana Dashboard The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health checks and application test failures. Some test failures are of possible interest even though a node is not marked admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide clues if a node otherwise fails. They might also show application problems.\nView the ATOM Kibana Dashboard HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.\nGo to the dashboard section.\nSelect sat-atom dashboard.\nChoose the time range of interest.\nView any nodes marked admindown and any ATOM test failures. These failures occur during health checks and application test failures. Test failures marked admindown are important to note. 
View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nHeartbeat Kibana Dashboard The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss.\nView the Heartbeat Kibana Dashboard Go to the dashboard section.\nSelect sat-heartbeat dashboard.\nChoose the time range of interest.\nView the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.\nKernel Kibana Dashboard The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using too much memory.\nView the Kernel Kibana Dashboard Go to the dashboard section.\nSelect sat-kernel dashboard.\nChoose the time range of interest.\nView the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nMCE Kibana Dashboard The MCE Dashboard displays CPU detected processor-level hardware errors.\nView the MCE Kibana Dashboard Go to the dashboard section.\nSelect sat-mce dashboard.\nChoose the time range of interest.\nView the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nRasdaemon Kibana Dashboard The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one for only messages of severity \u0026ldquo;emerg\u0026rdquo; or \u0026ldquo;err\u0026rdquo; and another for all messages from rasdaemon.\nView the Rasdaemon Kibana Dashboard Go to the dashboard section.\nSelect sat-rasdaemon dashboard.\nChoose the time range of interest.\nView the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\n" +}, +{ + "uri": "/docs-sat/en-23/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Summary of Changes in SAT 2.3 The 2.3.4 version of the SAT product includes:\nVersion 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None.\nCurrent Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share.\nFiles in the current working directory must be specified using relative paths to that directory, because the current working directory is always mounted on /sat/share. Absolute paths should be avoided, and paths that are outside of $HOME or $PWD are never accessible to the container environment.\nThe home directory is still mounted on the same path inside the container as it is on the host.\nChanges to sat bootsys The following options were added to sat bootsys.\n--bos-limit --recursive The --bos-limit option passes a given limit string to a BOS session. The --recursive option specifies a slot or other higher-level component in the limit string.\nChanges to sat bootprep The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS jobs after sat bootprep is run. 
Jobs are no longer deleted by default.\nChanges to sat status sat status now includes information about nodes\u0026rsquo; CFS configuration statuses, such as desired configuration, configuration status, and error count.\nThe output of sat status now splits different component types into different report tables.\nThe following options were added to sat status.\n--hsm-fields, --sls-fields, --cfs-fields --bos-template The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns according to specified CSM services.\nThe --bos-template option filters the status report according to the specified session template\u0026rsquo;s boot sets.\nCompatibility with CSM 1.2 The following components were modified to be compatible with CSM 1.2.\nsat-cfs-install container image and Helm chart sat-install-utility container image SAT product installer GPG Checking The sat-ncn ansible role provided by sat-cfs-install was modified to enable GPG checks on packages while leaving GPG checks disabled on repository metadata.\nSecurity Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed Python dependency versions.\nBug Fixes Minor bug fixes were made in each of the repositories. 
For full change lists, see each repository’s CHANGELOG.md file.\nThe known issues listed under the SAT 2.2 release were fixed.\nSummary of changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022.\nThis version of the SAT product included:\nVersion 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components:\nVersion 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release.\nKnown issues in SAT 2.2 sat command unavailable in sat bash shell After launching a shell within the SAT container with sat bash, the sat command will not be found. For example:\n(CONTAINER-ID) sat-container:~ # sat status bash: sat: command not found This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the $PATH environment variable:\n(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH (CONTAINER-ID) sat-container:~ # sat status Or, the file /sat/venv/bin/activate may be sourced:\n(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate (CONTAINER-ID) sat-container:~ # sat status Tab completion unavailable in sat bash shell After launching a shell within the SAT container with sat bash, tab completion for sat commands does not work.\nThis can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:\nsource /etc/bash_completion.d/sat-completion.bash OCI runtime permission error when running sat in root directory sat commands will not work if the current directory is /. 
For example:\nncn-m001:/ # sat --help Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error To resolve, run sat in another directory.\nDuplicate mount error when running sat in config directory sat commands will not work if the current directory is ~/.config/sat. For example:\nncn-m001:~/.config/sat # sat --help Error: /root/.config/sat: duplicate mount destination To resolve, run sat in another directory.\nNew sat commands sat bootprep automates the creation of CFS configurations, the build and customization of IMS images, and the creation of BOS session templates. See SAT Bootprep for details. sat slscheck performs a check for consistency between the System Layout Service (SLS) and the Hardware State Manager (HSM). sat bmccreds provides a simple interface for interacting with the System Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist displays hardware component history by xname (location) or by its Field-Replaceable Unit ID (FRUID). This command queries the Hardware State Manager (HSM) API to obtain this information. Since the sat hwhist command supports querying for the history of a component by its FRUID, the FRUID of components has been added to the output of sat hwinv. Additional Install Automation The following automation has been added to the install script, install.sh:\nWait for the completion of the sat-config-import Kubernetes job, which is started when the sat-cfs-install Helm chart is deployed. Automate the modification of the CFS configuration, which applies to master management NCNs (e.g. \u0026ldquo;ncn-personalization\u0026rdquo;). 
Changes to Product Catalog Data Schema The SAT product uploads additional information to the cray-product-catalog Kubernetes ConfigMap detailing the components it provides, including container (Docker) images, Helm charts, RPMs, and package repositories.\nThis information is used to support uninstall and activation of SAT product versions moving forward.\nSupport for Uninstall and Activation of SAT Versions Beginning with the 2.2 release, SAT now provides partial support for the uninstall and activation of the SAT product stream.\nSee Uninstall: Removing a Version of SAT and Activate: Switching Between Versions for details.\nImprovements to sat status A Subrole column has been added to the output of sat status. This allows you to easily differentiate between master, worker, and storage nodes in the management role, for example.\nHostname information from SLS has been added to sat status output.\nAdded Support for JSON Output Support for JSON-formatted output has been added to commands which currently support the --format option, such as hwinv, status, and showrev.\nUsability Improvements Many usability improvements have been made to multiple sat commands, mostly related to filtering command output. The following are some highlights:\nAdded --fields option to display only specific fields for subcommands which display tabular reports. Added ability to filter on exact matches of a field name. Improved handling of multiple matches of a field name in --filter queries so that the first match is used, similar to --sort-by. Added support for --filter, --fields, and --reverse for summaries displayed by sat hwinv. Added borders to summary tables generated by sat hwinv. Improved documentation in the man pages. Default Log Level Changed The default log level for stderr has been changed from \u0026ldquo;WARNING\u0026rdquo; to \u0026ldquo;INFO\u0026rdquo;. 
For details, see SAT Logging.\nMore Granular Log Level Configuration Options With the command-line options --loglevel-stderr and --loglevel-file, the log level can now be configured separately for stderr and the log file.\nThe existing --loglevel option is now an alias for the --loglevel-stderr option.\nPodman Wrapper Script Improvements The Podman wrapper script is the script installed at /usr/bin/sat on the master management NCNs by the cray-sat-podman RPM that runs the cray-sat container in podman. The following subsections detail improvements that were made to the wrapper script in this release.\nMounting of $HOME and Current Directories in cray-sat Container The Podman wrapper script that launches the cray-sat container with podman has been modified to mount the user\u0026rsquo;s current directory and home directory into the cray-sat container to provide access to local files in the container.\nPodman Wrapper Script Documentation Improvements The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:\nEnvironment variables that affect execution of the wrapper script Host files and directories mounted in the container Fixes to Podman Wrapper Script Output Redirection Fixed issues with redirecting stdout and stderr, and piping output to commands, such as awk, less, and more.\nConfigurable HTTP Timeout A new sat option has been added to configure the HTTP timeout length for requests to the API gateway. See sat-man sat for details.\nsat bootsys Improvements Many improvements and fixes have been made to sat bootsys. The following are some highlights:\nAdded the --excluded-ncns option, which can be used to omit NCNs from the platform-services and ncn-power stages in case they are inaccessible. Disruptive shutdown stages in sat bootsys shutdown now prompt the user to continue before proceeding. A new option, --disruptive, will bypass this. 
Improvements to Ceph service health checks and restart during the platform-services stage of sat bootsys boot. sat xname2nid Improvements sat xname2nid can now recursively expand slot, chassis, and cabinet xnames to a list of nids in those locations.\nA new --format option has been added to sat xname2nid. It sets the output format to either \u0026ldquo;range\u0026rdquo; (the default) or \u0026ldquo;nid\u0026rdquo;. The \u0026ldquo;range\u0026rdquo; format displays nids in a compressed range format suitable for use with a workload manager like Slurm.\nUsage of v2 HSM API The commands which interact with HSM (e.g., sat status and sat hwinv) now use the v2 HSM API.\nsat diag Limited to HSN Switches sat diag will now only operate against HSN switches by default. These are the only controllers that support running diagnostics with HMJTD.\nsat showrev Enhancements A column has been added to the output of sat showrev that indicates whether a product version is \u0026ldquo;active\u0026rdquo;. The definition of \u0026ldquo;active\u0026rdquo; varies across products, and not all products may set an \u0026ldquo;active\u0026rdquo; version.\nFor SAT, the active version is the one with its hosted-type package repository in Nexus set as the member of the group-type package repository in Nexus, meaning that it will be used when installing the cray-sat-podman RPM.\ncray-sat Container Image Size Reduction The size of the cray-sat container image has been approximately cut in half by leveraging multi-stage builds. This also improved the repeatability of the unit tests by running them in the container.\nBug Fixes Minor bug fixes were made in cray-sat and in cray-sat-podman. 
For full change lists, see each repository\u0026rsquo;s CHANGELOG.md file.\nSummary of SAT changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named \u0026ldquo;ncn-personalization\u0026rdquo;.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. 
Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-diags product. The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic executables on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and config file options have been removed. For further instructions, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes masters and workers.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. 
Summary of SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. Summary of SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. 
This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes management cluster (i.e., workers and masters). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Config File Location Change The default location of the SAT config file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own config files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a config file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers option. 
The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. For detailed instructions, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API.
Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. A command-line option, --bos-templates, and a corresponding config-file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding config file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log.
This change simplifies mounting this file into the sat container running under Podman.\nSummary of SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\nSummary of SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch See the System Admin Toolkit Command Overview and the table of commands in the SAT Authentication section of this document for more details on each of these commands.\n" +}, +{ + "uri": "/docs-sat/en-23/usage/", + "title": "SAT Usage", + "tags": [], + "description": "", + "content": "SAT Usage SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created.\nThis automated process centers around the sat bootprep command. 
Man page documentation for sat bootprep can be viewed similarly to other SAT commands.\nncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.\nsat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.\nEditing a bootprep input file The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. An input file is composed of three main sections, one each for configurations, images, and session templates. These sections may be specified in any order, and any of the sections may be omitted if desired.\nCreating CFS configurations The configurations section begins with a configurations: key.\n--- configurations: Under this key, the user can list one or more configurations to create. For each configuration, a name should be given, in addition to the list of layers which comprise the configuration. Each layer can be defined by a product name and optionally a version number, or commit hash or branch in the product\u0026rsquo;s configuration repository. Alternatively, a layer can be defined by a Git repository URL directly, along with an associated branch or commit hash.\nWhen a configuration layer is specified in terms of a product name, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. 
A version may be supplied, but if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.\n--- configurations: - name: example-configuration layers: - name: example product playbook: example.yml product: name: example version: 1.2.3 Alternatively, a configuration layer may be specified by explicitly referencing the desired configuration repository, along with the branch containing the intended version of the Ansible playbooks. A commit hash may be specified by replacing branch with commit.\n... - name: another example product playbook: another-example.yml git: url: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34; branch: main ... When sat bootprep is run against an input file, a CFS configuration will be created corresponding to each configuration in the configurations section. For example, the configuration created from an input file with the layers listed above might look something like the following:\n{ \u0026#34;lastUpdated\u0026#34;: \u0026#34;2022-02-07T21:47:49Z\u0026#34;, \u0026#34;layers\u0026#34;: [ { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;example.yml\u0026#34; }, { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;another example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;another-example.yml\u0026#34; } ], \u0026#34;name\u0026#34;: \u0026#34;example-configuration\u0026#34; } Creating IMS images After specifying configurations, the user may add images to the input file which are to be built by IMS. 
To add an images section, the user should add an images key.\n--- configurations: ... (omitted for brevity) images: Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name, as well as an ims section containing a definition of the image to be built and/or configured. Images may be defined by an image recipe, or by a pre-built image. Recipes and pre-built images are referred to by their names or IDs in IMS. The ims section should also contain an is_recipe property, which indicates whether the name or ID refers to an image recipe or a pre-built image. Images may also optionally provide a text description of the image. This description is not stored or used by sat bootprep or any CSM services, but is useful for documenting images in the input file.\n--- configurations: ... (omitted for brevity) images: - name: example-compute-image description: \u0026gt; An example compute node image for illustrative purposes. ims: name: example-compute-image-recipe is_recipe: true - name: another-example-compute-image description: \u0026gt; Another example compute node image. ims: id: \u0026lt;IMS image UUID\u0026gt; is_recipe: false Images may also contain a configuration property in their definition, which specifies a configuration with which to customize the built image prior to booting. If a configuration is specified, then configuration groups must also be specified using the configuration_group_names property.\n--- configurations: ... (omitted for brevity) images: - name: example-compute-image description: \u0026gt; An example compute node image for illustrative purposes. 
ims: name: example-compute-image-recipe is_recipe: true configuration: example configuration configuration_group_names: - Compute Creating BOS session templates BOS session templates are the final section of the input file, and are defined under the session_templates key.\n--- configurations: ... (omitted for brevity) images: ... (omitted for brevity) session_templates: Each session template is defined in terms of its name, an image, a configuration, and a set of parameters which can be used to configure the session. The name, image, and configuration are specified with their respective name, image, and configuration keys. bos_parameters may also be specified; currently, the only setting under bos_parameters that is supported is boot_sets, which can be used to define boot sets in the BOS session template. Each boot set is defined under its own property under boot_sets, and the value of each boot set can contain the following properties, all of which are optional:\nkernel_parameters: the parameters passed to the kernel on the command line network: the network over which the nodes will boot node_list: nodes to add to the boot set node_roles_groups: HSM roles to add to the boot set node_groups: HSM groups to add to the boot set rootfs_provider: the root file system provider rootfs_provider_passthrough: parameters to add to the rootfs= kernel parameter The properties listed previously are the same as the parameters that can be specified directly through BOS boot sets. More information can be found in the CSM documentation on session templates. Additional properties not listed are passed through to the BOS session template as written.\nAn example session template might look like the following:\nconfigurations: ... (omitted for brevity) images: ... 
(omitted for brevity) session_templates: - name: example-session-template image: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_list: [] rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 Example bootprep input files Putting together all of the previous input file sections, an example bootprep input file might look something like the following.\n--- configurations: - name: cos-config layers: - name: cos-integration-2.2.87 playbook: site.yml product: name: cos version: 2.2.87 branch: integration - name: cpe-integration-21.12.3 playbook: pe_deploy.yml product: name: cpe version: 21.12.3 branch: integration - name: slurm-master-1.1.1 playbook: site.yml product: name: slurm version: 1.1.1 branch: master images: - name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 ims: is_recipe: true name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 configuration: cos-config configuration_group_names: - Compute session_templates: - name: cray-shasta-compute-sles15sp3.x86_64-2.2.35 image: cray-shasta-compute-sles15sp3.x86_64-2.2.35 configuration: cos-config bos_parameters: boot_sets: compute: kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN} node_roles_groups: - Compute Creating a pre-populated example bootprep input file It is possible to create an example bootprep input file using values from the system\u0026rsquo;s product catalog using the sat bootprep generate-example command.\nncn-m001# sat bootprep generate-example INFO: Using latest version (2.3.24-20220113160653) of product cos INFO: Using latest version (21.11.4) of product cpe INFO: Using latest version (1.0.7) of product slurm INFO: Using latest version (1.1.24) of product analytics INFO: Using latest version (2.1.5) of product uan INFO: Using latest version (21.11.4) of product cpe INFO: Using latest version (1.0.7) of product slurm INFO: Using latest version (1.1.24) of product 
analytics INFO: Using latest version (2.3.24-20220113160653) of product cos INFO: Using latest version (2.1.5) of product uan INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml. This file should be reviewed and edited to match the desired parameters of the configurations, images, and session templates.\nViewing built-in generated documentation The contents of the YAML input files described above must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named \u0026ldquo;JSON Schema\u0026rdquo;, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.\nViewing the exact schema specification To view the exact schema specification, run sat bootprep view-schema.\nncn-m001# sat bootprep view-schema --- $schema: \u0026#34;https://json-schema.org/draft-07/schema\u0026#34; title: Bootprep Input File description: \u0026gt; A description of the set of CFS configurations to create, the set of IMS images to create and optionally customize with the defined CFS configurations, and the set of BOS session templates to create that reference the defined images and configurations. type: object additionalProperties: false properties: ... Generating user-friendly documentation The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature was included which can generate user-friendly HTML documentation for the input file schema which can be browsed with the user\u0026rsquo;s preferred web browser.\nCreate a documentation tarball using sat bootprep.\nncn-m001# sat bootprep generate-docs INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz An alternate output directory can be specified with the --output-dir option. 
The generated tarball is always named bootprep-schema-docs.tar.gz.\nncn-m001# sat bootprep generate-docs --output-dir /tmp INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz From another machine, copy the tarball to a local directory.\nanother-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz . Extract the contents of the tarball and open the contained index.html.\nanother-machine$ tar xzvf bootprep-schema-docs.tar.gz x bootprep-schema-docs/ x bootprep-schema-docs/index.html x bootprep-schema-docs/schema_doc.css x bootprep-schema-docs/schema_doc.min.js another-machine$ open bootprep-schema-docs/index.html " +}, +{ + "uri": "/docs-sat/en-23/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-23/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-23/index.xml b/en-23/index.xml new file mode 100644 index 0000000000..62096a20a3 --- /dev/null +++ b/en-23/index.xml @@ -0,0 +1,54 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-23/ + Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-23 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Installation + /docs-sat/en-23/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/install/ + SAT Installation Install the System Admin Toolkit Product Stream Describes how to install the System Admin Toolkit (SAT) product stream. Prerequisites CSM is installed and verified. cray-product-catalog is running. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace 2.2.x with the version of the SAT product stream being installed. 
+ + + Introduction to SAT + /docs-sat/en-23/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview. + + + SAT Grafana Dashboards + /docs-sat/en-23/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-23/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. + + + SAT Release Notes + /docs-sat/en-23/release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/release_notes/ + SAT Release Notes Summary of Changes in SAT 2.3 The 2.3.4 version of the SAT product includes: Version 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None. Current Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share. + + + SAT Usage + /docs-sat/en-23/usage/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-23/usage/ + SAT Usage SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similarly to other SAT commands. ncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session. + + + diff --git a/en-23/install/index.html b/en-23/install/index.html new file mode 100644 index 0000000000..e598887d68 --- /dev/null +++ b/en-23/install/index.html @@ -0,0 +1,1505 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install the System Admin Toolkit Product Stream

+

Describes how to install the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+
    +
  • CSM is installed and verified.
  • +
  • cray-product-catalog is running.
  • +
  • There must be at least 2 gigabytes of free space on the manager NCN on which the +procedure is run.
  • +
+

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.2.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • To upgrade SAT, execute the pre-installation, installation, and post-installation +procedures for a newer distribution. The newly installed version will become +the default.
  • +
+

Pre-Installation Procedure

+
    +
  1. +

    Start a typescript.

    +

    The typescript will record the commands and the output from this installation.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Installation Procedure

+
    +
  1. +

    Copy the release distribution gzipped tar file to ncn-m001.

    +
  2. +
  3. +

    Unzip and extract the release distribution, 2.2.x.

    +
    ncn-m001# tar -xvzf sat-2.2.x.tar.gz
    +
  4. +
  5. +

    Change directory to the extracted release distribution directory.

    +
    ncn-m001# cd sat-2.2.x
    +
  6. +
  7. +

    Run the installer: install.sh.

    +

    The script produces a lot of output. A successful install ends with “SAT +version 2.2.x has been installed”.

    +
    ncn-m001# ./install.sh
    +...
    +====> Updating active CFS configurations
    +...
    +====> SAT version 2.2.x has been installed.
    +
  8. +
  9. +

    Upgrade only: Record the names of the CFS configuration or +configurations modified by install.sh.

    +

    The install.sh script attempts to modify any CFS configurations that apply +to the master management NCNs. During an upgrade, install.sh will log +messages indicating the CFS configuration or configurations that were +modified. For example, if there are three master nodes all using the same +CFS configuration named “ncn-personalization”, the output would look like +this:

    +
    ====> Updating active CFS configurations
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s3b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s5b0n0
    +INFO: Updating CFS configuration "ncn-personalization"
    +INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration "ncn-personalization".
    +INFO: Key "name" in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16
    +INFO: Successfully updated layers in configuration "ncn-personalization"
    +

    Save the name of each CFS configuration updated by the installer. In the +previous example, a single configuration named “ncn-personalization” was +updated, so that name is saved to a temporary file.

    +
    ncn-m001# echo ncn-personalization >> /tmp/sat-ncn-cfs-configurations.txt
    +

    Repeat the previous command for each CFS configuration that was updated.

    +
  10. +
  11. +

    Upgrade only: Save the new name of the SAT CFS configuration layer.

    +

    In the example install.sh output above, the new layer name is +sat-2.2.16. Save this value to a file to be used later.

    +
    ncn-m001# echo sat-2.2.16 > /tmp/sat-layer-name.txt
    +
  12. +
  13. +

    Fresh install only: Save the CFS configuration layer for SAT to a file +for later use.

    +

    The install.sh script attempts to modify any CFS configurations that apply +to the master management NCNs. During a fresh install, no such CFS +configurations will be found, and it will instead log the SAT configuration +layer that must be added to the CFS configuration that will be created. Here +is an example of the output in that case:

    +
    ====> Updating active CFS configurations
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
    +WARNING: No CFS configurations found that apply to components with role Management and subrole Master.
    +INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master.
    +{
    +    "name": "sat-2.2.15",
    +    "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
    +    "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
    +    "playbook": "sat-ncn.yml"
    +}
    +

    Save the JSON output to a file for later use. For example:

    +
    ncn-m001# cat > /tmp/sat-layer.json <<EOF
    +> {
    +>     "name": "sat-2.2.15",
    +>     "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
    +>     "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
    +>     "playbook": "sat-ncn.yml"
    +> }
    +> EOF
    +

    Do not copy the previous command verbatim. Use the JSON output from the +install.sh script.

    +
  14. +
+

Post-Installation Procedure

+
    +
  1. +

    Optional: Remove the SAT release distribution tar file and extracted directory.

    +
    ncn-m001# rm sat-2.2.x.tar.gz
    +ncn-m001# rm -rf sat-2.2.x/
    +
  2. +
  3. +

    Upgrade only: Ensure that the environment variable SAT_TAG is not set +in the ~/.bashrc file on any of the management NCNs.

    +

    NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.

    +

    The following example assumes three manager NCNs: ncn-m001, ncn-m002, and ncn-m003, +and shows output from a system in which no further action is needed.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m003: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +

    The following example shows that SAT_TAG is set in ~/.bashrc on ncn-m002. +Remove that line from the ~/.bashrc file on ncn-m002.

    +
    ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
    +ncn-m001: source <(kubectl completion bash)
    +ncn-m002: source <(kubectl completion bash)
    +ncn-m002: export SAT_TAG=3.5.0
    +ncn-m003: source <(kubectl completion bash)
    +
  4. +
  5. +

    Stop the typescript.

    +

    NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.

    +
    ncn-m001# exit
    +
  6. +
+

SAT version 2.2.x is now installed/upgraded, meaning the SAT 2.2.x release +has been loaded into the system software repository.

+
    +
  • SAT configuration content for this release has been uploaded to VCS.
  • +
  • SAT content for this release has been uploaded to the CSM product catalog.
  • +
  • SAT content for this release has been uploaded to Nexus repositories.
  • +
  • The sat command won’t be available until the NCN Personalization +procedure has been executed.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.

+

NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.

+

Execute the NCN Personalization procedure:

+ +

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the upgrade procedures:

+ +

Perform NCN Personalization

+

Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.

+

Prerequisites

+
    +
  • The Install the System Admin Toolkit Product Stream +procedure has been successfully completed.
  • +
  • If upgrading, the names of the CFS configurations updated during installation +were saved to the file /tmp/sat-ncn-cfs-configurations.txt.
  • +
  • If upgrading, the name of the new SAT CFS configuration layer was saved to +the file /tmp/sat-layer-name.txt.
  • +
  • If performing a fresh install, the SAT CFS configuration layer was saved to +the file /tmp/sat-layer.json.
  • +
+

Notes on the Procedure

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace 2.2.x with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • If upgrading SAT, the existing configuration will likely include other Cray EX product +entries. Update the SAT entry as described in this procedure. The HPE Cray EX System +Software Getting Started Guide provides guidance on how and when to update the +entries for the other products.
  • +
+

Procedure

+
    +
  1. +

    Start a typescript if not already using one.

    +

    The typescript will capture the commands and the output from this installation procedure.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
  3. +

    Fresh install only: Add the SAT layer to the NCN personalization JSON file.

    +

    If the SAT install script, install.sh, did not identify and modify the CFS +configurations that apply to each master management NCN, it will have printed +the SAT CFS configuration layer in JSON format. This layer must be added to +the JSON file being used to construct the CFS configuration. For example, +if the file being used is named ncn-personalization.json, and the SAT +layer was saved to the file /tmp/sat-layer.json as described in the +install instructions, the following jq command will append the SAT layer +and save the result in a new file named ncn-personalization.new.json.

    +
    ncn-m001# jq -s '{layers: (.[0].layers + [.[1]])}' ncn-personalization.json \
    +    /tmp/sat-layer.json > ncn-personalization.new.json
    +

    For instructions on how to create a CFS configuration from the previous +file and how to apply it to the management NCNs, refer to “Perform NCN +Personalization” in the HPE Cray System Management Documentation. After +the CFS configuration has been created and applied, return to this +procedure.

    +
  4. +
  5. +

    Upgrade only: Invoke each CFS configuration that was updated during the +upgrade.

    +

    If the SAT install script, install.sh, identified CFS configurations that +apply to the master management NCNs and modified them in place, invoke each +CFS configuration that was created or updated during installation.

    +

    This step will create a CFS session for each given configuration and install +SAT on the associated manager NCNs.

    +

    The --configuration-limit option limits the configuration session to run +only the SAT layer of the configuration.

    +

    You should see a representation of the CFS session in the output.

    +
    ncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt);
    +do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
    +    "${cfs_configuration}" --configuration-limit $(cat /tmp/sat-layer-name.txt);
    +done
    +
    +name="sat-session-ncn-personalization"
    +
    +[ansible]
    +...
    +
  6. +
  7. +

    Upgrade only: Monitor the progress of each CFS session.

    +

    This step assumes a single session named sat-session-ncn-personalization was created in the previous step.

    +

    First, list all containers associated with the CFS session:

    +
    ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
    +    -o json | jq '.items[0].spec.containers[] | .name'
    +"inventory"
    +"ansible-1"
    +"istio-proxy"
    +

    Next, get the logs for the ansible-1 container.

    +

    NOTE: the trailing digit might differ from “1”. It is the zero-based +index of the sat-ncn layer within the configuration’s layers.

    +
    ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-ncn-personalization
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Execute this step for each unique CFS configuration.

    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  8. +
  9. +

    Verify that SAT was successfully configured.

    +

    If sat is configured, the sat --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +sat 3.7.0
    +

    NOTE: Upon first running sat, you may see additional output while the sat +container image is downloaded. This will occur the first time sat is run on +each manager NCN. For example, if you run sat for the first time on ncn-m001 +and then for the first time on ncn-m002, you will see this additional output +both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  10. +
  11. +

    Stop the typescript.

    +
    ncn-m001# exit
    +
  12. +
+

SAT version 2.2.x is now configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.

+

If performing a fresh install, execute the SAT Setup procedures:

+ +

If performing an upgrade, execute the SAT Post-Upgrade procedures:

+ +

SAT Authentication

+

Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in +later steps of the install process. The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see +Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. +For additional information on SAT authentication, see System Security and Authentication in the CSM +documentation.

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Description of SAT Command Authentication Types

+

Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to +the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, +the System Administrator must generate the S3 access key and secret key and write them to a local file. This must be +done on every Kubernetes manager node where SAT commands are run.

+

Below is a table describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bmccredsRequires authentication to the API gateway.sat-bmccredsSet BMC passwords.
sat bootprepRequires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install.sat-bootprepPrepare to boot nodes with images and configurations.
sat bootsysRequires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwhistRequires authentication to the API gateway.sat-hwhistReport hardware component history.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat k8sRequires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install.sat-k8sReport on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node).
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node xnames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat slscheckRequires authentication to the API gateway.sat-slscheckPerform a cross-check between SLS and HSM.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC xnames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password +on the command line. The username value is obtained from the following locations, in order from highest precedence to lowest +precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to +~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, +and will use the token for that username if it has been obtained and saved by sat auth.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:

+
    +
  1. +

    Generate a default SAT configuration file, if one does not exist.

    +
    ncn-m001# sat init
    +Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the config file already exists, sat init will print an error:

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. E.g.:

    +
    username = "crayadmin"
    +
  4. +
  5. +

    Run sat auth. Enter your password when prompted. E.g.:

    +
    ncn-m001# sat auth
    +Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    Other sat commands are now authenticated to make requests to the API gateway. E.g.:

    +
    ncn-m001# sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret key and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev +(see: Run sat setrev to Set System Information).

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Ensure the files are readable only by root.

    +
    ncn-m001# touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    Write the credentials to local files using kubectl.

    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      Get the SAT configuration file’s endpoint value.

      +

      NOTE: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      ncn-m001# grep endpoint ~/.config/sat/sat.toml
      +# endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      Get the sat-s3-credentials secret’s endpoint value.

      +
      ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    Copy SAT configurations to each manager node on the system.

    +
    ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.

    +
  8. +
+

Run sat setrev to Set System Information

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Run sat setrev to set System Revision Information. Follow the on-screen prompts to set +the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    TIP: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. I.e., “System type” is EX-1C.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Run sat showrev to verify System Revision Information. The following tables contain example information.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+

Remove obsolete configuration file sections

+

Prerequisites

+ +

Procedure

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. +In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+

SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO level +are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
sat bootsys --stage shutdown --stage session-checks
+sat sensors
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
sat nid2xname
+sat xname2nid
+sat swap
+

Uninstall: Removing a Version of SAT

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be uninstalled with prodmgr.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +

    NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| product_name | product_version | active | images            | image_recipes         |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                 | -                     |
    +| sat          | 2.2.10          | False  | -                 | -                     |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to uninstall a version of SAT.

    +

    This command will do three things:

    +
      +
    • Remove all hosted-type package repositories associated with the given version of SAT. Group-type +repositories are not removed.
    • +
    • Remove all container images associated with the given version of SAT.
    • +
    • Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up +in the output of sat showrev.
    • +
    +
    ncn-m001# prodmgr uninstall sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 has been removed.
    +Removed Docker image cray/cray-sat:3.9.0
    +Removed Docker image cray/sat-cfs-install:1.0.2
    +Removed Docker image cray/sat-install-utility:1.4.0
    +Deleted sat-2.2.10 from product catalog.
    +
  4. +
+

Activate: Switching Between Versions

+

This procedure can be used to downgrade the active version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                  | -                     |
    +| sat          | 2.2.10          | False  | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to activate a different version of SAT.

    +

    This command will do three things:

    +
      +
    • For all hosted-type package repositories associated with this version of SAT, set them as the sole member +of their corresponding group-type repository. For example, activating SAT version 2.2.10 +sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group.
    • +
    • Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of +sat showrev.
    • +
    • Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are +associated with NCNs with the role “Management” and subrole “Master” (for example, the CFS configuration +ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS +configuration content associated with the version of SAT being activated.
    • +
    +
    ncn-m001# prodmgr activate sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
    +Set sat-2.2.10 as active in product catalog.
    +Updated CFS configurations: [ncn-personalization]
    +
  4. +
  5. +

    Verify that the chosen version is marked as active.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | False  | -                  | -                     |
    +| sat          | 2.2.10          | True   | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  6. +
  7. +

    Run NCN Personalization.

    +

    At this point, the command has modified Nexus package repositories to set a particular package repository +as active, but no packages on the NCNs have been changed. In order to complete the activation process, +NCN Personalization must be executed to change the cray-sat-podman package version on the manager NCNs.

    +

    NOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated, +which may not necessarily be just ncn-personalization. If multiple configurations were updated in step 2, then +a cray cfs sessions create command should be run for each of them. This example assumes a single configuration +named ncn-personalization was updated. If multiple were updated, set cfs_configurations to a space-separated +list below.

    +
    ncn-m001# cfs_configurations="ncn-personalization"
    +ncn-m001# for cfs_configuration in ${cfs_configurations}
    +do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
    +    "${cfs_configuration}" --configuration-limit sat-ncn;
    +done
    +
  8. +
  9. +

    Monitor the progress of each CFS session.

    +

    This step assumes a single session named sat-session-ncn-personalization was created in the previous step.

    +

    First, list all containers associated with the CFS session:

    +
    ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
    +    -o json | jq '.items[0].spec.containers[] | .name'
    +"inventory"
    +"ansible-1"
    +"istio-proxy"
    +

    Next, get the logs for the ansible-1 container.

    +

    NOTE: The trailing digit might differ from “1”. It is the zero-based +index of the sat-ncn layer within the configuration’s layers.

    +
    ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-ncn-personalization
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Execute this step for each unique CFS configuration.

    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  10. +
  11. +

    Verify the new version of the SAT CLI.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the SAT Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +3.9.0
    +
  12. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/introduction/index.html b/en-23/introduction/index.html new file mode 100644 index 0000000000..0e051c8096 --- /dev/null +++ b/en-23/introduction/index.html @@ -0,0 +1,823 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.

+

Six Kibana Dashboards are included with SAT. They provide organized output for system health information.

+ +

Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.

+ +

SAT is installed as a separate product as part of the HPE Cray EX System base installation.

+

System Admin Toolkit Command Overview

+

Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes +manager nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes manager node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using interactive and non-interactive modes.

+

Interactive

+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

Non-interactive

+
ncn-m001# sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, then use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following +example.

+
ncn-m001# sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+
ncn-m001# man sat
+
ncn-m001# man sat-podman
+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.

+
    +
  • The root or super-user account always has the # character at the end of the prompt and has the host name of the +host in the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A user account that is neither root nor crayadm is +referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the string shown in the following table. It also has the “#” +character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + +
Command Prompt | Meaning
ncn-m001# | Run on one of the Kubernetes Manager servers. (Non-interactive)
(CONTAINER_ID) sat-container# | Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

Examples of the sat status command used by an administrator:

+
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX (Shasta) software stack. The following list shows these dependencies +for each subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bmccreds

+

CSM

+
    +
  • System Configuration Service (SCSD)
  • +
+

sat bootprep

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Image Management Service (IMS)
  • +
  • Version Control Service (VCS)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Compute Rolling Upgrade Service (CRUS)
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

COS

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diag

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwhist

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat slscheck

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat status

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/release_notes/index.html b/en-23/release_notes/index.html new file mode 100644 index 0000000000..07443f28eb --- /dev/null +++ b/en-23/release_notes/index.html @@ -0,0 +1,1132 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Release Notes

+

Summary of Changes in SAT 2.3

+

The 2.3.4 version of the SAT product includes:

+
    +
  • Version 3.15.4 of the sat python package and CLI
  • +
  • Version 1.6.11 of the sat-podman wrapper script
  • +
  • Version 1.2.0 of the sat-cfs-install container image
  • +
  • Version 2.0.0 of the sat-cfs-install Helm chart
  • +
  • Version 1.5.0 of the sat-install-utility container image
  • +
  • Version 2.0.3 of the cfs-config-util container image
  • +
+

New sat Commands

+

None.

+

Current Working Directory in SAT Container

+

When running sat commands, the current working directory is now mounted in the +container as /sat/share, and the current working directory within the container +is also /sat/share.

+

Files in the current working directory must be specified using relative paths to +that directory, because the current working directory is always mounted on /sat/share. +Absolute paths should be avoided, and paths that are outside of $HOME or $PWD +are never accessible to the container environment.

+

The home directory is still mounted on the same path inside the container as it +is on the host.

+

Changes to sat bootsys

+

The following options were added to sat bootsys.

+
    +
  • --bos-limit
  • +
  • --recursive
  • +
+

The --bos-limit option passes a given limit string to a BOS session. The --recursive +option specifies a slot or other higher-level component in the limit string.

+

Changes to sat bootprep

+

The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS +jobs after sat bootprep is run. Jobs are no longer deleted by default.

+

Changes to sat status

+

sat status now includes information about nodes’ CFS configuration statuses, such +as desired configuration, configuration status, and error count.

+

The output of sat status now splits different component types into different report tables.

+

The following options were added to sat status.

+
    +
  • --hsm-fields, --sls-fields, --cfs-fields
  • +
  • --bos-template
  • +
+

The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns +according to specified CSM services.

+

The --bos-template option filters the status report according to the specified +session template’s boot sets.

+

Compatibility with CSM 1.2

+

The following components were modified to be compatible with CSM 1.2.

+
    +
  • sat-cfs-install container image and Helm chart
  • +
  • sat-install-utility container image
  • +
  • SAT product installer
  • +
+

GPG Checking

+

The sat-ncn ansible role provided by sat-cfs-install was modified to enable +GPG checks on packages while leaving GPG checks disabled on repository metadata.

+

Security

+

Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed +Python dependency versions.

+

Bug Fixes

+

Minor bug fixes were made in each of the repositories. For full change lists, see each +repository’s CHANGELOG.md file.

+

The known issues listed under the SAT 2.2 release were fixed.

+

Summary of changes in SAT 2.2

+

SAT 2.2.16 was released on February 25th, 2022.

+

This version of the SAT product included:

+
    +
  • Version 3.14.0 of the sat python package and CLI
  • +
  • Version 1.6.4 of the sat-podman wrapper script
  • +
  • Version 1.0.4 of the sat-cfs-install container image and Helm chart
  • +
+

It also added the following new components:

+
    +
  • Version 1.4.3 of the sat-install-utility container image
  • +
  • Version 2.0.2 of the cfs-config-util container image
  • +
+

The following sections detail the changes in this release.

+

Known issues in SAT 2.2

+

sat command unavailable in sat bash shell

+

After launching a shell within the SAT container with sat bash, the sat command will not +be found. For example:

+
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+

This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the +$PATH environment variable:

+
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+

Or, the file /sat/venv/bin/activate may be sourced:

+
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+

Tab completion unavailable in sat bash shell

+

After launching a shell within the SAT container with sat bash, tab completion for sat +commands does not work.

+

This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:

+
source /etc/bash_completion.d/sat-completion.bash
+

OCI runtime permission error when running sat in root directory

+

sat commands will not work if the current directory is /. For example:

+
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+

To resolve, run sat in another directory.

+

Duplicate mount error when running sat in config directory

+

sat commands will not work if the current directory is ~/.config/sat. For example:

+
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+

To resolve, run sat in another directory.

+

New sat commands

+
    +
  • sat bootprep automates the creation of CFS configurations, the build and +customization of IMS images, and the creation of BOS session templates. See +SAT Bootprep for details.
  • +
  • sat slscheck performs a check for consistency between the System Layout +Service (SLS) and the Hardware State Manager (HSM).
  • +
  • sat bmccreds provides a simple interface for interacting with the System +Configuration Service (SCSD) to set BMC Redfish credentials.
  • +
  • sat hwhist displays hardware component history by xname (location) or by +its Field-Replaceable Unit ID (FRUID). This command queries the Hardware +State Manager (HSM) API to obtain this information. Since the sat hwhist +command supports querying for the history of a component by its FRUID, the +FRUID of components has been added to the output of sat hwinv.
  • +
+

Additional Install Automation

+

The following automation has been added to the install script, install.sh:

+
    +
  • Wait for the completion of the sat-config-import Kubernetes job, which is +started when the sat-cfs-install Helm chart is deployed.
  • +
  • Automate the modification of the CFS configuration, which applies to master +management NCNs (e.g. “ncn-personalization”).
  • +
+

Changes to Product Catalog Data Schema

+

The SAT product uploads additional information to the cray-product-catalog +Kubernetes ConfigMap detailing the components it provides, including container +(Docker) images, Helm charts, RPMs, and package repositories.

+

This information is used to support uninstall and activation of SAT product +versions moving forward.

+

Support for Uninstall and Activation of SAT Versions

+

Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.

+

See Uninstall: Removing a Version of SAT +and Activate: Switching Between Versions +for details.

+

Improvements to sat status

+

A Subrole column has been added to the output of sat status. This allows you +to easily differentiate between master, worker, and storage nodes in the +management role, for example.

+

Hostname information from SLS has been added to sat status output.

+

Added Support for JSON Output

+

Support for JSON-formatted output has been added to commands which currently +support the --format option, such as hwinv, status, and showrev.

+

Usability Improvements

+

Many usability improvements have been made to multiple sat commands, +mostly related to filtering command output. The following are some highlights:

+
    +
  • Added --fields option to display only specific fields for subcommands which +display tabular reports.
  • +
  • Added ability to filter on exact matches of a field name.
  • +
  • Improved handling of multiple matches of a field name in --filter queries +so that the first match is used, similar to --sort-by.
  • +
  • Added support for --filter, --fields, and --reverse for summaries +displayed by sat hwinv.
  • +
  • Added borders to summary tables generated by sat hwinv.
  • +
  • Improved documentation in the man pages.
  • +
+

Default Log Level Changed

+

The default log level for stderr has been changed from “WARNING” to “INFO”. For +details, see SAT Logging.

+

More Granular Log Level Configuration Options

+

With the command-line options --loglevel-stderr and --loglevel-file, the log level +can now be configured separately for stderr and the log file.

+

The existing --loglevel option is now an alias for the --loglevel-stderr option.

+

Podman Wrapper Script Improvements

+

The Podman wrapper script is the script installed at /usr/bin/sat on the +master management NCNs by the cray-sat-podman RPM that runs the cray-sat +container in podman. The following subsections detail improvements that were +made to the wrapper script in this release.

+

Mounting of $HOME and Current Directories in cray-sat Container

+

The Podman wrapper script that launches the cray-sat container with podman +has been modified to mount the user’s current directory and home directory into +the cray-sat container to provide access to local files in the container.

+

Podman Wrapper Script Documentation Improvements

+

The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:

+
    +
  • Environment variables that affect execution of the wrapper script
  • +
  • Host files and directories mounted in the container
  • +
+

Fixes to Podman Wrapper Script Output Redirection

+

Fixed issues with redirecting stdout and stderr, and piping output to commands, +such as awk, less, and more.

+

Configurable HTTP Timeout

+

A new sat option has been added to configure the HTTP timeout length for +requests to the API gateway. See sat-man sat for details.

+

sat bootsys Improvements

+

Many improvements and fixes have been made to sat bootsys. The following are some +highlights:

+
    +
  • Added the --excluded-ncns option, which can be used to omit NCNs +from the platform-services and ncn-power stages in case they are +inaccessible.
  • +
  • Disruptive shutdown stages in sat bootsys shutdown now prompt the user to +continue before proceeding. A new option, --disruptive, will bypass this.
  • +
  • Improvements to Ceph service health checks and restart during the platform-services +stage of sat bootsys boot.
  • +
+

sat xname2nid Improvements

+

sat xname2nid can now recursively expand slot, chassis, and cabinet xnames to +a list of nids in those locations.

+

A new --format option has been added to sat xname2nid. It sets the output format to +either “range” (the default) or “nid”. The “range” format displays nids in a +compressed range format suitable for use with a workload manager like Slurm.

+

Usage of v2 HSM API

+

The commands which interact with HSM (e.g., sat status and sat hwinv) now +use the v2 HSM API.

+

sat diag Limited to HSN Switches

+

sat diag will now only operate against HSN switches by default. These are the +only controllers that support running diagnostics with HMJTD.

+

sat showrev Enhancements

+

A column has been added to the output of sat showrev that indicates whether a +product version is “active”. The definition of “active” varies across products, +and not all products may set an “active” version.

+

For SAT, the active version is the one with its hosted-type package repository in +Nexus set as the member of the group-type package repository in Nexus, +meaning that it will be used when installing the cray-sat-podman RPM.

+

cray-sat Container Image Size Reduction

+

The size of the cray-sat container image has been approximately cut in half by +leveraging multi-stage builds. This also improved the repeatability of the unit +tests by running them in the container.

+

Bug Fixes

+

Minor bug fixes were made in cray-sat and in cray-sat-podman. For full change lists, +see each repository’s CHANGELOG.md file.

+

Summary of SAT Changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named “ncn-personalization”.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +executables on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and config file options +have been removed. For further instructions, see Remove Obsolete Configuration +File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation have been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes masters and workers.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+

Summary of SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+

Summary of SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes management cluster (i.e., workers +and masters). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Config File Location Change

+

The default location of the SAT config file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own config files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment variable.

+

Additionally, if a config file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies option.
  • +
  • List node accelerators (e.g., GPUs) with the --list-node-accels option. The count of +node accelerators is also included for each node.
  • +
  • List node accelerator risers (e.g., Redstone modules) with the --list-node-accel-risers +option. The count of node accelerator risers is also included for each node.
  • +
  • List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics +option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For detailed +instructions, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding config-file +option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding config file options were +deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+

Summary of SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command which supported operations for replacing a switch has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+

Summary of SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

See the System Admin Toolkit Command Overview +and the table of commands in the SAT Authentication section +of this document for more details on each of these commands.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-23/sitemap.xml b/en-23/sitemap.xml new file mode 100644 index 0000000000..3cc2f26aa9 --- /dev/null +++ b/en-23/sitemap.xml @@ -0,0 +1,308 @@ + + + + /docs-sat/en-23/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-23/dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-23/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-23/introduction/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-23/dashboards/sat_grafana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-23/dashboards/sat_kibana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-23/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-23/usage/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-23/categories/ + + + + + + + + /docs-sat/en-23/tags/ + + + + + + + + diff --git a/en-23/tags/index.html b/en-23/tags/index.html new file mode 100644 index 0000000000..dcecd78679 --- /dev/null +++ b/en-23/tags/index.html @@ -0,0 +1,579 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-23/tags/index.xml b/en-23/tags/index.xml new file mode 100644 index 0000000000..b732ee9033 --- /dev/null +++ b/en-23/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-23/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-23 + + + diff --git a/en-23/usage/index.html b/en-23/usage/index.html new file mode 100644 index 0000000000..5120166708 --- /dev/null +++ b/en-23/usage/index.html @@ -0,0 +1,858 @@ + + + + + + + + + + + + SAT Usage :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Usage

+

SAT Bootprep

+

SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.

+

This automated process centers around the sat bootprep command. Man page +documentation for sat bootprep can be viewed similarly to other SAT commands.

+
ncn-m001# sat-man sat-bootprep
+

SAT Bootprep vs SAT Bootsys

+

sat bootprep is used to create CFS configurations, build and +rename IMS images, and create BOS session templates which tie the +configurations and images together during a BOS session.

+

sat bootsys automates several portions of the boot and shutdown processes, +including (but not limited to) performing BOS operations (such as creating BOS +sessions), powering on and off cabinets, and checking the state of the system +prior to shutdown.

+

Editing a bootprep input file

+

The input file provided to sat bootprep is a YAML-formatted file containing +information which CFS, IMS, and BOS use to create configurations, images, and +BOS session templates respectively. Writing and modifying these input files is +the main task associated with using sat bootprep. An input file is composed of +three main sections, one each for configurations, images, and session templates. +These sections may be specified in any order, and any of the sections may be +omitted if desired.

+

Creating CFS configurations

+

The configurations section begins with a configurations: key.

+
---
+configurations:
+

Under this key, the user can list one or more configurations to create. For +each configuration, a name should be given, in addition to the list of layers +which comprise the configuration. Each layer can be defined by a product name +and optionally a version number, or commit hash or branch in the product’s +configuration repository. Alternatively, a layer can be defined by a Git +repository URL directly, along with an associated branch or commit hash.

+

When a configuration layer is specified in terms of a product name, the layer +is created in CFS by looking up relevant configuration information (including +the configuration repository and commit information) from the +cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied, but if it is absent, the version is assumed to be the latest version +found in the cray-product-catalog.

+
---
+configurations:
+- name: example-configuration
+  layers:
+  - name: example product
+    playbook: example.yml
+    product:
+      name: example
+      version: 1.2.3
+

Alternatively, a configuration layer may be specified by explicitly referencing +the desired configuration repository, along with the branch containing the +intended version of the Ansible playbooks. A commit hash may be specified by replacing +branch with commit.

+
  ...
+  - name: another example product
+    playbook: another-example.yml
+    git:
+      url: "https://vcs.local/vcs/another-example-config-management.git"
+      branch: main
+  ...
+

When sat bootprep is run against an input file, a CFS configuration will be +created corresponding to each configuration in the configurations section. For +example, the configuration created from an input file with the layers listed +above might look something like the following:

+
{
+    "lastUpdated": "2022-02-07T21:47:49Z",
+    "layers": [
+        {
+            "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "example product",
+            "playbook": "example.yml"
+        },
+        {
+            "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "another example product",
+            "playbook": "another-example.yml"
+        }
+    ],
+    "name": "example-configuration"
+}
+

Creating IMS images

+

After specifying configurations, the user may add images to the input file +which are to be built by IMS. To add an images section, the user should add +an images key.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+

Under the images key, the user may define one or more images to be created in +a list. Each element of the list defines a separate IMS image to be built and/or +configured. Images must contain a name, as well as an ims section containing a +definition of the image to be built and/or configured. Images may be defined by +an image recipe, or by a pre-built image. Recipes and pre-built images are +referred to by their names or IDs in IMS. The ims section should also contain +an is_recipe property, which indicates whether the name or ID refers to an +image recipe or a pre-built image. Images may also optionally provide a text +description of the image. This description is not stored or used by sat bootprep or any CSM services, but is useful for documenting images in the input +file.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+- name: example-compute-image
+  description: >
+    An example compute node image for illustrative purposes.
+  ims:
+    name: example-compute-image-recipe
+    is_recipe: true
+- name: another-example-compute-image
+  description: >
+    Another example compute node image.
+  ims:
+    id: <IMS image UUID>
+    is_recipe: false
+

Images may also contain a configuration property in their definition, which +specifies a configuration with which to customize the built image prior to +booting. If a configuration is specified, then configuration groups must also +be specified using the configuration_group_names property.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+- name: example-compute-image
+  description: >
+    An example compute node image for illustrative purposes.
+  ims:
+    name: example-compute-image-recipe
+    is_recipe: true
+  configuration: example-configuration
+  configuration_group_names:
+  - Compute
+

Creating BOS session templates

+

BOS session templates are the final section of the input file, and are defined +under the session_templates key.

+
---
+configurations:
+  ... (omitted for brevity)
+images:
+  ... (omitted for brevity)
+session_templates:
+

Each session template is defined in terms of its name, an image, a +configuration, and a set of parameters which can be used to configure the +session. The name, image, and configuration are specified with their respective +name, image, and configuration keys. bos_parameters may also be +specified; currently, the only setting under bos_parameters that is supported +is boot_sets, which can be used to define boot sets in the BOS session +template. Each boot set is defined under its own property under boot_sets, and +the value of each boot set can contain the following properties, all of +which are optional:

+
    +
  • kernel_parameters: the parameters passed to the kernel on the command line
  • +
  • network: the network over which the nodes will boot
  • +
  • node_list: nodes to add to the boot set
  • +
  • node_roles_groups: HSM roles to add to the boot set
  • +
  • node_groups: HSM groups to add to the boot set
  • +
  • rootfs_provider: the root file system provider
  • +
  • rootfs_provider_passthrough: parameters to add to the rootfs= kernel +parameter
  • +
+

The properties listed previously are the same as the parameters that can be +specified directly through BOS boot sets. More information can be found in the +CSM documentation on session +templates. +Additional properties not listed are passed through to the BOS session template +as written.

+

An example session template might look like the following:

+
configurations:
+  ... (omitted for brevity)
+images:
+  ... (omitted for brevity)
+session_templates:
+- name: example-session-template
+  image: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_list: []
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

Example bootprep input files

+

Putting together all of the previous input file sections, an example bootprep input +file might look something like the following.

+
---
+configurations:
+- name: cos-config
+  layers:
+  - name: cos-integration-2.2.87
+    playbook: site.yml
+    product:
+      name: cos
+      version: 2.2.87
+      branch: integration
+  - name: cpe-integration-21.12.3
+    playbook: pe_deploy.yml
+    product:
+      name: cpe
+      version: 21.12.3
+      branch: integration
+  - name: slurm-master-1.1.1
+    playbook: site.yml
+    product:
+      name: slurm
+      version: 1.1.1
+      branch: master
+images:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  ims:
+    is_recipe: true
+    name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  configuration: cos-config
+  configuration_group_names:
+  - Compute
+session_templates:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  image: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+  configuration: cos-config
+  bos_parameters:
+    boot_sets:
+      compute:
+        kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+        node_roles_groups:
+        - Compute
+

Creating a pre-populated example bootprep input file

+

It is possible to create an example bootprep input file using values from the +system’s product catalog using the sat bootprep generate-example command.

+
ncn-m001# sat bootprep generate-example
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.1.5) of product uan
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (2.1.5) of product uan
+INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml.
+

This file should be reviewed and edited to match the desired parameters of the +configurations, images, and session templates.

+

Viewing built-in generated documentation

+

The contents of the YAML input files described above must conform to a schema +which defines the structure of the data. The schema definition is written using +the JSON Schema format. (Although the format is named “JSON Schema”, the schema +itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.

+

Viewing the exact schema specification

+

To view the exact schema specification, run sat bootprep view-schema.

+
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft-07/schema"
+title: Bootprep Input File
+description: >
+  A description of the set of CFS configurations to create, the set of IMS
+  images to create and optionally customize with the defined CFS configurations,
+  and the set of BOS session templates to create that reference the defined
+  images and configurations.
+type: object
+additionalProperties: false
+properties:
+  ...
+

Generating user-friendly documentation

+

The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature was included +that generates user-friendly HTML documentation for the input file schema, +which can be browsed with the user’s preferred web browser.

+
    +
  1. +

    Create a documentation tarball using sat bootprep.

    +
    ncn-m001# sat bootprep generate-docs
    +INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
    +

    An alternate output directory can be specified with the --output-dir +option. The generated tarball is always named bootprep-schema-docs.tar.gz.

    +
    ncn-m001# sat bootprep generate-docs --output-dir /tmp
    +INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
    +
  2. +
  3. +

    From another machine, copy the tarball to a local directory.

    +
    another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
    +
  4. +
  5. +

    Extract the contents of the tarball and open the contained index.html.

    +
    another-machine$ tar xzvf bootprep-schema-docs.tar.gz
    +x bootprep-schema-docs/
    +x bootprep-schema-docs/index.html
    +x bootprep-schema-docs/schema_doc.css
    +x bootprep-schema-docs/schema_doc.min.js
    +another-machine$ open bootprep-schema-docs/index.html
    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/404.html b/en-24/404.html new file mode 100644 index 0000000000..6fbed99adc --- /dev/null +++ b/en-24/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-24/categories/index.html b/en-24/categories/index.html new file mode 100644 index 0000000000..9f768b9977 --- /dev/null +++ b/en-24/categories/index.html @@ -0,0 +1,703 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-24/categories/index.xml b/en-24/categories/index.xml new file mode 100644 index 0000000000..68f4b008af --- /dev/null +++ b/en-24/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-24/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-24 + + + diff --git a/en-24/cne_install/index.html b/en-24/cne_install/index.html new file mode 100644 index 0000000000..6cc23db896 --- /dev/null +++ b/en-24/cne_install/index.html @@ -0,0 +1,775 @@ + + + + + + + + + + + + SAT Upgrade with CNE Installer :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Upgrade with CNE Installer

+

Upgrade the System Admin Toolkit Product Stream

+

Describes how to upgrade the System Admin Toolkit (SAT) product +stream by using the Compute Node Environment (CNE) installer (cne-install). +The CNE installer can be used only for upgrades and not for fresh installations. +For installation instructions, see Install the System Admin Toolkit Product +Stream.

+

Upgrading SAT with cne-install is recommended because the process is both +automated and logged to help you save time. The CNE installer can be used to +upgrade SAT alone or with other supported products. For more information +on cne-install and its options, refer to the HPE Cray EX System Software +Getting Started Guide (S-8000).

+

Prerequisites

+
    +
  • CSM is installed and verified.
  • +
  • There must be at least 2 gigabytes of free space on the manager NCN on which +the procedure is run.
  • +
+

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being upgraded.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
+

Pre-Upgrade Procedure

+
    +
  1. +

    Start a typescript and set the shell prompt.

    +

    The typescript will record the commands and the output from this upgrade. +The prompt is set to include the date and time.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Upgrade Procedure

+
    +
  1. +

    Copy the release distribution gzipped tar file to ncn-m001.

    +

    The cne-install command installs all files in the media directory +by default. If you are upgrading SAT alone, ensure only the SAT tarball is in +the media directory.

    +
  2. +
  3. +

    Run the CNE installer.

    +
      +
    • +

      If you are upgrading SAT along with other supported products, run the +following command.

      +
      ncn-m001# cne-install -m MEDIA_DIR install -B WORKING_BRANCH -bpc BOOTPREP_CONFIG_CN \
      +    -bpn BOOTPREP_CONFIG_NCN
      +

      The cne-install command will use the provided BOOTPREP_CONFIG_CN and +BOOTPREP_CONFIG_NCN files for the run.

      +
    • +
    • +

      If you are upgrading SAT alone, run the following commands.

      +
      ncn-m001# cne-install -m MEDIA_DIR install -B '{{product_type}}-{{version_x_y_z}}' \
      +    -bpn BOOTPREP_CONFIG_NCN -e update_working_branches
      +ncn-m001# cne-install -m MEDIA_DIR install -B '{{product_type}}-{{version_x_y_z}}' \
      +    -bpn BOOTPREP_CONFIG_NCN -b sat_bootprep_ncn -e ncn_personalization
      +
    • +
    +
  4. +
  5. +

    Optional: Stop the typescript.

    +

    NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT upgrade (see Next Steps).

    +
    ncn-m001# exit
    +
  6. +
+

SAT version x.y.z is now upgraded, meaning the SAT x.y.z release +has been loaded into the system software repository.

+
    +
  • SAT configuration content for this release has been uploaded to VCS.
  • +
  • SAT content for this release has been uploaded to the CSM product catalog.
  • +
  • SAT content for this release has been uploaded to Nexus repositories.
  • +
  • The sat command is available.
  • +
+

Next Steps

+

At this point, the release distribution files can be removed from the system as +described in Post-Upgrade Cleanup Procedure.

+

If other HPE Cray EX software products are being upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +(S-8000) to determine which step +to execute next.

+

If no other HPE Cray EX software products are being upgraded at this time, +execute the SAT Post-Upgrade procedures:

+ +

Post-Upgrade Cleanup Procedure

+
    +
  1. +

    Optional: Remove the SAT release distribution tar file and extracted directory.

    +
    ncn-m001# rm sat-x.y.z.tar.gz
    +ncn-m001# rm -rf sat-x.y.z/
    +
  2. +
+

Remove Obsolete Configuration File Sections

+

Prerequisites

+ +

Procedure

+

After upgrading SAT, if using the configuration file from a previous version, +there may be configuration file sections no longer used in the new version. +For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] +configuration file section is no longer used. In that case, the following +warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve +the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are +“unknown section” warnings.

+

SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO +level are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
    +
  • sat bootsys --stage shutdown --stage session-checks
  • +
  • sat sensors
  • +
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
  • sat swap
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/dashboards/index.html b/en-24/dashboards/index.html new file mode 100644 index 0000000000..1291acecbf --- /dev/null +++ b/en-24/dashboards/index.html @@ -0,0 +1,685 @@ + + + + + + + + + + + + SAT Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Dashboards

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-24/dashboards/index.xml b/en-24/dashboards/index.xml new file mode 100644 index 0000000000..915ed3e927 --- /dev/null +++ b/en-24/dashboards/index.xml @@ -0,0 +1,26 @@ + + + + SAT Dashboards on System Admin Toolkit (SAT) + /docs-sat/en-24/dashboards/ + Recent content in SAT Dashboards on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-24 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Grafana Dashboards + /docs-sat/en-24/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-24/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. 
+ + + diff --git a/en-24/dashboards/sat_grafana_dashboards/index.html b/en-24/dashboards/sat_grafana_dashboards/index.html new file mode 100644 index 0000000000..841ae09bdd --- /dev/null +++ b/en-24/dashboards/sat_grafana_dashboards/index.html @@ -0,0 +1,790 @@ + + + + + + + + + + + + SAT Grafana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Grafana Dashboards

+

The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.

+

Grafana can be accessed via web browser at the following URL:

+
    +
  • https://sma-grafana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com

+

For more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the +SMA product documentation.

+

For more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to “Fabric Telemetry +Kafka Topics” in the SMA product documentation.

+ +

There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Dashboard NameDisplay Type
Fabric CongestionChart Panels
Fabric RFC3635Chart Panels
Fabric ErrorsTabular Format
Fabric Port StateTabular Format
+

The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.

+

SAT Grafana Interval and Locations Options

+

Shows the Interval and Locations Options for the available telemetry.

+

Grafana Interval and Locations Options

+

The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.

+

For more information, refer to Grafana Templates and Variables.

+

The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.

+

The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.

+

Grafana Fabric Congestion Dashboard

+

Grafana Fabric Congestion Dashboard

+

SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. They also provide the ability to drill down +to view data for specific ports on specific switches.

+

This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global, and they correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.

+

The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.

+

The port type of a link is reported as a port state “subtype” event when defined at port initialization.

+

Grafana Fabric Errors Dashboard

+

Grafana HSN Errors Dashboard

+

This dashboard reports error counters in a tabular format in three panels.

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.

+

Grafana Fabric Port State Dashboard

+

Grafana Fabric Port State Dashboard

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.

+

The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.

+

Grafana Fabric RFC3635 Dashboard

+

Grafana Fabric RFC3635 Dashboard

+

For more information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.

+

Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/dashboards/sat_kibana_dashboards/index.html b/en-24/dashboards/sat_kibana_dashboards/index.html new file mode 100644 index 0000000000..d93877cb7c --- /dev/null +++ b/en-24/dashboards/sat_kibana_dashboards/index.html @@ -0,0 +1,982 @@ + + + + + + + + + + + + SAT Kibana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Kibana Dashboards

+

Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.

+

Kibana can be accessed via web browser at the following URL:

+
    +
  • https://sma-kibana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com

+

For more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product +documentation.

+

Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DashboardShort DescriptionLong DescriptionKibana Visualization and Search Name
sat-aerAER correctedCorrected Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-corrected Search: sat-aer-corrected
sat-aerAER fatalFatal Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-fatal Search: sat-aer-fatal
sat-atomATOM failuresApplication Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged.sat-atom-failed
sat-atomATOM admindownApplication Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch.sat-atom-admindown
sat-heartbeatHeartbeat loss eventsHeartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system.sat-heartbeat
sat-kernelKernel assertionsThe kernel software performs a failed assertion when some condition represents a serious fault. The node goes down.sat-kassertions
sat-kernelKernel panicsThe kernel panics when something is seriously wrong. The node goes down.sat-kernel-panic
sat-kernelLustre bugs (LBUGs)The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down.sat-lbug
sat-kernelCPU stallsCPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric.sat-cpu-stall
sat-kernelOut of memoryAn Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided.sat-oom
sat-mceMCEMachine Check Exceptions (MCE) are errors detected at the processor level.sat-mce
sat-rasdaemonrasdaemon errorsErrors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future.sat-rasdaemon-error
sat-rasdaemonrasdaemon messagesAll messages from the rasdaemon service on nodes.sat-rasdaemon
+

Disable Search Highlighting in Kibana Dashboard

+

By default, search highlighting is enabled. This procedure describes how to disable search highlighting.

+

The Kibana Dashboard should be open on your system.

+
    +
  1. +

    Navigate to Management

    +
  2. +
  3. +

    Navigate to Advanced Settings in the Kibana section, below the Elasticsearch section

    +
  4. +
  5. +

    Scroll down to the Discover section

    +
  6. +
  7. +

    Change Highlight results from on to off

    +
  8. +
  9. +

    Click Save to save changes

    +
  10. +
+

AER Kibana Dashboard

+

The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.

+

View the AER Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-aer dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.

    +
  8. +
+

ATOM Kibana Dashboard

+

The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.

+

View the ATOM Kibana Dashboard

+

HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-atom dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Heartbeat Kibana Dashboard

+

The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd +pods are responsible for monitoring nodes in the system for heartbeat loss.

+

View the Heartbeat Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-heartbeat dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible +for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.

    +
  8. +
+

Kernel Kibana Dashboard

+

The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.

+

View the Kernel Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-kernel dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.

    +
  8. +
+

MCE Kibana Dashboard

+

The MCE Dashboard displays CPU-detected processor-level hardware errors.

+

View the MCE Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-mce dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

RAS Daemon Kibana Dashboard

+

The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon +service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including +PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages +presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one +for only messages of severity emerg or err and another for all messages from rasdaemon.

+

View the RAS Daemon Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-rasdaemon dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.

    +
  8. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/img/Fabric_PortState_Locations_UI.png b/en-24/img/Fabric_PortState_Locations_UI.png new file mode 100644 index 0000000000..704511ebce Binary files /dev/null and b/en-24/img/Fabric_PortState_Locations_UI.png differ diff --git a/en-24/img/Grafana_Fabric_Congestion.png b/en-24/img/Grafana_Fabric_Congestion.png new file mode 100644 index 0000000000..dbf481d94c Binary files /dev/null and b/en-24/img/Grafana_Fabric_Congestion.png differ diff --git a/en-24/img/Grafana_HSN_Errors.png b/en-24/img/Grafana_HSN_Errors.png new file mode 100644 index 0000000000..f43b7d02a6 Binary files /dev/null and b/en-24/img/Grafana_HSN_Errors.png differ diff --git a/en-24/img/Grafana_rfc3635.png b/en-24/img/Grafana_rfc3635.png new file mode 100644 index 0000000000..dff176c82d Binary files /dev/null and b/en-24/img/Grafana_rfc3635.png differ diff --git a/en-24/img/SAT_Grafana_Fabric_Vars.png b/en-24/img/SAT_Grafana_Fabric_Vars.png new file mode 100644 index 0000000000..194d75b124 Binary files /dev/null and b/en-24/img/SAT_Grafana_Fabric_Vars.png differ diff --git a/en-24/index.html b/en-24/index.html new file mode 100644 index 0000000000..b653ee7b03 --- /dev/null +++ b/en-24/index.html @@ -0,0 +1,708 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ + + + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/index.json b/en-24/index.json new file mode 100644 index 0000000000..f2684da3e3 --- /dev/null +++ b/en-24/index.json @@ -0,0 +1,92 @@ +[ +{ + "uri": "/docs-sat/en-24/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide Introduction to SAT About System Admin Toolkit (SAT) System Admin Toolkit Command Overview Command Prompt Conventions in SAT SAT in CSM SAT Dependencies SAT Installation Install the System Admin Toolkit Product Stream Configure SAT SAT Authentication Generate SAT S3 Credentials Set System Revision Information Perform NCN Personalization SAT Post-Upgrade Remove Obsolete Configuration File Sections SAT Logging Set System Revision Information SAT Uninstall and Downgrade Uninstall: Removing a Version of SAT Activate: Switching Between Versions Optional: Installing and Configuring SAT on an External System SAT Upgrade with CNE Installer Upgrade the System Admin Toolkit Product Stream Post-Upgrade Cleanup Procedure Remove Obsolete Configuration File Sections SAT Logging SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards SAT Usage SAT Bootprep Change the BOS Version SAT Release Notes Summary of Changes in SAT 2.4 Summary of Changes in SAT 2.3 Summary of Changes in SAT 2.2 Summary of SAT Changes in Shasta v1.5 Summary of SAT Changes in Shasta v1.4.1 Summary of SAT Changes in Shasta v1.4 Summary of SAT Changes in Shasta v1.3.2 Summary of SAT Changes in Shasta v1.3 " +}, +{ + "uri": "/docs-sat/en-24/dashboards/", + "title": "SAT Dashboards", + "tags": [], + "description": "", + "content": "SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-24/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install the System Admin Toolkit Product Stream Describes how to install or upgrade the System Admin 
Toolkit (SAT) product stream.\nPrerequisites CSM is installed and verified. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines.\nIn the examples below, replace x.y.z with the version of the SAT product stream being installed.\n\u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below.\nTo upgrade SAT, execute the pre-installation, installation, and post-installation procedures for a newer distribution. The newly installed version will become the default.\nIn SAT 2.4, you can instead upgrade the product stream by using the Compute Node Environment (CNE) installer. It is recommended that you upgrade SAT with the CNE installer because the process is both automated and logged to help you save time. For more information, see SAT Upgrade with CNE Installer.\nPre-Installation Procedure Start a typescript and set the shell prompt.\nThe typescript will record the commands and the output from this installation. The prompt is set to include the date and time.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Installation Procedure Copy the release distribution gzipped tar file to ncn-m001.\nUnzip and extract the release distribution.\nncn-m001# tar -xvzf sat-x.y.z.tar.gz Change directory to the extracted release distribution directory.\nncn-m001# cd sat-x.y.z Run the installer: install.sh.\nThe script produces a lot of output. A successful install ends with \u0026ldquo;SAT version x.y.z has been installed\u0026rdquo;, where x.y.z is the SAT product version.\nncn-m001# ./install.sh ====\u0026gt; Installing System Admin Toolkit version x.y.z ... ====\u0026gt; Waiting 300 seconds for sat-config-import-x.y.z to complete ... ====\u0026gt; SAT version x.y.z has been installed. 
Optional: Stop the typescript.\nNOTE: This step can be skipped if you wish to use the same typescript for the remainder of the SAT install (see Next Steps).\nncn-m001# exit SAT version x.y.z is now installed/upgraded, meaning the SAT x.y.z release has been loaded into the system software repository.\nSAT configuration content for this release has been uploaded to VCS. SAT content for this release has been uploaded to the CSM product catalog. SAT content for this release has been uploaded to Nexus repositories. The sat command won\u0026rsquo;t be available until the NCN Personalization procedure has been executed. Next Steps If other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide (S-8000) to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed or upgraded at this time, proceed to the sections listed below.\nNOTE: The procedures in Configure SAT are only required during the first installation of SAT. However, the NCN Personalization procedure is required both when installing and upgrading SAT.\nIf performing a fresh install, execute the Configure SAT procedures:\nSAT Authentication Generate SAT S3 Credentials Set System Revision Information Execute the NCN Personalization procedure:\nPerform NCN Personalization If performing an upgrade, execute the SAT Post-Upgrade procedures:\nRemove obsolete configuration file sections SAT Logging Set System Revision Information NOTE: The Set System Revision Information procedure is not required after upgrading from SAT 2.1 or later.\nConfigure SAT SAT Authentication Initially, as part of the installation and configuration, SAT authentication is set up so SAT commands can be used in later steps of the install process. The admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. 
For instructions on editing Role Mappings see Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. For additional information on SAT authentication, see System Security and Authentication in the CSM documentation.\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nDescription of SAT Command Authentication Types Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes manager node where SAT commands are run.\nBelow is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bmccreds Requires authentication to the API gateway. sat-bmccreds Set BMC passwords. sat bootprep Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. sat-bootprep Prepare to boot nodes with images and configurations. sat bootsys Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. 
sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwhist Requires authentication to the API gateway. sat-hwhist Report hardware component history. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat k8s Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. sat-k8s Report on Kubernetes replica sets that have co-located (on the same node) replicas. sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node XNames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat slscheck Requires authentication to the API gateway. sat-slscheck Perform a cross-check between SLS and HSM. sat status Requires authentication to the API gateway. sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. 
sat-xname2nid Translate node and node BMC XNames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml. The name of the currently logged in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, and will use the token for that username if it has been obtained and saved by sat auth.\nPrerequisites The sat CLI has been installed following Install The System Admin Toolkit Product Stream. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:\nGenerate a default SAT configuration file, if one does not exist.\nncn-m001# sat init Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. Note: If the config file already exists, it will print out an error:\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. For example:\nusername = \u0026#34;crayadmin\u0026#34; Run sat auth. Enter your password when prompted. For example:\nncn-m001# sat auth Password for crayadmin: Succeeded! Other sat commands are now authenticated to make requests to the API gateway. 
For example:\nncn-m001# sat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes master node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see: Set System Revision Information).\nNOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.\nPrerequisites The SAT CLI has been installed following Install The System Admin Toolkit Product Stream The SAT configuration file has been created (See SAT Authentication). CSM has been installed and verified. Procedure Ensure the files are readable only by root.\nncn-m001# touch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key ncn-m001# chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key ncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\nGet the SAT configuration file\u0026rsquo;s endpoint value.\nNOTE: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\nncn-m001# grep endpoint ~/.config/sat/sat.toml # endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nncn-m001# kubectl get secret 
sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs https://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, change the SAT configuration file\u0026rsquo;s endpoint value to match the secret\u0026rsquo;s.\nCopy SAT configurations to each manager node on the system.\nncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 and ncn-m003.\nSet System Revision Information HPE service representatives use system revision information data to identify systems in support cases.\nPrerequisites SAT authentication has been set up. See SAT Authentication. S3 credentials have been generated. See Generate SAT S3 Credentials. Notes on the Procedure This procedure is required after a fresh install of SAT. After an upgrade of SAT, this procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or earlier. Procedure Set System Revision Information.\nRun sat setrev and follow the prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date TIP: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, \u0026ldquo;System type\u0026rdquo; is EX-1C.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. 
Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Verify System Revision Information.\nRun sat showrev and verify the output shown in the \u0026ldquo;System Revision Information table.\u0026rdquo;\nThe following example shows sample table output.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... 
| | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ Perform NCN Personalization A new CFS configuration layer must be added to the CFS configuration used on management NCNs. It is required following SAT installation and configuration. This procedure describes how to add that layer.\nPrerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. If performing a fresh install, the Configure SAT procedures have been successfully completed. Notes on the Procedure Ellipses (...) in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. If upgrading SAT, the existing configuration will likely include other Cray EX product entries. Update the SAT entry as described in this procedure. The HPE Cray EX System Software Getting Started Guide (S-8000) provides guidance on how and when to update the entries for the other products. Pre-NCN-Personalization Procedure Start a typescript if not already using one, and set the shell prompt.\nThe typescript will record the commands and the output from this installation. 
The prompt is set to include the date and time.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Procedure to Update CFS Configuration The SAT release distribution includes a script, update-mgmt-ncn-cfs-config.sh, that updates a CFS configuration to include the SAT layer required to install and configure SAT on the management NCNs.\nThe script supports modifying a named CFS configuration in CFS, a CFS configuration defined in a JSON file, or the CFS configuration currently applied to particular components in CFS.\nThe script also includes options for specifying:\nhow the modified CFS configuration should be saved. the git commit hash or branch specified in the SAT layer. This procedure is split into three alternatives, which cover common use cases:\nUpdate Active CFS Configuration Update CFS Configuration in a JSON File Update Existing CFS Configuration by Name If none of these alternatives fit your use case, see Advanced Options for Updating CFS Configurations.\nUpdate Active CFS Configuration Use this alternative if there is already a CFS configuration assigned to the management NCNs and you would like to update it in place for the new version of SAT.\nRun the script with the following options:\nncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-query role=Management,type=Node --save Examine the output to ensure the CFS configuration was updated.\nFor example, if there is a single CFS configuration that applies to NCNs, and if that configuration does not have a layer yet for any version of SAT, the output will look like this:\n====\u0026gt; Updating CFS configuration(s) INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s1b0n0 ... INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s9b0n0 ... 
INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found. INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end. INFO: Successfully saved CFS configuration \u0026#34;ncn-personalization\u0026#34; INFO: Successfully saved 1 changed CFS configurations. ====\u0026gt; Completed CFS configuration(s) ====\u0026gt; Cleaning up install dependencies Alternatively, if the CFS configuration already contains a layer for SAT that just needs to be updated, the output will look like this:\n====\u0026gt; Updating CFS configuration(s) INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0 INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s1b0n0 ... INFO: Found configuration \u0026#34;ncn-personalization\u0026#34; for component x3000c0s9b0n0 ... INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml INFO: Property \u0026#34;commit\u0026#34; of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from 01ae28c92b9b4740e9e0e01ae01216c6c2d89a65 to bcbd6db0803cc4137c7558df9546b0faab303cbd INFO: Property \u0026#34;name\u0026#34; of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-2.2.16 to sat-sat-ncn-bcbd6db-20220608T170152 INFO: Successfully saved CFS configuration \u0026#34;ncn-personalization\u0026#34; INFO: Successfully saved 1 changed CFS configurations. 
====\u0026gt; Completed CFS configuration(s) ====\u0026gt; Cleaning up install dependencies Update CFS Configuration in a JSON File Use this alternative if you are constructing a new CFS configuration for management NCNs in a JSON file.\nRun the script with the following options, where JSON_FILE is an environment variable set to the path of the JSON file to modify:\nncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-file $JSON_FILE --save Examine the output to ensure the JSON file was updated.\nFor example, if the configuration defined in the JSON file does not have a layer yet for any version of SAT, the output will look like this:\n====\u0026gt; Updating CFS configuration(s) INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found. INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end. INFO: Successfully saved 1 changed CFS configurations. ====\u0026gt; Completed CFS configuration(s) ====\u0026gt; Cleaning up install dependencies Update Existing CFS Configuration by Name Use this alternative if you are updating a specific named CFS configuration. This may be the case if you are constructing a new CFS configuration during an install or upgrade of multiple products.\nRun the script with the following options, where CFS_CONFIG_NAME is an environment variable set to the name of the CFS configuration to update.\nncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-config $CFS_CONFIG_NAME --save Examine the output to ensure the CFS configuration was updated.\nFor example, if the CFS configuration does not have a layer yet for any version of SAT, the output will look like this:\n====\u0026gt; Updating CFS configuration(s) INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found. INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end. 
INFO: Successfully saved CFS configuration \u0026#34;CFS_CONFIG_NAME\u0026#34; INFO: Successfully saved 1 changed CFS configurations. ====\u0026gt; Completed CFS configuration(s) ====\u0026gt; Cleaning up install dependencies Advanced Options for Updating CFS Configurations If none of the alternatives described in the previous sections apply, view the full description of the options accepted by the update-mgmt-ncn-cfs-config.sh script by invoking it with the --help option.\nncn-m001# ./update-mgmt-ncn-cfs-config.sh --help Procedure to Apply CFS Configuration After the CFS configuration that applies to management NCNs has been updated as described in the Procedure to Update CFS Configuration, execute the following steps to ensure the modified CFS configuration is re-applied to the management NCNs.\nSet an environment variable that refers to the name of the CFS configuration to be applied to the management NCNs.\nncn-m001# export CFS_CONFIG_NAME=\u0026#34;ncn-personalization\u0026#34; Note: If the Update Active CFS Configuration section was followed above, the name of the updated CFS configuration will have been logged in the following format. 
If multiple CFS configurations were modified, any one of them can be used in this procedure.\nINFO: Successfully saved CFS configuration \u0026#34;ncn-personalization\u0026#34; Obtain the name of the CFS configuration layer for SAT and save it in an environment variable:\nncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \\ | jq -r \u0026#39;.layers | map(select(.cloneUrl | contains(\u0026#34;sat-config-management.git\u0026#34;)))[0].name\u0026#39;) Create a CFS session that executes only the SAT layer of the given CFS configuration.\nThe --configuration-limit option limits the configuration session to run only the SAT layer of the configuration.\nncn-m001# cray cfs sessions create --name \u0026#34;sat-session-${CFS_CONFIG_NAME}\u0026#34; --configuration-name \\ \u0026#34;${CFS_CONFIG_NAME}\u0026#34; --configuration-limit \u0026#34;${SAT_LAYER_NAME}\u0026#34; Monitor the progress of the CFS session.\nSet an environment variable to the name of the Ansible container within the pod for the CFS session:\nncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \\ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \\ -o json | jq -r \u0026#39;.items[0].spec.containers | map(select(.name | contains(\u0026#34;ansible\u0026#34;))) | .[0].name\u0026#39;) Next, get the logs for the Ansible container.\nncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \\ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} Ansible plays, which are run by the CFS session, will install SAT on all the master management NCNs on the system. A summary of results can be found at the end of the log output. The following example shows a successful session.\n... 
PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 NOTE: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNOTE: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version sat 3.7.0 NOTE: Upon first running sat, you may see additional output while the sat container image is downloaded. This will occur the first time sat is run on each manager NCN. For example, if you run sat for the first time on ncn-m001 and then for the first time on ncn-m002, you will see this additional output both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... sat 3.7.0 Stop the typescript.\nncn-m001# exit SAT version x.y.z is now installed and configured:\nThe SAT RPM package is installed on the associated NCNs. Note on Procedure to Apply CFS Configuration The previous procedure is not always necessary because the CFS Batcher service automatically detects configuration changes and will automatically create new sessions to apply configuration changes according to certain rules. 
For more information on these rules, refer to Configuration Management with the CFS Batcher in the Cray System Management Documentation.\nThe main scenario in which the CFS batcher will not automatically re-apply the SAT layer is when the commit hash of the sat-config-management git repository has not changed between SAT versions. The previous procedure ensures the configuration is re-applied in all cases, and it is harmless if the batcher has already applied an updated configuration.\nNext Steps At this point, the release distribution files can be removed from the system as described in Post-Installation Cleanup Procedure.\nIf other HPE Cray EX software products are being installed or upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide (S-8000) to determine which step to execute next.\nIf no other HPE Cray EX software products are being installed at this time, the installation process is complete. If no other HPE Cray EX software products are being upgraded at this time, proceed to the remaining SAT Post-Upgrade procedures:\nRemove obsolete configuration file sections SAT Logging Set System Revision Information NOTE: The Set System Revision Information procedure is not required after upgrading from SAT 2.1 or later.\nPost-Installation Cleanup Procedure Optional: Remove the SAT release distribution tar file and extracted directory.\nncn-m001# rm sat-x.y.z.tar.gz ncn-m001# rm -rf sat-x.y.z/ SAT Post-Upgrade Remove Obsolete Configuration File Sections Prerequisites The Install the System Admin Toolkit Product Stream procedure has been successfully completed. The Perform NCN Personalization procedure has been successfully completed. Procedure After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. 
In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nSAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO level are now logged at the DEBUG level.\nThese changes take effect automatically. However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\nncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml [logging] ... stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. 
However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap SAT Uninstall and Downgrade Uninstall: Removing a Version of SAT This procedure can be used to uninstall a version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nNOTE: It is not recommended to uninstall a version designated as \u0026ldquo;active\u0026rdquo;. If the active version is uninstalled, then the activate procedure must be executed on a remaining version.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+-------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+-------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+-------------------+-----------------------+ Use prodmgr to uninstall a version of SAT.\nThis command will do three things:\nRemove all hosted-type package repositories associated with the given version of SAT. Group-type repositories are not removed. Remove all container images associated with the given version of SAT. 
Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up in the output of sat showrev. ncn-m001# prodmgr uninstall sat 2.2.10 Repository sat-2.2.10-sle-15sp2 has been removed. Removed Docker image cray/cray-sat:3.9.0 Removed Docker image cray/sat-cfs-install:1.0.2 Removed Docker image cray/sat-install-utility:1.4.0 Deleted sat-2.2.10 from product catalog. Activate: Switching Between Versions This procedure can be used to downgrade the active version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | True | - | - | | sat | 2.2.10 | False | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Use prodmgr to activate a different version of SAT.\nThis command will do three things:\nFor all hosted-type package repositories associated with this version of SAT, set them as the sole member of their corresponding group-type repository. For example, activating SAT version 2.2.10 sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group. Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of sat showrev. 
Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are associated with NCNs with the role \u0026ldquo;Management\u0026rdquo; and subrole \u0026ldquo;Master\u0026rdquo; (for example, the CFS configuration ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS configuration content associated with the version of SAT being activated. ncn-m001# prodmgr activate sat 2.2.10 Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2. Set sat-2.2.10 as active in product catalog. Updated CFS configurations: [ncn-personalization] Verify that the chosen version is marked as active.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------+--------------------+-----------------------+ | product_name | product_version | active | images | image_recipes | +--------------+-----------------+--------+--------------------+-----------------------+ | sat | 2.3.3 | False | - | - | | sat | 2.2.10 | True | - | - | +--------------+-----------------+--------+--------------------+-----------------------+ Apply the modified CFS configuration to the management NCNs.\nAt this point, Nexus package repositories have been modified to set a particular package repository as active, but the SAT package may not have been updated on management NCNs.\nTo ensure that management NCNs have been updated to use the active SAT version, follow the Procedure to Apply CFS Configuration. Refer to the output from the prodmgr activate command to find the name of the modified CFS configuration. 
If more than one CFS configuration was modified, use the first one.\nOptional: Installing and Configuring SAT on an External System SAT can optionally be installed and configured on an external system to interact with CSM over the CAN.\nLimitations Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on host-based functionality on the management NCNs and will not work from an external system. This includes the following:\nThe platform-services and ncn-power stages of sat bootsys The local host information displayed by the --local option of sat showrev Installing SAT on an external system is not an officially supported configuration. These instructions are provided \u0026ldquo;as-is\u0026rdquo; with the hope that they can be useful for users who desire additional flexibility.\nCertain additional steps may need to be taken to install and configure SAT depending on the configuration of the external system in use. These additional steps may include provisioning virtual machines, installing packages, or configuring TLS certificates, and these steps are outside the scope of this documentation. This section covers only the steps needed to configure SAT to use externally-accessible API endpoints exposed by CSM.\nPrerequisites The external system must be on the Customer Access Network (CAN). Python 3.7 or newer is installed on the system. kubectl, openssh, git, and curl are installed on the external system. The root CA certificates used when installing CSM have been added to the external system\u0026rsquo;s trust store such that authenticated TLS connections can be made to the CSM REST API gateway. For more information, refer to Certificate Authority in the Cray System Management Documentation. Procedure Create a Python virtual environment.\n$ SAT_VENV_PATH=\u0026#34;$(pwd)/venv\u0026#34; $ python3 -m venv ${SAT_VENV_PATH} $ . 
${SAT_VENV_PATH}/bin/activate Clone the SAT source code.\nNote: To use SAT version 3.19, this example clones the release/3.19 branch of Cray-HPE/sat. However, for better clarity, these instructions include steps that apply only to versions newer than 3.19. Specifically, the instructions include references to the csm-api-client package, which was not a dependency of SAT in version 3.19.\n(venv) $ git clone --branch=release/3.19 https://github.com/Cray-HPE/sat.git Set up the SAT CSM Python dependencies to be installed from their source code.\nSAT CSM Python dependency packages are not currently distributed publicly as source packages or binary distributions. They must be installed from their source code hosted on GitHub. Also, to install the cray-product-catalog Python package, you must first clone it locally. Use the following steps to modify the SAT CSM Python dependencies so they can be installed from their source code.\nClone the source code for cray-product-catalog.\n(venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog In the cray-product-catalog directory, create a file named .version that contains the version of cray-product-catalog.\n(venv) $ echo 1.6.0 \u0026gt; cray-product-catalog/.version Open the \u0026ldquo;locked\u0026rdquo; requirements file in a text editor.\n(venv) $ vim sat/requirements.lock.txt Update the line containing cray-product-catalog so that it reflects the local path to cray-product-catalog.\nIt should read as follows:\n./cray-product-catalog For versions of SAT newer than 3.19, change the line containing csm-api-client to read as follows:\ncsm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 (Optional) Confirm that requirements.lock.txt is modified as expected.\nNote: For versions newer than 3.19, you will see both cray-product-catalog and csm-api-client. 
For version 3.19 and older, you will only see cray-product-catalog.\n(venv) $ grep -E \u0026#39;cray-product-catalog|csm-api-client\u0026#39; sat/requirements.lock.txt ./cray-product-catalog csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 Install the modified SAT dependencies.\n(venv) $ pip install -r sat/requirements.lock.txt ... Install the SAT Python package.\n(venv) $ pip install ./sat ... Optional: Add the sat virtual environment to the user\u0026rsquo;s PATH environment variable.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\nIf the virtual environment is not added to the user\u0026rsquo;s PATH environment variable, then source ${SAT_VENV_PATH}/bin/activate will need to be run before running any SAT commands.\n(venv) $ deactivate $ echo export PATH=\\\u0026#34;${SAT_VENV_PATH}/bin:${PATH}\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile $ source ~/.bash_profile Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config on the external system.\nNote that this file contains credentials to authenticate against the Kubernetes API as the administrative user, so it should be treated as sensitive.\n$ mkdir -p ~/.kube $ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config admin.conf 100% 5566 3.0MB/s 00:00 Add a new entry for the hostname kubernetes to the external system\u0026rsquo;s /etc/hosts file.\nThe kubernetes hostname should correspond to the CAN IP address on ncn-m001. 
On CSM 1.2, this can be determined by querying the IP address of the bond0.cmn0 interface.\n$ ssh ncn-m001 ip addr show bond0.cmn0 13: bond0.cmn0@bond0: \u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever $ IP_ADDRESS=10.102.1.11 On CSM versions prior to 1.2, the CAN IP can be determined by querying the IP address of the vlan007 interface.\n$ ssh ncn-m001 ip addr show vlan007 13: vlan007@bond0: \u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever $ IP_ADDRESS=10.102.1.10 Once the IP address is determined, add an entry to /etc/hosts mapping the IP address to the hostname kubernetes.\n$ echo \u0026#34;${IP_ADDRESS} kubernetes\u0026#34; | sudo tee -a /etc/hosts 10.102.1.11 kubernetes Modify ~/.kube/config to set the cluster server address.\nThe value of the server key for the kubernetes cluster under the clusters section should be set to https://kubernetes:6443.\n--- clusters: - cluster: certificate-authority-data: REDACTED server: https://kubernetes:6443 name: kubernetes ... 
Confirm that kubectl can access the CSM Kubernetes cluster.\n$ kubectl get nodes NAME STATUS ROLES AGE VERSION ncn-m001 Ready master 135d v1.19.9 ncn-m002 Ready master 136d v1.19.9 ncn-m003 Ready master 136d v1.19.9 ncn-w001 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w002 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w003 Ready \u0026lt;none\u0026gt; 136d v1.19.9 Use sat init to create a configuration file for SAT.\n$ sat init INFO: Configuration file \u0026#34;/home/user/.config/sat/sat.toml\u0026#34; generated. Copy the platform CA certificates from the management NCN and configure the certificates for use with SAT.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\n$ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt . $ echo export REQUESTS_CA_BUNDLE=\\\u0026#34;$(realpath platform-ca-certs.crt)\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile $ source ~/.bash_profile Edit the SAT configuration file to set the API and S3 hostnames.\nExternally available API endpoints are given domain names in PowerDNS, so the endpoints in the configuration file should each be set to subdomain.system-name.site-domain, where system-name and site-domain are replaced with the values specified during csi config init, and subdomain is the DNS name for the externally available service. For more information, refer to Externally Exposed Services in the Cray System Management Documentation.\nThe API gateway has the subdomain api, and S3 has the subdomain s3. The S3 endpoint runs on port 8080. 
The following options should be set in the SAT configuration file:\n[api_gateway] host = \u0026#34;api.system-name.site-domain\u0026#34; [s3] endpoint = \u0026#34;http://s3.system-name.site-domain:8080\u0026#34; Edit the SAT configuration file to specify the Keycloak user which will be accessing the REST API.\n[api_gateway] username = \u0026#34;user\u0026#34; Authenticate against the API gateway with sat auth.\nFor more information, see SAT Authentication.\nGenerate S3 credentials.\nFor more information, see Generate SAT S3 Credentials.\n" +}, +{ + "uri": "/docs-sat/en-24/usage/", + "title": "SAT Usage", + "tags": [], + "description": "", + "content": "SAT Usage SAT Bootprep Change the BOS Version " +}, +{ + "uri": "/docs-sat/en-24/usage/change_bos_version/", + "title": "Change the BOS Version", + "tags": [], + "description": "", + "content": "Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version one. You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command.\nYou can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. If the system is using an existing SAT config file from an older version of SAT, the bos section might not exist. 
In that case, add the bos section with the BOS version desired in the api_version setting.\nFind the SAT config file at ~/.config/sat/sat.toml, and look for a section like this:\n[bos] api_version = \u0026#34;v1\u0026#34; In this example, SAT is using BOS version \u0026quot;v1\u0026quot;.\nChange the line specifying the api_version to the BOS version desired (for example, \u0026quot;v2\u0026quot;).\n[bos] api_version = \u0026#34;v2\u0026#34; If applicable, uncomment the api_version line.\nIf the system is using an existing SAT config file from a recent version of SAT, the api_version line might be commented out like this:\n[bos] # api_version = \u0026#34;v2\u0026#34; If the line is commented out, SAT will still use the default BOS version. To ensure a different BOS version is used, uncomment the api_version line by removing # at the beginning of the line.\n" +}, +{ + "uri": "/docs-sat/en-24/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.\nSix Kibana Dashboards are included with SAT. They provide organized output for system health information.\nAER Kibana Dashboard ATOM Kibana Dashboard Heartbeat Kibana Dashboard Kernel Kibana Dashboard MCE Kibana Dashboard RAS Daemon Kibana Dashboard Four Grafana Dashboards are included with SAT. 
They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.\nGrafana Fabric Congestion Dashboard Grafana Fabric Errors Dashboard Grafana Fabric Port State Dashboard Grafana Fabric RFC3635 Dashboard In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs. For more information, see SAT in CSM. Older versions of CSM do not have the sat command automatically available, and SAT must be installed as a separate product.\nSystem Admin Toolkit Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on Kubernetes manager nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. 
Non-interactive: Running a sat command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. The following two examples show the same action, checking the system status, using interactive and non-interactive modes.\nInteractive ncn-m001# sat bash (CONTAINER-ID)sat-container# sat status Non-interactive ncn-m001# sat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, then use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following example.\nncn-m001# sat-man status A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed either with man sat or man sat-podman from the manager node.\nncn-m001# man sat ncn-m001# man sat-podman Command Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.\nThe root or super-user account always has the # character at the end of the prompt and has the host name of the host in the prompt. 
Any non-root account is indicated with account@hostname\u0026gt;. A user account that is neither root nor crayadm is referred to as user. The command prompt inside the SAT container environment is indicated with the string as follows. It also has the \u0026ldquo;#\u0026rdquo; character at the end of the prompt. Command Prompt Meaning ncn-m001# Run on one of the Kubernetes Manager servers. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) Examples of the sat status command used by an administrator:\nncn-m001# sat status ncn-m001# sat bash (CONTAINER_ID) sat-container# sat status SAT in CSM In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs, but it is still possible to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:\nAn entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.\nThe sat-install-utility container image is only available with the full SAT product stream. This container image provides uninstall and activate functionality when used with the prodmgr command. (In SAT 2.3 and older, SAT was only available to install as a separate product stream. Because these versions were packaged with sat-install-utility, it is still possible to uninstall these versions of SAT.)\nThe docs-sat RPM package is only available with the full SAT product stream.\nThe sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is only available with the full SAT product stream.\nIf the SAT product stream is not installed, there will be no configuration content for SAT in VCS. 
Therefore, CFS configurations that apply to NCNs (for example, ncn-personalization) should not include a SAT layer.\nThe SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, it does the following:\nModifies the sat.toml configuration file which contains the username so that it is only readable by root.\nModifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed because the names of the files within the tokens directory contain the username.\nRegardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other users. These permission changes only apply to files created by previous installations of SAT. In the current version of SAT all files and directories are created with the appropriate permissions.\nSAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX (Shasta) software stack. The following list shows these dependencies for each subcommand. 
Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Compute Rolling Upgrade Service (CRUS) Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 COS Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diags Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat slscheck CSM Hardware State Manager (HSM) Kubernetes S3 sat status CSM Hardware State Manager (HSM) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, +{ + "uri": "/docs-sat/en-24/dashboards/sat_grafana_dashboards/", + "title": "SAT Grafana Dashboards", + "tags": [], + "description": "", + "content": "SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. 
The messages are displayed based on severity.\nGrafana can be accessed via web browser at the following URL:\nhttps://sma-grafana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com\nFor more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the SMA product documentation.\nFor more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to \u0026ldquo;Fabric Telemetry Kafka Topics\u0026rdquo; in the SMA product documentation.\nNavigate SAT Grafana Dashboards There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display telemetry in a tabular format.\nDashboard Name Display Type Fabric Congestion Chart Panels Fabric RFC3635 Chart Panels Fabric Errors Tabular Format Fabric Port State Tabular Format The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. The interval setting is not used for tabular dashboards.\nSAT Grafana Interval and Locations Options Shows the Interval and Locations Options for the available telemetry.\nThe value of the Interval option sets the time resolution of the received telemetry. This works a bit like a histogram, with the available telemetry in an interval of time going into a \u0026ldquo;bucket\u0026rdquo; and averaging out to a single point on the chart or table. 
The special value auto will choose an interval based on the time range selected.\nFor more information, refer to Grafana Templates and Variables.\nThe Locations option allows restriction of the telemetry shown by locations, either individual links or all links in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, which always has entries for all links and switches, although the errors shown are restricted to the selected time range.\nThe chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart\u0026rsquo;s legend or the trace on the chart.\nGrafana Fabric Congestion Dashboard SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in the system and assess the past and present health of the high-speed network. It also allows the ability to drill down to view data for specific ports on specific switches.\nThis dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, local, and global and correspond to the link\u0026rsquo;s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to \u0026ldquo;all\u0026rdquo;) selected.\nThe metric values for links of a given port type are similar in value to each other but very distinct from the values of other types. If the values for different port types are all plotted together, the values for links with lower values are indistinguishable from zero when plotted.\nThe port type of a link is reported as a port state \u0026ldquo;subtype\u0026rdquo; event when defined at port initialization.\nGrafana Fabric Errors Dashboard This dashboard reports error counters in a tabular format in three panels.\nThere is no Interval option because this parameter is not used to set a coarseness of the data. 
Only a single value is presented that displays the most recent value in the time range.\nUnlike other dashboards, the locations presented are all locations in the system rather than having telemetry within the time range selected. However, the values are taken from telemetry within the time range.\nGrafana Fabric Port State Dashboard There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nThe Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours results in all states for all links in the system being shown.\nThe three columns named, group, switch, and port are not port state events, but extra information included with all port state events.\nGrafana Fabric RFC3635 Dashboard For more information on performance counters, refer to Definitions of Managed Objects for the Ethernet-like Interface Types, an Internet standards document.\nBecause these metrics are counters that only increase over time, the values plotted are the change in the counter\u0026rsquo;s value over the interval setting.\n" +}, +{ + "uri": "/docs-sat/en-24/cne_install/", + "title": "SAT Upgrade with CNE Installer", + "tags": [], + "description": "", + "content": "SAT Upgrade with CNE Installer Upgrade the System Admin Toolkit Product Stream Describes how to upgrade the System Admin Toolkit (SAT) product stream by using the Compute Node Environment (CNE) installer (cne-install). The CNE installer can be used only for upgrades and not for fresh installations. For installation instructions, see Install the System Admin Toolkit Product Stream.\nUpgrading SAT with cne-install is recommended because the process is both automated and logged to help you save time. 
The CNE installer can be used to upgrade SAT alone or with other supported products. For more information on cne-install and its options, refer to the HPE Cray EX System Software Getting Started Guide (S-8000).\nPrerequisites CSM is installed and verified. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being upgraded. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. Pre-Upgrade Procedure Start a typescript and set the shell prompt.\nThe typescript will record the commands and the output from this upgrade. The prompt is set to include the date and time.\nncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt ncn-m001# export PS1=\u0026#39;\\u@\\H \\D{%Y-%m-%d} \\t \\w # \u0026#39; Upgrade Procedure Copy the release distribution gzipped tar file to ncn-m001.\nThe cne-install command installs all files in the media directory by default. 
If you are upgrading SAT alone, ensure only the SAT tarball is in the media directory.\nRun the CNE installer.\nIf you are upgrading SAT along with other supported products, run the following command.\nncn-m001# cne-install -m MEDIA_DIR install -B WORKING_BRANCH -bpc BOOTPREP_CONFIG_CN \\ -bpn BOOTPREP_CONFIG_NCN The cne-install command will use the provided BOOTPREP_CONFIG_CN and BOOTPREP_CONFIG_NCN files for the run.\nIf you are upgrading SAT alone, run the following commands.\nncn-m001# cne-install -m MEDIA_DIR install -B \u0026#39;{{product_type}}-{{version_x_y_z}}\u0026#39; \\ -bpn BOOTPREP_CONFIG_NCN -e update_working_branches ncn-m001# cne-install -m MEDIA_DIR install -B \u0026#39;{{product_type}}-{{version_x_y_z}}\u0026#39; \\ -bpn BOOTPREP_CONFIG_NCN -b sat_bootprep_ncn -e ncn_personalization Optional: Stop the typescript.\nNOTE: This step can be skipped if you wish to use the same typescript for the remainder of the SAT upgrade (see Next Steps).\nncn-m001# exit SAT version x.y.z is now upgraded, meaning the SAT x.y.z release has been loaded into the system software repository.\nSAT configuration content for this release has been uploaded to VCS. SAT content for this release has been uploaded to the CSM product catalog. SAT content for this release has been uploaded to Nexus repositories. The sat command is available. 
Next Steps At this point, the release distribution files can be removed from the system as described in Post-Upgrade Cleanup Procedure.\nIf other HPE Cray EX software products are being upgraded in conjunction with SAT, refer to the HPE Cray EX System Software Getting Started Guide (S-8000) to determine which step to execute next.\nIf no other HPE Cray EX software products are being upgraded at this time, execute the SAT Post-Upgrade procedures:\nRemove obsolete configuration file sections SAT Logging Post-Upgrade Cleanup Procedure Optional: Remove the SAT release distribution tar file and extracted directory.\nncn-m001# rm sat-x.y.z.tar.gz ncn-m001# rm -rf sat-x.y.z/ Remove Obsolete Configuration File Sections Prerequisites The Upgrade the System Admin Toolkit Product Stream procedure has been successfully completed. Procedure After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nSAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO level are now logged at the DEBUG level.\nThese changes take effect automatically. 
However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\nncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml [logging] ... stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap " +}, +{ + "uri": "/docs-sat/en-24/usage/sat_bootprep/", + "title": "SAT Bootprep", + "tags": [], + "description": "", + "content": "SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created.\nThis automated process centers around the sat bootprep command. 
Man page documentation for sat bootprep can be viewed similarly to other SAT commands.\nncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.\nsat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.\nEditing a Bootprep Input File The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. An input file is composed of three main sections, one each for configurations, images, and session templates. These sections may be specified in any order, and any of the sections may be omitted if desired.\nProviding a Schema Version The sat bootprep input file is validated against a versioned schema definition. The input file should specify the version of the schema with which it is compatible under a schema_version key. For example:\n--- schema_version: 1.0.2 The current sat bootprep input file schema version can be viewed with the following command:\nncn-m001# sat bootprep view-schema | grep \u0026#39;^version:\u0026#39; version: \u0026#39;1.0.2\u0026#39; The sat bootprep run command validates the schema version specified in the input file. The command also makes sure that the schema version of the input file is compatible with the schema version understood by the current version of sat bootprep. For more information on schema version validation, refer to the schema_version property description in the bootprep input file schema. 
For more information on viewing the bootprep input file schema in either raw form or user-friendly HTML form, see Viewing the Exact Schema Specification or Generating User-Friendly Documentation.\nThe default sat bootprep input files provided by the hpc-csm-software-recipe release distribution already contain the correct schema version.\nDefining CFS Configurations The CFS configurations are defined under a configurations key. Under this key, you can list one or more configurations to create. For each configuration, give a name in addition to the list of layers that comprise the configuration.\nEach layer can be defined by a product name and optionally a version number, commit hash, or branch in the product\u0026rsquo;s configuration repository. If this method is used, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be supplied. However, if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.\nAlternatively, a configuration layer can be defined by explicitly referencing the desired configuration repository. You must then specify the intended version of the Ansible playbooks by providing a branch name or commit hash with branch or commit.\nThe following example shows a CFS configuration with two layers. 
The first layer is defined in terms of a product name and version, and the second layer is defined in terms of a Git clone URL and branch:\n--- configurations: - name: example-configuration layers: - name: example-product playbook: example.yml product: name: example version: 1.2.3 - name: another-example-product playbook: another-example.yml git: url: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34; branch: main When sat bootprep is run against an input file, a CFS configuration is created corresponding to each configuration in the configurations section. For example, the configuration created from an input file with the layers listed above might look something like the following:\n{ \u0026#34;lastUpdated\u0026#34;: \u0026#34;2022-02-07T21:47:49Z\u0026#34;, \u0026#34;layers\u0026#34;: [ { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;example.yml\u0026#34; }, { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;another example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;another-example.yml\u0026#34; } ], \u0026#34;name\u0026#34;: \u0026#34;example-configuration\u0026#34; } Defining IMS Images The IMS images are defined under an images key. Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name key and a base key.\nThe name key defines the name of the resulting IMS image. 
The base key defines the base image to be configured or the base recipe to be built and optionally configured. One of the following keys must be present under the base key:\nUse an ims key to specify an existing image or recipe in IMS. Use a product key to specify an image or recipe provided by a particular version of a product. Note that this is only possible if the product provides a single image or recipe. Use an image_ref key to specify another image from the input file using its ref_name. Images may also contain the following keys:\nUse a configuration key to specify a CFS configuration with which to customize the built image. If a configuration is specified, then configuration groups must also be specified using the configuration_group_names key. Use a ref_name key to specify a unique name that can refer to this image within the input file in other images or in session templates. The ref_name key allows references to images from the input file that have dynamically generated names as described in Dynamic Variable Substitutions. Use a description key to describe the image in the bootprep input file. Note that this key is not currently used. Here is an example of an image using an existing IMS recipe as its base. This example builds an IMS image from that recipe. It then configures it with a CFS configuration named example-compute-config. The example-compute-config CFS configuration can be defined under the configurations key in the same input file, or it can be an existing CFS configuration. Running sat bootprep against this input file results in an image named example-compute-image.\nimages: - name: example-compute-image description: \u0026gt; An example compute node image built from an existing IMS recipe. base: ims: name: example-compute-image-recipe type: recipe configuration: example-compute-config configuration_group_names: - Compute Here is an example showing the definition of two images. 
The first image is built from a recipe provided by the cos product. The second image uses the first image as a base and configures it with a configuration named example-compute-config. The value of the first image\u0026rsquo;s ref_name key is used in the second image\u0026rsquo;s base.image_ref key to specify it as a dependency. Running sat bootprep against this input file results in two images, the first named example-cos-image and the second named example-compute-image.\nimages: - name: example-cos-image ref_name: example-cos-image description: \u0026gt; An example image built from a recipe provided by the COS product. base: product: name: cos version: 2.3.101 type: recipe - name: example-compute-image description: \u0026gt; An example image built from a recipe provided by the COS product. base: image_ref: example-cos-image configuration: example-compute-config configuration_group_names: - Compute Defining BOS Session Templates The BOS session templates are defined under the session_templates key. Each session template must provide values for the name, image, configuration, and bos_parameters keys. The name key defines the name of the resulting BOS session template. The image key defines the image to use in the BOS session template. One of the following keys must be present under the image key:\nUse an ims key to specify an existing image or recipe in IMS. Use an image_ref key to specify another image from the input file using its ref_name. The configuration key defines the CFS configuration specified in the BOS session template.\nThe bos_parameters key defines parameters that are passed through directly to the BOS session template. The bos_parameters key should contain a boot_sets key, and each boot set in the session template should be specified under boot_sets. Each boot set can contain the following keys, all of which are optional:\nUse a kernel_parameters key to specify the parameters passed to the kernel on the command line. 
Use a network key to specify the network over which the nodes boot. Use a node_list key to specify the nodes to add to the boot set. Use a node_roles_groups key to specify the HSM roles to add to the boot set. Use a node_groups key to specify the HSM groups to add to the boot set. Use a rootfs_provider key to specify the root file system provider. Use a rootfs_provider_passthrough key to specify the parameters to add to the rootfs= kernel parameter. As mentioned above, the parameters under bos_parameters are passed through directly to BOS. For more information on the properties of a BOS boot set, refer to BOS Session Templates in the Cray System Management Documentation.\nHere is an example of a BOS session template that refers to an existing IMS image by name:\nsession_templates: - name: example-session-template image: ims: name: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 Here is an example of a BOS session template that refers to an image from the input file by its ref_name. This requires that an image defined in the input file specifies example-image as the value of its ref_name key.\nsession_templates: - name: example-session-template image: image_ref: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 HPC CSM Software Recipe Variable Substitutions The HPC CSM Software Recipe provides a manifest defining the versions of each HPC software product included in the recipe. 
These product versions can be used in the sat bootprep input file with Jinja2 template syntax.\nSelecting an HPC CSM Software Recipe Version By default, the sat bootprep command uses the product versions from the latest installed version of the HPC CSM Software Recipe. However, you can override this with the --recipe-version command line argument to sat bootprep run.\nFor example, to explicitly select the 22.11.0 version of the HPC CSM Software Recipe, specify --recipe-version 22.11.0:\nncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml Values Supporting Jinja2 Template Rendering The entire sat bootprep input file is not rendered by the Jinja2 template engine. Jinja2 template rendering of the input file is performed individually for each supported value. The values of the following keys support rendering as a Jinja2 template:\nThe name key of each configuration under the configurations key. The following keys of each layer under the layers key in a configuration: name git.branch product.version product.branch The following keys of each image under the images key: name base.product.version configuration The following keys of each session template under the session_templates key: name configuration You can use Jinja2 built-in filters in values of any of the keys listed above. In addition, Python string methods can be called on the string variables.\nViewing HPC CSM Software Recipe Variables HPC CSM Software Recipe variables are available, and you can use them in the values of the keys listed above. 
View these variables by cloning the hpc-csm-software-recipe repository from VCS and accessing the product_vars.yaml file on the branch that corresponds to the targeted version of the HPC CSM Software Recipe.\nSet up a shell script to access the password for the crayvcs user:\nncn-m001# cat \u0026gt; vcs-creds-helper.sh \u0026lt;\u0026lt;EOF #!/bin/bash kubectl get secret -n services vcs-user-credentials -o jsonpath={.data.vcs_password} | base64 -d EOF Ensure vcs-creds-helper.sh is executable:\nncn-m001# chmod u+x vcs-creds-helper.sh Set the GIT_ASKPASS environment variable to the path to the vcs-creds-helper.sh script:\nncn-m001# export GIT_ASKPASS=\u0026#34;$PWD/vcs-creds-helper.sh\u0026#34; Clone the hpc-csm-software-recipe repository:\nncn-m001# git clone https://crayvcs@api-gw-service-nmn.local/vcs/cray/hpc-csm-software-recipe.git Change the directory to the hpc-csm-software-recipe repository:\nncn-m001# cd hpc-csm-software-recipe View the versions of the HPC CSM Software Recipe on the system:\nncn-m001# git branch -r Check out the branch of the hpc-csm-software-recipe repository that corresponds to the targeted HPC CSM Software Recipe version. For example, for recipe version 22.11.0:\nncn-m001# git checkout cray/hpc-csm-software-recipe/22.11.0 View the contents of the file product_vars.yaml in the clone of the repository:\nncn-m001# cat product_vars.yaml The variables defined in the product_vars.yaml file can be used in the values that support Jinja2 templates. A variable is specified by a dot-separated path, with each component of the path representing a key in the YAML file. 
For example, a version of the COS product appears as follows in the product_vars.yaml file:\ncos: version: 2.4.76 This COS version can be used by specifying cos.version within a value in the input file.\nHPC CSM Software Recipe Variable Substitution Example The following example bootprep input file shows how a COS version can be used in a bootprep input file that creates a CFS configuration for computes. Only one layer is shown for brevity.\n--- configurations: - name: compute-{{recipe.version}} layers: - name: cos-compute-integration-{{cos.version}} playbook: cos-compute.yaml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration-{{cos.version}} Note: When the value of a key in the bootprep input file is a Jinja2 expression, it must be quoted to pass YAML syntax checking.\nJinja2 expressions can also use filters and Python\u0026rsquo;s built-in string methods to manipulate the variable values. For example, suppose only the major and minor components of a COS version are to be used in the branch name for the COS layer of the CFS configuration. You can use the split string method to achieve this as follows:\n--- configurations: - name: compute-{{recipe.version}} layers: - name: cos-compute-integration-{{cos.version}} playbook: cos-compute.yaml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration-{{cos.version.split(\u0026#39;.\u0026#39;)[0]}}-{{cos.version.split(\u0026#39;.\u0026#39;)[1]}} Dynamic Variable Substitutions Additional variables are available besides the product version variables provided by the HPC CSM Software Recipe. (For more information, see HPC CSM Software Recipe Variable Substitutions.) These additional variables are dynamic because their values are determined at run-time based on the context in which they appear. Available dynamic variables include the following:\nThe variable base.name can be used in the name of an image under the images key. 
The value of this variable is the name of the IMS image or recipe used as the base of this image. The variable image.name can be used in the name of a session template under the session_templates key. The value of this variable is the name of the IMS image used in this session template. These variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:\nYou want to build an image from a recipe provided by a product and use the name of the recipe in the name of the resulting image. You want to use the name of the image in the name of a session template, and the image is generated as described in the previous use case. Example Bootprep Input Files This section provides an example bootprep input file. It also gives instructions for obtaining the default bootprep input files delivered with a release of the HPC CSM Software Recipe.\nExample Bootprep Input File The following bootprep input file provides an example of using most of the features described in previous sections. 
It is not intended to be a complete bootprep file for the entire CSM product.\n--- configurations: - name: compute-{{recipe.version}} layers: - name: cos-compute-integration-{{cos.version}} playbook: site.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration-{{cos.version}} - name: cpe-pe_deploy-integration-{{cpe.version}} playbook: pe_deploy.yml product: name: cpe version: \u0026#34;{{cpe.version}}\u0026#34; branch: integration-{{cpe.version}} images: - name: \u0026#34;{{base.name}}\u0026#34; ref_name: base_cos_image base: product: name: cos type: recipe version: \u0026#34;{{cos.version}}\u0026#34; - name: compute-{{base.name}} ref_name: compute_image base: image_ref: base_cos_image configuration: compute-{{recipe.version}} configuration_group_names: - Compute session_templates: - name: compute-{{recipe.version}} image: image_ref: compute_image configuration: compute-{{recipe.version}} bos_parameters: boot_sets: compute: kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN} node_roles_groups: - Compute rootfs_provider_passthrough: \u0026#34;dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0\u0026#34; Accessing Default Bootprep Input Files Default bootprep input files are delivered by the HPC CSM Software Recipe product. You can access these files by cloning the hpc-csm-software-recipe repository.\nTo do this, follow steps 1-7 of the procedure in Viewing HPC CSM Software Recipe Variables. Then, access the files in the bootprep directory of that repository:\nncn-m001# ls bootprep/ Generating an Example Bootprep Input File The sat bootprep generate-example command was not updated for recent bootprep schema changes. It is recommended that you instead use the default bootprep input files described in Accessing Default Bootprep Input Files. 
The sat bootprep generate-example command will be updated in a future release of SAT.\nEditing HPC CSM Software Recipe Defaults You might need to edit the default bootprep input files delivered by the HPC CSM Software Recipe for your system. Here are some examples of how to edit the files.\nEditing Default Branch Names Before running sat bootprep, HPE recommends reading the bootprep input files and paying specific attention to the branch parameters. Some HPE Cray EX products require system-specific changes on a working branch of VCS. For these products, the default bootprep input files assume certain naming conventions for the VCS branches. The files refer to a particular branch of a product\u0026rsquo;s configuration management repository.\nThus, it is important to confirm that the bootprep input files delivered by the HPC CSM Software Recipe match the actual system branch names. For example, the COS product\u0026rsquo;s CFS configuration layer is defined as follows in the default management-bootprep.yaml bootprep input file.\n- name: cos-ncn-integration-{{cos.version}} playbook: ncn.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration-{{cos.version}} The default file is assuming that system-specific Ansible configuration changes for the COS product in VCS are stored in a branch named integration-{{cos.version}}. If the version being installed is COS 2.4.99, sat bootprep looks for a branch named integration-2.4.99 from which to create CFS configuration layers.\nYou can create VCS working branches that are not the default bootprep input file branch names. A simple example of this is using cne-install to update working VCS branches. If you use cne-install to update working VCS branches, (namely in the update_working_branches stage), you create or update the branches specified by the -B WORKING_BRANCH command line option. 
For example, consider the following cne-install command.\nncn-m001# ./cne-install install \\ -B integration \\ -s deploy_products \\ -e update_working_branches Products installed with this cne-install example use the working branch integration for system-specific changes to VCS. The branch specified by the -B option must match the branch specified in the bootprep input file.\nIn another example, to use the branch integration for COS instead of integration-{{cos.version}}, edit the bootprep input file so it reads as follows.\n- name: cos-ncn-integration-{{cos.version}} playbook: ncn.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration Editing Default Management CFS Configuration Names The default bootprep input file for management CFS configurations (management-bootprep.yaml) creates configurations that have names specified within the input file. For example, in the bootprep input files included in the 22.11 HPC CSM Software Recipe, the following configurations are named:\nncn-personalization ncn-image-customization These default management CFS configuration names might be acceptable for your system. However, it is possible to create other names. sat bootprep creates whatever configurations are specified in the input file. For example, to create a NCN node personalization configuration named ncn-personalization-test, edit the file as follows.\nconfigurations: - name: ncn-personalization-test layers: ... 
For management configurations, use sat status to identify the current desired configuration for each of the management nodes.\nncn-m001# sat status --fields xname,role,subrole,desiredconfig --filter role=management +----------------+------------+---------+---------------------+ | xname | Role | SubRole | Desired Config | +----------------+------------+---------+---------------------+ | x3000c0s1b0n0 | Management | Master | ncn-personalization | | x3000c0s3b0n0 | Management | Master | ncn-personalization | | x3000c0s5b0n0 | Management | Master | ncn-personalization | | x3000c0s7b0n0 | Management | Worker | ncn-personalization | | x3000c0s9b0n0 | Management | Worker | ncn-personalization | | x3000c0s11b0n0 | Management | Worker | ncn-personalization | | x3000c0s13b0n0 | Management | Worker | ncn-personalization | | x3000c0s17b0n0 | Management | Storage | ncn-personalization | | x3000c0s19b0n0 | Management | Storage | ncn-personalization | | x3000c0s21b0n0 | Management | Storage | ncn-personalization | | x3000c0s25b0n0 | Management | Worker | ncn-personalization | +----------------+------------+---------+---------------------+ To overwrite the desired configuration using sat bootprep, ensure the bootprep input file specifies to create a configuration with the same name (ncn-personalization in the example above). To create a different configuration, ensure the bootprep input file specifies to create a configuration with a different name than the desired configuration (different than ncn-personalization in the example above).\nUpgrading a Single Product and Overriding its Default Version When working with a given HPC CSM Software Recipe, it might be necessary to upgrade a single HPE Cray EX product past the default version given in the recipe. However, you might still want to use the other default product versions contained in that recipe. To do this, first upgrade the single product. 
For more information, refer to the upgrade instructions in that product\u0026rsquo;s documentation.\nAfter the product is upgraded, you must override its default version in subsequent runs of sat bootprep. The following process explains how to do this. In this example, all the default product versions from the 22.11 software recipe are used except for COS. The COS default product version is overridden to version 2.4.199 instead, and the CFS configurations in management-bootprep.yaml are created.\nEnsure you have a local copy of the default bootprep input files.\nFor more information, see Accessing Default Bootprep Input Files.\nEdit the product_vars.yaml file to change the default product version.\nncn-m001# vim product_vars.yaml Confirm the new product version in the edited product_vars.yaml file.\nncn-m001# grep -A1 cos: product_vars.yaml cos: version: 2.4.199 Use the --vars-file option when running sat bootprep to override the default product version.\nYou must run this command from the directory containing the product_vars.yaml file. The product_vars.yaml file must also be specified when using the --vars-file option. It is not sufficient to just edit the file.\nncn-m001# sat bootprep run --vars-file product_vars.yaml bootprep/management-bootprep.yaml Note: This example is specific to creating the configurations defined in management-bootprep.yaml. Review what configurations, images, or session templates you intend to create by viewing the input file.\nViewing Built-in Generated Documentation The contents of the YAML input files described above must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named \u0026ldquo;JSON Schema\u0026rdquo;, the schema itself is written in YAML as well.) 
More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.\nViewing the Exact Schema Specification To view the exact schema specification, run sat bootprep view-schema.\nncn-m001# sat bootprep view-schema --- $schema: \u0026#34;https://json-schema.org/draft/2020-12/schema\u0026#34; ... title: Bootprep Input File description: \u0026gt; A description of the set of CFS configurations to create, the set of IMS images to create and optionally customize with the defined CFS configurations, and the set of BOS session templates to create that reference the defined images and configurations. type: object additionalProperties: false properties: ... Generating User-Friendly Documentation The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature is included that generates user-friendly HTML documentation for the input file schema. This HTML documentation can be browsed with your preferred web browser.\nCreate a documentation tarball using sat bootprep.\nncn-m001# sat bootprep generate-docs INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz An alternate output directory can be specified with the --output-dir option. The generated tarball is always named bootprep-schema-docs.tar.gz.\nncn-m001# sat bootprep generate-docs --output-dir /tmp INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz From another machine, copy the tarball to a local directory.\nanother-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz . 
Extract the contents of the tarball and open the contained index.html.\nanother-machine$ tar xzvf bootprep-schema-docs.tar.gz x bootprep-schema-docs/ x bootprep-schema-docs/index.html x bootprep-schema-docs/schema_doc.css x bootprep-schema-docs/schema_doc.min.js another-machine$ open bootprep-schema-docs/index.html " +}, +{ + "uri": "/docs-sat/en-24/dashboards/sat_kibana_dashboards/", + "title": "SAT Kibana Dashboards", + "tags": [], + "description": "", + "content": "SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information.\nKibana can be accessed via web browser at the following URL:\nhttps://sma-kibana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com\nFor more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product documentation.\nAdditional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this table.\nDashboard Short Description Long Description Kibana Visualization and Search Name sat-aer AER corrected Corrected Advanced Error Reporting messages from PCI Express devices on each node. 
Visualization: aer-corrected Search: sat-aer-corrected sat-aer AER fatal Fatal Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-fatal Search: sat-aer-fatal sat-atom ATOM failures Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. sat-atom-failed sat-atom ATOM admindown Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. sat-atom-admindown sat-heartbeat Heartbeat loss events Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. sat-heartbeat sat-kernel Kernel assertions The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. sat-kassertions sat-kernel Kernel panics The kernel panics when something is seriously wrong. The node goes down. sat-kernel-panic sat-kernel Lustre bugs (LBUGs) The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. sat-lbug sat-kernel CPU stalls CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. sat-cpu-stall sat-kernel Out of memory An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. sat-oom sat-mce MCE Machine Check Exceptions (MCE) are errors detected at the processor level. 
sat-mce sat-rasdaemon rasdaemon errors Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. sat-rasdaemon-error sat-rasdaemon rasdaemon messages All messages from the rasdaemon service on nodes. sat-rasdaemon Disable Search Highlighting in Kibana Dashboard By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.\nThe Kibana Dashboard should be open on your system.\nNavigate to Management\nNavigate to Advanced Settings in the Kibana section, below the Elastic search section\nScroll down to the Discover section\nChange Highlight results from on to off\nClick Save to save changes\nAER Kibana Dashboard The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors are split up into separate visualizations depending on whether they are fatal or corrected errors.\nView the AER Kibana Dashboard Go to the dashboard section.\nSelect sat-aer dashboard.\nChoose the time range of interest.\nView the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nATOM Kibana Dashboard The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health checks and application test failures. Some test failures are of possible interest even though a node is not marked admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide clues if a node otherwise fails. 
They might also show application problems.\nView the ATOM Kibana Dashboard HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.\nGo to the dashboard section.\nSelect sat-atom dashboard.\nChoose the time range of interest.\nView any nodes marked admindown and any ATOM test failures. These failures occur during health checks and application test failures. Test failures marked admindown are important to note. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nHeartbeat Kibana Dashboard The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss.\nView the Heartbeat Kibana Dashboard Go to the dashboard section.\nSelect sat-heartbeat dashboard.\nChoose the time range of interest.\nView the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.\nKernel Kibana Dashboard The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and may require expert analysis. 
They provide useful clues for some node failures and may reveal if an application is using too much memory.\nView the Kernel Kibana Dashboard Go to the dashboard section.\nSelect sat-kernel dashboard.\nChoose the time range of interest.\nView the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nMCE Kibana Dashboard The MCE Dashboard displays CPU detected processor-level hardware errors.\nView the MCE Kibana Dashboard Go to the dashboard section.\nSelect sat-mce dashboard.\nChoose the time range of interest.\nView the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nRAS Daemon Kibana Dashboard The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages presented in the MCE and AER dashboards. 
This dashboard splits up the messages into two separate visualizations, one for only messages of severity emerg or err and another for all messages from rasdaemon.\nView the RAS Daemon Kibana Dashboard Go to the dashboard section.\nSelect sat-rasdaemon dashboard.\nChoose the time range of interest.\nView the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\n" +}, +{ + "uri": "/docs-sat/en-24/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Summary of Changes in SAT 2.4 The 2.4.13 version of the SAT product includes:\nVersion 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT:\nsat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. For more information, see SAT in CSM.\nSAT Installation Improvements The SAT install.sh script no longer uses a sat-cfs-install Helm chart and container image to upload its Ansible content to the sat-config-management repository in VCS. Instead, it uses Podman to run the cf-gitea-import container directly. 
Some of the benefits of this change include the following:\nFewer container images that need to be managed by the SAT product Simplified SAT installation without Helm charts or Loftsman manifests Reduced SAT installation time Decoupling of cray-sat container image and cray-sat-podman package Decoupling of cray-sat Container Image and cray-sat-podman Package In older SAT releases, the sat wrapper script that was provided by the cray-sat-podman package installed on Kubernetes master NCNs included a hard-coded version of the cray-sat container image. As a result, every new version of the cray-sat image required a corresponding new version of the cray-sat-podman package.\nIn this release, this tight coupling of the cray-sat-podman package and the cray-sat container image was removed. The sat wrapper script provided by the cray-sat-podman package now looks for the version of the cray-sat container image in the /opt/cray/etc/sat/version file. This file is populated with the correct version of the cray-sat container image by the SAT layer of the CFS configuration that is applied to management NCNs. If the version file does not exist, the wrapper script defaults to the version of the cray-sat container image delivered with the latest version of CSM installed on the system.\nImproved NCN Personalization Automation The steps for performing NCN personalization as part of the SAT installation were moved out of the install.sh script and into a new update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release distribution. The new script provides additional flexibility in how it modifies the NCN personalization CFS configuration for SAT. It can modify an existing CFS configuration by name, a CFS configuration being built in a JSON file, or an existing CFS configuration that applies to certain components. 
For more information, see Perform NCN Personalization.\nNew sat bootprep Features The following new features were added to the sat bootprep command:\nVariable substitutions using Jinja2 templates in certain fields of the sat bootprep input file\nFor more information, see HPC CSM Software Recipe Variable Substitutions and Dynamic Variable Substitutions.\nSchema version validation in the sat bootprep input files\nFor more information, see Providing a Schema Version.\nAbility to look up images and recipes provided by products\nFor more information, see Defining IMS Images.\nThe schema of the sat bootprep input files was also changed to support these new features:\nThe base recipe or image used by an image in the input file should now be specified under a base key instead of under an ims key. The old ims key is deprecated. To specify an image that depends on another image in the input file, the dependent image should specify the dependency under base.image_ref. You should no longer use the IMS name of the image on which it depends. The image used by a session template should now be specified under image.ims.name, image.ims.id, or image.image_ref. Specifying a string value directly under the image key is deprecated. For more information on defining IMS images and BOS session templates in the sat bootprep input file, see Defining IMS Images and Defining BOS Session Templates.\nAdded Blade Swap Support to sat swap The sat swap command was updated to support swapping compute and UAN blades with sat swap blade. This functionality is described in the following processes of the Cray System Management Documentation:\nAdding a Liquid-cooled blade to a System Using SAT Removing a Liquid-cooled blade from a System Using SAT Replace a Compute Blade Using SAT Swap a Compute Blade with a Different System Using SAT Support for BOS v2 A new v2 version of the Boot Orchestration Service (BOS) is available in CSM 1.3.0. SAT has added support for BOS v2. 
This impacts the following commands that interact with BOS:\nsat bootprep sat bootsys sat status By default, SAT uses BOS v1. However, you can choose the BOS version you want to use. For more information, see Change the BOS Version.\nAdded BOS Fields to sat status When using BOS v2, sat status outputs additional fields. These fields show the most recent BOS session, session template, booted image, and boot status for each node. An additional --bos-fields option was added to limit the output of sat status to these fields. The fields are not displayed when using BOS v1.\nOpen Source Repositories This is the first release of SAT built from open source code repositories. As a result, build infrastructure was changed to use an external Jenkins instance, and artifacts are now published to an external Artifactory instance. These changes should not impact the functionality of the SAT product in any way.\nSecurity CVE Mitigation The paramiko Python package version was updated from 2.9.2 to 2.10.1 to mitigate CVE-2022-24302. The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to mitigate CVE-2022-36087. Restricted Permissions on SAT Config Files and Directories SAT stores information used to authenticate to the API gateway with Keycloak. Token files are stored in the ~/.config/sat/tokens/ directory. Those files have always had permissions appropriately set to restrict them to be readable only by the user.\nKeycloak usernames used to authenticate to the API gateway are stored in the SAT config file at ~/.config/sat/sat.toml. Keycloak usernames are also used in the file names of tokens stored in ~/.config/sat/tokens. As an additional security measure, SAT now restricts the permissions of the SAT config file to be readable and writable only by the user. It also restricts the tokens directory and the entire SAT config directory ~/.config/sat to be accessible only by the user. 
This prevents other users on the system from viewing Keycloak usernames used to authenticate to the API gateway.\nBug Fixes Fixed an issue where sat init did not print a message confirming a new configuration file was created. Fixed an issue where sat showrev exited with a traceback if the file /opt/cray/etc/site_info.yaml existed but was empty. This could occur if the user exited sat setrev with Ctrl-C. Fixed outdated information in the sat bootsys man page, and added a description of the command stages. Summary of Changes in SAT 2.3 The 2.3.4 version of the SAT product includes:\nVersion 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None.\nCurrent Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share.\nFiles in the current working directory must be specified using relative paths to that directory, because the current working directory is always mounted on /sat/share. Absolute paths should be avoided, and paths that are outside of $HOME or $PWD are never accessible to the container environment.\nThe home directory is still mounted on the same path inside the container as it is on the host.\nChanges to sat bootsys The following options were added to sat bootsys.\n--bos-limit --recursive The --bos-limit option passes a given limit string to a BOS session. The --recursive option specifies a slot or other higher-level component in the limit string\nChanges to sat bootprep The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS jobs after sat bootprep is run. 
Jobs are no longer deleted by default.\nChanges to sat status sat status now includes information about nodes\u0026rsquo; CFS configuration statuses, such as desired configuration, configuration status, and error count.\nThe output of sat status now splits different component types into different report tables.\nThe following options were added to sat status.\n--hsm-fields, --sls-fields, --cfs-fields --bos-template The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns according to specified CSM services.\nThe --bos-template option filters the status report according to the specified session template\u0026rsquo;s boot sets.\nCompatibility with CSM 1.2 The following components were modified to be compatible with CSM 1.2.\nsat-cfs-install container image and Helm chart sat-install-utility container image SAT product installer GPG Checking The sat-ncn Ansible role provided by sat-cfs-install was modified to enable GPG checks on packages while leaving GPG checks disabled on repository metadata.\nSecurity Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed Python dependency versions.\nBug Fixes Minor bug fixes were made in each of the repositories. 
For full change lists, refer to each repository’s CHANGELOG.md file.\nThe known issues listed under the SAT 2.2 release were fixed.\nSummary of Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022.\nThis version of the SAT product included:\nVersion 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components:\nVersion 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release.\nKnown Issues in SAT 2.2 sat Command Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, the sat command will not be found. For example:\n(CONTAINER-ID) sat-container:~ # sat status bash: sat: command not found This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the $PATH environment variable:\n(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH (CONTAINER-ID) sat-container:~ # sat status Or, the file /sat/venv/bin/activate may be sourced:\n(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate (CONTAINER-ID) sat-container:~ # sat status Tab Completion Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, tab completion for sat commands does not work.\nThis can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:\nsource /etc/bash_completion.d/sat-completion.bash OCI Runtime Permission Error when Running sat in Root Directory sat commands will not work if the current directory is /. 
For example:\nncn-m001:/ # sat --help Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error To resolve, run sat in another directory.\nDuplicate Mount Error when Running sat in Config Directory sat commands will not work if the current directory is ~/.config/sat. For example:\nncn-m001:~/.config/sat # sat --help Error: /root/.config/sat: duplicate mount destination To resolve, run sat in another directory.\nNew sat Commands sat bootprep automates the creation of CFS configurations, the build and customization of IMS images, and the creation of BOS session templates. For more information, see SAT Bootprep. sat slscheck performs a check for consistency between the System Layout Service (SLS) and the Hardware State Manager (HSM). sat bmccreds provides a simple interface for interacting with the System Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist displays hardware component history by XName (location) or by its Field-Replaceable Unit ID (FRUID). This command queries the Hardware State Manager (HSM) API to obtain this information. Since the sat hwhist command supports querying for the history of a component by its FRUID, the FRUID of components has been added to the output of sat hwinv. Additional Install Automation The following automation has been added to the install script, install.sh:\nWait for the completion of the sat-config-import Kubernetes job, which is started when the sat-cfs-install Helm chart is deployed. Automate the modification of the CFS configuration, which applies to master management NCNs (for example, ncn-personalization). 
Changes to Product Catalog Data Schema The SAT product uploads additional information to the cray-product-catalog Kubernetes ConfigMap detailing the components it provides, including container (Docker) images, Helm charts, RPMs, and package repositories.\nThis information is used to support uninstall and activation of SAT product versions moving forward.\nSupport for Uninstall and Activation of SAT Versions Beginning with the 2.2 release, SAT now provides partial support for the uninstall and activation of the SAT product stream.\nFor more information, see Uninstall: Removing a Version of SAT and Activate: Switching Between Versions.\nImprovements to sat status A Subrole column has been added to the output of sat status. This allows you to easily differentiate between master, worker, and storage nodes in the management role, for example.\nHostname information from SLS has been added to sat status output.\nAdded Support for JSON Output Support for JSON-formatted output has been added to commands which currently support the --format option, such as hwinv, status, and showrev.\nUsability Improvements Many usability improvements have been made to multiple sat commands, mostly related to filtering command output. The following are some highlights:\nAdded --fields option to display only specific fields for subcommands which display tabular reports. Added ability to filter on exact matches of a field name. Improved handling of multiple matches of a field name in --filter queries so that the first match is used, similar to --sort-by. Added support for --filter, --fields, and --reverse for summaries displayed by sat hwinv. Added borders to summary tables generated by sat hwinv. Improved documentation in the man pages. Default Log Level Changed The default log level for stderr has been changed from \u0026ldquo;WARNING\u0026rdquo; to \u0026ldquo;INFO\u0026rdquo;. 
For more information, see SAT Logging.\nMore Granular Log Level Configuration Options With the command-line options --loglevel-stderr and --loglevel-file, the log level can now be configured separately for stderr and the log file.\nThe existing --loglevel option is now an alias for the --loglevel-stderr option.\nPodman Wrapper Script Improvements The Podman wrapper script is the script installed at /usr/bin/sat on the master management NCNs by the cray-sat-podman RPM that runs the cray-sat container in podman. The following subsections detail improvements that were made to the wrapper script in this release.\nMounting of $HOME and Current Directories in cray-sat Container The Podman wrapper script that launches the cray-sat container with podman has been modified to mount the user\u0026rsquo;s current directory and home directory into the cray-sat container to provide access to local files in the container.\nPodman Wrapper Script Documentation Improvements The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:\nEnvironment variables that affect execution of the wrapper script Host files and directories mounted in the container Fixes to Podman Wrapper Script Output Redirection Fixed issues with redirecting stdout and stderr, and piping output to commands, such as awk, less, and more.\nConfigurable HTTP Timeout A new sat option has been added to configure the HTTP timeout length for requests to the API gateway. For more information, refer to sat-man sat.\nsat bootsys Improvements Many improvements and fixes have been made to sat bootsys. The following are some highlights:\nAdded the --excluded-ncns option, which can be used to omit NCNs from the platform-services and ncn-power stages in case they are inaccessible. Disruptive shutdown stages in sat bootsys shutdown now prompt the user to continue before proceeding. A new option, --disruptive, will bypass this. 
Improvements to Ceph service health checks and restart during the platform-services stage of sat bootsys boot. sat xname2nid Improvements sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to a list of NIDs in those locations.\nA new --format option has been added to sat xname2nid. It sets the output format to either \u0026ldquo;range\u0026rdquo; (the default) or \u0026ldquo;NID\u0026rdquo;. The \u0026ldquo;range\u0026rdquo; format displays NIDs in a compressed range format suitable for use with a workload manager like Slurm.\nUsage of v2 HSM API The commands which interact with HSM (for example, sat status and sat hwinv) now use the v2 HSM API.\nsat diag Limited to HSN Switches sat diag will now only operate against HSN switches by default. These are the only controllers that support running diagnostics with HMJTD.\nsat showrev Enhancements A column has been added to the output of sat showrev that indicates whether a product version is \u0026ldquo;active\u0026rdquo;. The definition of \u0026ldquo;active\u0026rdquo; varies across products, and not all products may set an \u0026ldquo;active\u0026rdquo; version.\nFor SAT, the active version is the one with its hosted-type package repository in Nexus set as the member of the group-type package repository in Nexus, meaning that it will be used when installing the cray-sat-podman RPM.\ncray-sat Container Image Size Reduction The size of the cray-sat container image has been approximately cut in half by leveraging multi-stage builds. This also improved the repeatability of the unit tests by running them in the container.\nBug Fixes Minor bug fixes were made in cray-sat and in cray-sat-podman. 
For full change lists, refer to each repository\u0026rsquo;s CHANGELOG.md file.\nSummary of SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named ncn-personalization.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-Diags product. 
The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic programs on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and config file options have been removed. For more information, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes masters and workers.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. 
Summary of SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. Summary of SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. 
This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes management cluster (workers and masters). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Config File Location Change The default location of the SAT config file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own config files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a config file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (for example, GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. List node accelerator risers (for example, Redstone modules) with the --list-node-accel-risers option. 
The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. For more information, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API. 
Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. A command-line option, --bos-templates, and a corresponding config-file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding config file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log. 
This change simplifies mounting this file into the sat container running under Podman.\nSummary of SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\nSummary of SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the System Admin Toolkit Command Overview and the table of commands in the SAT Authentication section of this document.\n" +}, +{ + "uri": "/docs-sat/en-24/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-24/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-24/index.xml b/en-24/index.xml new file mode 100644 index 0000000000..0a4f52069d --- /dev/null +++ b/en-24/index.xml @@ -0,0 +1,68 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-24/ + 
Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-24 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Installation + /docs-sat/en-24/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/install/ + SAT Installation Install the System Admin Toolkit Product Stream Describes how to install or upgrade the System Admin Toolkit (SAT) product stream. Prerequisites CSM is installed and verified. There must be at least 2 gigabytes of free space on the manager NCN on which the procedure is run. Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being installed. + + + Change the BOS Version + /docs-sat/en-24/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version one. You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. You can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. + + + Introduction to SAT + /docs-sat/en-24/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview. 
+ + + SAT Grafana Dashboards + /docs-sat/en-24/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Upgrade with CNE Installer + /docs-sat/en-24/cne_install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/cne_install/ + SAT Upgrade with CNE Installer Upgrade the System Admin Toolkit Product Stream Describes how to upgrade the System Admin Toolkit (SAT) product stream by using the Compute Node Environment (CNE) installer (cne-install). The CNE installer can be used only for upgrades and not for fresh installations. For installation instructions, see Install the System Admin Toolkit Product Stream. Upgrading SAT with cne-install is recommended because the process is both automated and logged to help you save time. + + + SAT Bootprep + /docs-sat/en-24/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similarly to other SAT commands. 
ncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session. + + + SAT Kibana Dashboards + /docs-sat/en-24/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. + + + SAT Release Notes + /docs-sat/en-24/release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/release_notes/ + SAT Release Notes Summary of Changes in SAT 2.4 The 2.4.13 version of the SAT product includes: Version 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT: sat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. + + + diff --git a/en-24/install/index.html b/en-24/install/index.html new file mode 100644 index 0000000000..420af9130e --- /dev/null +++ b/en-24/install/index.html @@ -0,0 +1,1836 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install the System Admin Toolkit Product Stream

+

Describes how to install or upgrade the System Admin Toolkit (SAT) product +stream.

+

Prerequisites

+
    +
  • CSM is installed and verified.
  • +
  • There must be at least 2 gigabytes of free space on the manager NCN on which the +procedure is run.
  • +
+

Notes on the Procedures

+
    +
  • +

    Ellipses (...) in shell output indicate omitted lines.

    +
  • +
  • +

    In the examples below, replace x.y.z with the version of the SAT product stream +being installed.

    +
  • +
  • +

    ‘manager’ and ‘master’ are used interchangeably in the steps below.

    +
  • +
  • +

    To upgrade SAT, execute the pre-installation, installation, and post-installation +procedures for a newer distribution. The newly installed version will become +the default.

    +

    In SAT 2.4, you can instead upgrade the product stream by using the +Compute Node Environment (CNE) installer. It is recommended that you upgrade +SAT with the CNE installer because the process is both automated and logged +to help you save time. For more information, see +SAT Upgrade with CNE Installer.

    +
  • +
+

Pre-Installation Procedure

+
    +
  1. +

    Start a typescript and set the shell prompt.

    +

    The typescript will record the commands and the output from this installation. +The prompt is set to include the date and time.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Installation Procedure

+
    +
  1. +

    Copy the release distribution gzipped tar file to ncn-m001.

    +
  2. +
  3. +

    Unzip and extract the release distribution.

    +
    ncn-m001# tar -xvzf sat-x.y.z.tar.gz
    +
  4. +
  5. +

    Change directory to the extracted release distribution directory.

    +
    ncn-m001# cd sat-x.y.z
    +
  6. +
  7. +

    Run the installer: install.sh.

    +

    The script produces a lot of output. A successful install ends with “SAT +version x.y.z has been installed”, where x.y.z is the SAT product version.

    +
    ncn-m001# ./install.sh
    +====> Installing System Admin Toolkit version x.y.z
    +...
    +====> Waiting 300 seconds for sat-config-import-x.y.z to complete
    +...
    +====> SAT version x.y.z has been installed.
    +
  8. +
  9. +

    Optional: Stop the typescript.

    +

    NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install (see Next Steps).

    +
    ncn-m001# exit
    +
  10. +
+

SAT version x.y.z is now installed/upgraded, meaning the SAT x.y.z release +has been loaded into the system software repository.

+
    +
  • SAT configuration content for this release has been uploaded to VCS.
  • +
  • SAT content for this release has been uploaded to the CSM product catalog.
  • +
  • SAT content for this release has been uploaded to Nexus repositories.
  • +
  • The sat command won’t be available until the NCN Personalization +procedure has been executed.
  • +
+

Next Steps

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +(S-8000) to determine which step to +execute next.

+

If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.

+

NOTE: The procedures in Configure SAT are only required during the +first installation of SAT. However, the NCN Personalization procedure +is required both when installing and upgrading SAT.

+

If performing a fresh install, execute the Configure SAT procedures:

+ +

Execute the NCN Personalization procedure:

+ +

If performing an upgrade, execute the SAT Post-Upgrade procedures:

+ +

NOTE: The Set System Revision Information procedure is not required +after upgrading from SAT 2.1 or later.

+

Configure SAT

+

SAT Authentication

+

Initially, as part of the installation and configuration, SAT authentication is set up so SAT commands can be used in +later steps of the install process. The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings, see +Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation. +For additional information on SAT authentication, see System Security and Authentication in the CSM +documentation.

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Description of SAT Command Authentication Types

+

Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to +the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket, +the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be +done on every Kubernetes manager node where SAT commands are run.

+

Below is a table describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bmccredsRequires authentication to the API gateway.sat-bmccredsSet BMC passwords.
sat bootprepRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install.sat-bootprepPrepare to boot nodes with images and configurations.
sat bootsysRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shut down the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwhistRequires authentication to the API gateway.sat-hwhistReport hardware component history.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat k8sRequires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install.sat-k8sReport on Kubernetes replica sets that have co-located (on the same node) replicas.
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node XNames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat slscheckRequires authentication to the API gateway.sat-slscheckPerform a cross-check between SLS and HSM.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC XNames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password +on the command line. The username value is obtained from the following locations, in order from highest to lowest +precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to +~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above, +and will use the token for that username if it has been obtained and saved by sat auth.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:

+
    +
  1. +

    Generate a default SAT configuration file, if one does not exist.

    +
    ncn-m001# sat init
    +Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the config file already exists, sat init will print out an error:

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file. For +example:

    +
    username = "crayadmin"
    +
  4. +
  5. +

    Run sat auth. Enter your password when prompted. For example:

    +
    ncn-m001# sat auth
    +Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    Other sat commands are now authenticated to make requests to the API gateway. For example:

    +
    ncn-m001# sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev +(see: Set System Revision Information).

+

NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Ensure the files are readable only by root.

    +
    ncn-m001# touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    Write the credentials to local files using kubectl.

    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      Get the SAT configuration file’s endpoint value.

      +

      NOTE: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      ncn-m001# grep endpoint ~/.config/sat/sat.toml
      +# endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      Get the sat-s3-credentials secret’s endpoint value.

      +
      ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    Copy SAT configurations to each manager node on the system.

    +
    ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is +ncn-m002 and ncn-m003.

    +
  8. +
+

Set System Revision Information

+

HPE service representatives use system revision information data to identify +systems in support cases.

+

Prerequisites

+ +

Notes on the Procedure

+
    +
  • This procedure is required after a fresh install of SAT.
  • +
  • After an upgrade of SAT, this procedure is not required if SAT was upgraded +from 2.1 (Shasta v1.5) or later. It is required if SAT was upgraded from +2.0 (Shasta v1.4) or earlier.
  • +
+

Procedure

+
    +
  1. +

    Set System Revision Information.

    +

    Run sat setrev and follow the prompts to set the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    TIP: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Verify System Revision Information.

    +

    Run sat showrev and verify the output shown in the “System Revision Information” table.

    +

    The following example shows sample table output.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+

Perform NCN Personalization

+

A new CFS configuration layer must be added to the CFS configuration used on +management NCNs. It is required following SAT installation and configuration. +This procedure describes how to add that layer.

+

Prerequisites

+ +

Notes on the Procedure

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
  • If upgrading SAT, the existing configuration will likely include other Cray EX product +entries. Update the SAT entry as described in this procedure. The HPE Cray EX System +Software Getting Started Guide (S-8000) +provides guidance on how and when to update the entries for the other products.
  • +
+

Pre-NCN-Personalization Procedure

+
    +
  1. +

    Start a typescript if not already using one, and set the shell prompt.

    +

    The typescript will record the commands and the output from this installation. +The prompt is set to include the date and time.

    +
    ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
    +ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
    +
  2. +
+

Procedure to Update CFS Configuration

+

The SAT release distribution includes a script, update-mgmt-ncn-cfs-config.sh, +that updates a CFS configuration to include the SAT layer required to +install and configure SAT on the management NCNs.

+

The script supports modifying a named CFS configuration in CFS, a CFS +configuration defined in a JSON file, or the CFS configuration +currently applied to particular components in CFS.

+

The script also includes options for specifying:

+
    +
  • how the modified CFS configuration should be saved.
  • +
  • the git commit hash or branch specified in the SAT layer.
  • +
+

This procedure is split into three alternatives, which cover common use cases:

+ +

If none of these alternatives fit your use case, see Advanced Options for +Updating CFS Configurations.

+

Update Active CFS Configuration

+

Use this alternative if there is already a CFS configuration assigned to the +management NCNs and you would like to update it in place for the new version of +SAT.

+
    +
  1. +

    Run the script with the following options:

    +
    ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-query role=Management,type=Node --save
    +
  2. +
  3. +

    Examine the output to ensure the CFS configuration was updated.

    +

    For example, if there is a single CFS configuration that applies to NCNs, and if +that configuration does not have a layer yet for any version of SAT, the +output will look like this:

    +
    ====> Updating CFS configuration(s)
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
    +...
    +INFO: Found configuration "ncn-personalization" for component x3000c0s9b0n0
    +...
    +INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
    +INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
    +INFO: Successfully saved CFS configuration "ncn-personalization"
    +INFO: Successfully saved 1 changed CFS configurations.
    +====> Completed CFS configuration(s)
    +====> Cleaning up install dependencies
    +

    Alternatively, if the CFS configuration already contains a layer for +SAT that just needs to be updated, the output will look like this:

    +
    ====> Updating CFS configuration(s)
    +INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0
    +INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
    +...
    +INFO: Found configuration "ncn-personalization" for component x3000c0s9b0n0
    +...
    +INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml
    +INFO: Property "commit" of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from 01ae28c92b9b4740e9e0e01ae01216c6c2d89a65 to bcbd6db0803cc4137c7558df9546b0faab303cbd
    +INFO: Property "name" of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-2.2.16 to sat-sat-ncn-bcbd6db-20220608T170152
    +INFO: Successfully saved CFS configuration "ncn-personalization"
    +INFO: Successfully saved 1 changed CFS configurations.
    +====> Completed CFS configuration(s)
    +====> Cleaning up install dependencies
    +
  4. +
+

Update CFS Configuration in a JSON File

+

Use this alternative if you are constructing a new CFS configuration for +management NCNs in a JSON file.

+
    +
  1. +

    Run the script with the following options, where JSON_FILE is an +environment variable set to the path of the JSON file to modify:

    +
    ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-file $JSON_FILE --save
    +
  2. +
  3. +

    Examine the output to ensure the JSON file was updated.

    +

    For example, if the configuration defined in the JSON file does not have a layer yet for any +version of SAT, the output will look like this:

    +
    ====> Updating CFS configuration(s)
    +INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
    +INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
    +INFO: Successfully saved 1 changed CFS configurations.
    +====> Completed CFS configuration(s)
    +====> Cleaning up install dependencies
    +
  4. +
+

Update Existing CFS Configuration by Name

+

Use this alternative if you are updating a specific named CFS configuration. +This may be the case if you are constructing a new CFS configuration during an +install or upgrade of multiple products.

+
    +
  1. +

    Run the script with the following options, where CFS_CONFIG_NAME is an +environment variable set to the name of the CFS configuration to update:

    +
    ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-config $CFS_CONFIG_NAME --save
    +
  2. +
  3. +

    Examine the output to ensure the CFS configuration was updated.

    +

    For example, if the CFS configuration does not have a layer yet for any version of SAT, +the output will look like this:

    +
    ====> Updating CFS configuration(s)
    +INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
    +INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
    +INFO: Successfully saved CFS configuration "CFS_CONFIG_NAME"
    +INFO: Successfully saved 1 changed CFS configurations.
    +====> Completed CFS configuration(s)
    +====> Cleaning up install dependencies
    +
  4. +
+

Advanced Options for Updating CFS Configurations

+

If none of the alternatives described in the previous sections apply, view the +full description of the options accepted by the update-mgmt-ncn-cfs-config.sh +script by invoking it with the --help option.

+
ncn-m001# ./update-mgmt-ncn-cfs-config.sh --help
+

Procedure to Apply CFS Configuration

+

After the CFS configuration that applies to management NCNs has been updated as +described in the Procedure to Update CFS Configuration, +execute the following steps to ensure the modified CFS configuration is re-applied to the management NCNs.

+
    +
  1. +

    Set an environment variable that refers to the name of the CFS configuration +to be applied to the management NCNs.

    +
    ncn-m001# export CFS_CONFIG_NAME="ncn-personalization"
    +

    Note: If the Update Active CFS Configuration +section was followed above, the name of the updated CFS configuration will +have been logged in the following format. If multiple CFS configurations +were modified, any one of them can be used in this procedure.

    +
    INFO: Successfully saved CFS configuration "ncn-personalization"
    +
  2. +
  3. +

    Obtain the name of the CFS configuration layer for SAT and save it in an +environment variable:

    +
    ncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
    +    | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
    +
  4. +
  5. +

    Create a CFS session that executes only the SAT layer of the given CFS +configuration.

    +

    The --configuration-limit option limits the configuration session to run +only the SAT layer of the configuration.

    +
    ncn-m001# cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
    +    "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
    +
  6. +
  7. +

    Monitor the progress of the CFS session.

    +

    Set an environment variable to the name of the Ansible container within the pod +for the CFS session:

    +
    ncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \
    +    -o json | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
    +

    Next, get the logs for the Ansible container.

    +
    ncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output. The following example shows a successful session.

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  8. +
  9. +

    Verify that SAT was successfully configured.

    +

    If sat is configured, the --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +sat 3.7.0
    +

    NOTE: Upon first running sat, you may see additional output while the sat +container image is downloaded. This will occur the first time sat is run on +each manager NCN. For example, if you run sat for the first time on ncn-m001 +and then for the first time on ncn-m002, you will see this additional output +both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  10. +
  11. +

    Stop the typescript.

    +
    ncn-m001# exit
    +
  12. +
+

SAT version x.y.z is now installed and configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Note on Procedure to Apply CFS Configuration

+

The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.

+

The main scenario in which the CFS batcher will not automatically re-apply the +SAT layer is when the commit hash of the sat-config-management git repository +has not changed between SAT versions. The previous procedure ensures the +configuration is re-applied in all cases, and it is harmless if the batcher has +already applied an updated configuration.

+

Next Steps

+

At this point, the release distribution files can be removed from the system as +described in Post-Installation Cleanup Procedure.

+

If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +(S-8000) to determine which step +to execute next.

+

If no other HPE Cray EX software products are being installed at this time, +the installation process is complete. If no other HPE Cray EX software products +are being upgraded at this time, proceed to the remaining SAT Post-Upgrade +procedures:

+ +

NOTE: The Set System Revision Information procedure is not required after upgrading from SAT 2.1 or later.

+

Post-Installation Cleanup Procedure

+
    +
  1. +

    Optional: Remove the SAT release distribution tar file and extracted directory.

    +
    ncn-m001# rm sat-x.y.z.tar.gz
    +ncn-m001# rm -rf sat-x.y.z/
    +
  2. +
+

SAT Post-Upgrade

+

Remove Obsolete Configuration File Sections

+

Prerequisites

+ +

Procedure

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. +In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+

SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO level +are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
    +
  • sat bootsys --stage shutdown --stage session-checks
  • +
  • sat sensors
  • +
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
  • sat swap
  • +
+

SAT Uninstall and Downgrade

+

Uninstall: Removing a Version of SAT

+

This procedure can be used to uninstall a version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be uninstalled with prodmgr.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +

    NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| product_name | product_version | active | images            | image_recipes         |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                 | -                     |
    +| sat          | 2.2.10          | False  | -                 | -                     |
    ++--------------+-----------------+--------+-------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to uninstall a version of SAT.

    +

    This command will do three things:

    +
      +
    • Remove all hosted-type package repositories associated with the given version of SAT. Group-type +repositories are not removed.
    • +
    • Remove all container images associated with the given version of SAT.
    • +
    • Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up +in the output of sat showrev.
    • +
    +
    ncn-m001# prodmgr uninstall sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 has been removed.
    +Removed Docker image cray/cray-sat:3.9.0
    +Removed Docker image cray/sat-cfs-install:1.0.2
    +Removed Docker image cray/sat-install-utility:1.4.0
    +Deleted sat-2.2.10 from product catalog.
    +
  4. +
+

Activate: Switching Between Versions

+

This procedure can be used to downgrade the active version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be activated. Older versions must be activated manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | True   | -                  | -                     |
    +| sat          | 2.2.10          | False  | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to activate a different version of SAT.

    +

    This command will do three things:

    +
      +
    • For all hosted-type package repositories associated with this version of SAT, set them as the sole member +of their corresponding group-type repository. For example, activating SAT version 2.2.10 +sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group.
    • +
    • Set the version 2.2.10 as active within the product catalog, so that it appears active in the output of +sat showrev.
    • +
    • Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are +associated with NCNs with the role “Management” and subrole “Master” (for example, the CFS configuration +ncn-personalization). Specifically, it will ensure that the layer refers to the version of SAT CFS +configuration content associated with the version of SAT being activated.
    • +
    +
    ncn-m001# prodmgr activate sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
    +Set sat-2.2.10 as active in product catalog.
    +Updated CFS configurations: [ncn-personalization]
    +
  4. +
  5. +

    Verify that the chosen version is marked as active.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| product_name | product_version | active | images             | image_recipes         |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +| sat          | 2.3.3           | False  | -                  | -                     |
    +| sat          | 2.2.10          | True   | -                  | -                     |
    ++--------------+-----------------+--------+--------------------+-----------------------+
    +
  6. +
  7. +

    Apply the modified CFS configuration to the management NCNs.

    +

    At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.

    +

    To ensure that management NCNs have been updated to use the active SAT +version, follow the Procedure to Apply CFS Configuration. +Refer to the output from the prodmgr activate command to find the name of +the modified CFS configuration. If more than one CFS configuration was +modified, use the first one.

    +
  8. +
+

Optional: Installing and Configuring SAT on an External System

+

SAT can optionally be installed and configured on an external system to interact with CSM over the CAN.

+

Limitations

+

Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on +host-based functionality on the management NCNs and will not work from an external system. This includes the following:

+
    +
  • The platform-services and ncn-power stages of sat bootsys
  • +
  • The local host information displayed by the --local option of sat showrev
  • +
+

Installing SAT on an external system is not an officially supported configuration. These instructions are provided +“as-is” with the hope that they can be useful for users who desire additional flexibility.

+

Certain additional steps may need to be taken to install and configure SAT depending on the configuration of the +external system in use. These additional steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this documentation. This section covers only the +steps needed to configure SAT to use externally-accessible API endpoints exposed by CSM.

+

Prerequisites

+
    +
  • The external system must be on the Customer Access Network (CAN).
  • +
  • Python 3.7 or newer is installed on the system.
  • +
  • kubectl, openssh, git, and curl are installed on the external system.
  • +
  • The root CA certificates used when installing CSM have been added to the external system’s trust store such that +authenticated TLS connections can be made to the CSM REST API gateway. For more information, refer to Certificate +Authority in the Cray System Management Documentation.
  • +
+

Procedure

+
    +
  1. +

    Create a Python virtual environment.

    +
    $ SAT_VENV_PATH="$(pwd)/venv"
    +$ python3 -m venv ${SAT_VENV_PATH}
    +$ . ${SAT_VENV_PATH}/bin/activate
    +
  2. +
  3. +

    Clone the SAT source code.

    +

    Note: To use SAT version 3.19, this example clones the release/3.19 branch of +Cray-HPE/sat. However, for better clarity, these instructions include steps that apply only to +versions newer than 3.19. Specifically, the instructions include references to the +csm-api-client package, which was not a dependency of SAT in version 3.19.

    +
    (venv) $ git clone --branch=release/3.19 https://github.com/Cray-HPE/sat.git
    +
  4. +
  5. +

    Set up the SAT CSM Python dependencies to be installed from their source code.

    +

    SAT CSM Python dependency packages are not currently distributed publicly as +source packages or binary distributions. They must be installed from +their source code hosted on GitHub. Also, to install the cray-product-catalog +Python package, you must first clone it locally. Use the following steps to +modify the SAT CSM Python dependencies so they can be installed from their source code.

    +
      +
    1. +

      Clone the source code for cray-product-catalog.

      +
      (venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
      +
    2. +
    3. +

      In the cray-product-catalog directory, create a file named .version +that contains the version of cray-product-catalog.

      +
      (venv) $ echo 1.6.0 > cray-product-catalog/.version
      +
    4. +
    5. +

      Open the “locked” requirements file in a text editor.

      +
      (venv) $ vim sat/requirements.lock.txt
      +
    6. +
    7. +

      Update the line containing cray-product-catalog so that it reflects the local path +to cray-product-catalog.

      +

      It should read as follows:

      +
      ./cray-product-catalog
      +
    8. +
    9. +

      For versions of SAT newer than 3.19, change the line containing csm-api-client to +read as follows:

      +
      csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +
    10. +
    11. +

      (Optional) Confirm that requirements.lock.txt is modified as expected.

      +

      Note: For versions newer than 3.19, you will see both cray-product-catalog and csm-api-client. +For version 3.19 and older, you will only see cray-product-catalog.

      +
      (venv) $ grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
      +./cray-product-catalog
      +csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +
    12. +
    +
  6. +
  7. +

    Install the modified SAT dependencies.

    +
    (venv) $ pip install -r sat/requirements.lock.txt
    +...
    +
  8. +
  9. +

    Install the SAT Python package.

    +
    (venv) $ pip install ./sat
    +...
    +
  10. +
  11. +

    Optional: Add the sat virtual environment to the user’s PATH environment variable.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.

    +

    If the virtual environment is not added to the user’s PATH environment variable, then +source ${SAT_VENV_PATH}/bin/activate will need to be run before running any SAT commands.

    +
    (venv) $ deactivate
    +$ echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
    +$ source ~/.bash_profile
    +
  12. +
  13. +

    Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config on the external system.

    +

    Note that this file contains credentials to authenticate against the Kubernetes API as the administrative user, so +it should be treated as sensitive.

    +
    $ mkdir -p ~/.kube
    +$ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
    +admin.conf                                       100% 5566   3.0MB/s   00:00
    +
  14. +
  15. +

    Add a new entry for the hostname kubernetes to the external system’s /etc/hosts file.

    +

    The kubernetes hostname should correspond to the CAN IP address on ncn-m001. On CSM 1.2, this can be determined +by querying the IP address of the bond0.cmn0 interface.

    +
    $ ssh ncn-m001 ip addr show bond0.cmn0
    +13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
    +inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007
    +   valid_lft forever preferred_lft forever
    +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
    +   valid_lft forever preferred_lft forever
    +$ IP_ADDRESS=10.102.1.11
    +

    On CSM versions prior to 1.2, the CAN IP can be determined by querying the IP address of the vlan007 interface.

    +
    $ ssh ncn-m001 ip addr show vlan007
    +13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
    +inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
    +   valid_lft forever preferred_lft forever
    +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
    +   valid_lft forever preferred_lft forever
    +$ IP_ADDRESS=10.102.1.10
    +

    Once the IP address is determined, add an entry to /etc/hosts mapping the IP address to the hostname kubernetes.

    +
    $ echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
    +10.102.1.11 kubernetes
    +
  16. +
  17. +

    Modify ~/.kube/config to set the cluster server address.

    +

    The value of the server key for the kubernetes cluster under the clusters section should be set to +https://kubernetes:6443.

    +
    ---
    +clusters:
    +- cluster:
    +    certificate-authority-data: REDACTED
    +    server: https://kubernetes:6443
    +  name: kubernetes
    +...
    +
  18. +
  19. +

    Confirm that kubectl can access the CSM Kubernetes cluster.

    +
    $ kubectl get nodes
    +NAME       STATUS   ROLES    AGE    VERSION
    +ncn-m001   Ready    master   135d   v1.19.9
    +ncn-m002   Ready    master   136d   v1.19.9
    +ncn-m003   Ready    master   136d   v1.19.9
    +ncn-w001   Ready    <none>   136d   v1.19.9
    +ncn-w002   Ready    <none>   136d   v1.19.9
    +ncn-w003   Ready    <none>   136d   v1.19.9
    +
  20. +
  21. +

    Use sat init to create a configuration file for SAT.

    +
    $ sat init
    +INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
    +
  22. +
  23. +

    Copy the platform CA certificates from the management NCN and configure the certificates for use with SAT.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.

    +
    $ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
    +$ echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
    +$ source ~/.bash_profile
    +
  24. +
  25. +

    Edit the SAT configuration file to set the API and S3 hostnames.

    +

    Externally available API endpoints are given domain names in PowerDNS, so the endpoints in the configuration file +should each be set to subdomain.system-name.site-domain, where system-name and site-domain are replaced with +the values specified during csi config init, and subdomain is the DNS name for the externally available service. +For more information, refer to Externally Exposed Services in the Cray System Management Documentation.

    +

    The API gateway has the subdomain api, and S3 has the subdomain s3. The S3 endpoint runs on port 8080. The +following options should be set in the SAT configuration file:

    +
    [api_gateway]
    +host = "api.system-name.site-domain"
    +
    +[s3]
    +endpoint = "http://s3.system-name.site-domain:8080"
    +
  26. +
  27. +

    Edit the SAT configuration file to specify the Keycloak user which will be accessing the REST API.

    +
    [api_gateway]
    +username = "user"
    +
  28. +
  29. +

    Authenticate against the API gateway with sat auth.

    +

    For more information, see SAT Authentication.

    +
  30. +
  31. +

    Generate S3 credentials.

    +

    For more information, see Generate SAT S3 Credentials.

    +
  32. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/introduction/index.html b/en-24/introduction/index.html new file mode 100644 index 0000000000..c8ce9b3e91 --- /dev/null +++ b/en-24/introduction/index.html @@ -0,0 +1,991 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.

+

Six Kibana Dashboards are included with SAT. They provide organized output for system health information.

+ +

Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.

+ +

In CSM 1.3 and newer, the sat command is automatically available on all the +Kubernetes NCNs. For more information, see SAT in CSM. Older +versions of CSM do not have the sat command automatically available, and SAT +must be installed as a separate product.

+

System Admin Toolkit Command Overview

+

Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on +Kubernetes manager nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes manager node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using interactive and non-interactive modes.

+

Interactive

+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

Non-interactive

+
ncn-m001# sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, then use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following +example.

+
ncn-m001# sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+
ncn-m001# man sat
+
ncn-m001# man sat-podman
+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.

+
    +
  • The root or super-user account always has the # character at the end of the prompt and has the host name of the +host in the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A user account that is neither root nor crayadm is +referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the string shown in the following table. It also has the “#” +character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + +
Command PromptMeaning
ncn-m001#Run on one of the Kubernetes Manager servers. (Non-interactive)
(CONTAINER_ID) sat-container#Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

Examples of the sat status command used by an administrator:

+
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

SAT in CSM

+

In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs, but it is still possible +to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the +sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:

+
    +
  • +

    An entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product +stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.

    +
  • +
  • +

    The sat-install-utility container image is only available with the full SAT product stream. This container image +provides uninstall and activate functionality when used with the prodmgr command. (In SAT 2.3 and older, SAT was +only available to install as a separate product stream. Because these versions were packaged with +sat-install-utility, it is still possible to uninstall these versions of SAT.)

    +
  • +
  • +

    The docs-sat RPM package is only available with the full SAT product stream.

    +
  • +
  • +

    The sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is +only available with the full SAT product stream.

    +
  • +
+

If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS +configurations that apply to NCNs (for example, ncn-personalization) should not include a SAT layer.

+

The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the +Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, it +does the following:

+
    +
  • +

    Modifies the sat.toml configuration file which contains the username so that it is only readable by root.

    +
  • +
  • +

    Modifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed +because the names of the files within the tokens directory contain the username.

    +
  • +
+

Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other +users. These permission changes only apply to files created by previous installations of SAT. In the current version of +SAT, all files and directories are created with the appropriate permissions.

+

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX (Shasta) software stack. The following list shows these dependencies +for each subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bmccreds

+

CSM

+
    +
  • System Configuration Service (SCSD)
  • +
+

sat bootprep

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Image Management Service (IMS)
  • +
  • Version Control Service (VCS)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Compute Rolling Upgrade Service (CRUS)
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

COS

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diags

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwhist

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat slscheck

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat status

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/release_notes/index.html b/en-24/release_notes/index.html new file mode 100644 index 0000000000..ec90a2abec --- /dev/null +++ b/en-24/release_notes/index.html @@ -0,0 +1,1420 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Release Notes

+

Summary of Changes in SAT 2.4

+

The 2.4.13 version of the SAT product includes:

+
    +
  • Version 3.19.3 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.5.5 of the sat-install-utility container image.
  • +
  • Version 3.3.1 of the cfs-config-util container image.
  • +
+

Because of installation refactoring efforts, the following two components +are no longer delivered with SAT:

+
    +
  • sat-cfs-install container image
  • +
  • sat-cfs-install Helm chart
  • +
+

Inclusion of SAT in CSM

+

A version of the cray-sat container image is now included in CSM. For more +information, see SAT in CSM.

+

SAT Installation Improvements

+

The SAT install.sh script no longer uses a sat-cfs-install Helm chart and +container image to upload its Ansible content to the sat-config-management +repository in VCS. Instead, it uses Podman to run the cf-gitea-import container +directly. Some of the benefits of this change include the following:

+ +

Decoupling of cray-sat Container Image and cray-sat-podman Package

+

In older SAT releases, the sat wrapper script that was provided by the +cray-sat-podman package installed on Kubernetes master NCNs included a +hard-coded version of the cray-sat container image. As a result, every new +version of the cray-sat image required a corresponding new version of the +cray-sat-podman package.

+

In this release, this tight coupling of the cray-sat-podman package and the +cray-sat container image was removed. The sat wrapper script provided +by the cray-sat-podman package now looks for the version of the cray-sat +container image in the /opt/cray/etc/sat/version file. This file is populated +with the correct version of the cray-sat container image by the SAT layer of +the CFS configuration that is applied to management NCNs. If the version file +does not exist, the wrapper script defaults to the version of the cray-sat +container image delivered with the latest version of CSM installed on the system.

+

Improved NCN Personalization Automation

+

The steps for performing NCN personalization as part of the SAT installation +were moved out of the install.sh script and into a new +update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release +distribution. The new script provides additional flexibility in how it modifies +the NCN personalization CFS configuration for SAT. It can modify an existing CFS +configuration by name, a CFS configuration being built in a JSON file, or an +existing CFS configuration that applies to certain components. For more information, +see Perform NCN Personalization.

+

New sat bootprep Features

+

The following new features were added to the sat bootprep command:

+ +

The schema of the sat bootprep input files was also changed to support these +new features:

+
    +
  • The base recipe or image used by an image in the input file should now be +specified under a base key instead of under an ims key. The old ims +key is deprecated.
  • +
  • To specify an image that depends on another image in the input file, the +dependent image should specify the dependency under base.image_ref. +You should no longer use the IMS name of the image on which it depends.
  • +
  • The image used by a session template should now be specified under +image.ims.name, image.ims.id, or image.image_ref. Specifying a string +value directly under the image key is deprecated.
  • +
+

For more information on defining IMS images and BOS session templates in the +sat bootprep input file, see Defining IMS Images +and Defining BOS Session Templates.

+

Added Blade Swap Support to sat swap

+

The sat swap command was updated to support swapping compute and UAN blades +with sat swap blade. This functionality is described in the following processes +of the Cray System Management Documentation:

+
    +
  • Adding a Liquid-cooled blade to a System Using SAT
  • +
  • Removing a Liquid-cooled blade from a System Using SAT
  • +
  • Replace a Compute Blade Using SAT
  • +
  • Swap a Compute Blade with a Different System Using SAT
  • +
+

Support for BOS v2

+

A new v2 version of the Boot Orchestration Service (BOS) is available in CSM +1.3.0. SAT has added support for BOS v2. This impacts the following commands +that interact with BOS:

+
    +
  • sat bootprep
  • +
  • sat bootsys
  • +
  • sat status
  • +
+

By default, SAT uses BOS v1. However, you can choose the BOS version you want +to use. For more information, see Change the BOS Version.

+

Added BOS Fields to sat status

+

When using BOS v2, sat status outputs additional fields. These fields show +the most recent BOS session, session template, booted image, and boot status for +each node. An additional --bos-fields option was added to limit the output of +sat status to these fields. The fields are not displayed when using BOS v1.

+

Open Source Repositories

+

This is the first release of SAT built from open source code repositories. +As a result, build infrastructure was changed to use an external Jenkins instance, +and artifacts are now published to an external Artifactory instance. These +changes should not impact the functionality of the SAT product in any way.

+

Security

+

CVE Mitigation

+
    +
  • The paramiko Python package version was updated from 2.9.2 to 2.10.1 to +mitigate CVE-2022-24302.
  • +
  • The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to +mitigate CVE-2022-36087.
  • +
+

Restricted Permissions on SAT Config Files and Directories

+

SAT stores information used to authenticate to the API gateway with Keycloak. +Token files are stored in the ~/.config/sat/tokens/ directory. Those files +have always had permissions appropriately set to restrict them to be readable +only by the user.

+

Keycloak usernames used to authenticate to the API gateway are stored in the +SAT config file at ~/.config/sat/sat.toml. Keycloak usernames are also used in +the file names of tokens stored in ~/.config/sat/tokens. As an additional +security measure, SAT now restricts the permissions of the SAT config file +to be readable and writable only by the user. It also restricts the tokens +directory and the entire SAT config directory ~/.config/sat to be accessible +only by the user. This prevents other users on the system from viewing +Keycloak usernames used to authenticate to the API gateway.

+

Bug Fixes

+
    +
  • Fixed an issue where sat init did not print a message confirming a new +configuration file was created.
  • +
  • Fixed an issue where sat showrev exited with a traceback if the file +/opt/cray/etc/site_info.yaml existed but was empty. This could occur if the +user exited sat setrev with Ctrl-C.
  • +
  • Fixed outdated information in the sat bootsys man page, and added a +description of the command stages.
  • +
+

Summary of Changes in SAT 2.3

+

The 2.3.4 version of the SAT product includes:

+
    +
  • Version 3.15.4 of the sat python package and CLI
  • +
  • Version 1.6.11 of the sat-podman wrapper script
  • +
  • Version 1.2.0 of the sat-cfs-install container image
  • +
  • Version 2.0.0 of the sat-cfs-install Helm chart
  • +
  • Version 1.5.0 of the sat-install-utility container image
  • +
  • Version 2.0.3 of the cfs-config-util container image
  • +
+

New sat Commands

+

None.

+

Current Working Directory in SAT Container

+

When running sat commands, the current working directory is now mounted in the +container as /sat/share, and the current working directory within the container +is also /sat/share.

+

Files in the current working directory must be specified using relative paths to +that directory, because the current working directory is always mounted on /sat/share. +Absolute paths should be avoided, and paths that are outside of $HOME or $PWD +are never accessible to the container environment.

+

The home directory is still mounted on the same path inside the container as it +is on the host.

+

Changes to sat bootsys

+

The following options were added to sat bootsys.

+
    +
  • --bos-limit
  • +
  • --recursive
  • +
+

The --bos-limit option passes a given limit string to a BOS session. The --recursive +option specifies a slot or other higher-level component in the limit string.

+

Changes to sat bootprep

+

The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS +jobs after sat bootprep is run. Jobs are no longer deleted by default.

+

Changes to sat status

+

sat status now includes information about nodes’ CFS configuration statuses, such +as desired configuration, configuration status, and error count.

+

The output of sat status now splits different component types into different report tables.

+

The following options were added to sat status.

+
    +
  • --hsm-fields, --sls-fields, --cfs-fields
  • +
  • --bos-template
  • +
+

The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns +according to specified CSM services.

+

The --bos-template option filters the status report according to the specified +session template’s boot sets.

+

Compatibility with CSM 1.2

+

The following components were modified to be compatible with CSM 1.2.

+
    +
  • sat-cfs-install container image and Helm chart
  • +
  • sat-install-utility container image
  • +
  • SAT product installer
  • +
+

GPG Checking

+

The sat-ncn Ansible role provided by sat-cfs-install was modified to enable +GPG checks on packages while leaving GPG checks disabled on repository metadata.

+

Security

+

Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed +Python dependency versions.

+

Bug Fixes

+

Minor bug fixes were made in each of the repositories. For full change lists, +refer to each repository’s CHANGELOG.md file.

+

The known issues listed under the SAT 2.2 release were fixed.

+

Summary of Changes in SAT 2.2

+

SAT 2.2.16 was released on February 25th, 2022.

+

This version of the SAT product included:

+
    +
  • Version 3.14.0 of the sat python package and CLI
  • +
  • Version 1.6.4 of the sat-podman wrapper script
  • +
  • Version 1.0.4 of the sat-cfs-install container image and Helm chart
  • +
+

It also added the following new components:

+
    +
  • Version 1.4.3 of the sat-install-utility container image
  • +
  • Version 2.0.2 of the cfs-config-util container image
  • +
+

The following sections detail the changes in this release.

+

Known Issues in SAT 2.2

+

sat Command Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, the sat command will not +be found. For example:

+
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+

This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be prepended to the +$PATH environment variable:

+
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+

Or, the file /sat/venv/bin/activate may be sourced:

+
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+

Tab Completion Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, tab completion for sat +commands does not work.

+

This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:

+
source /etc/bash_completion.d/sat-completion.bash
+

OCI Runtime Permission Error when Running sat in Root Directory

+

sat commands will not work if the current directory is /. For example:

+
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+

To resolve, run sat in another directory.

+

Duplicate Mount Error when Running sat in Config Directory

+

sat commands will not work if the current directory is ~/.config/sat. For example:

+
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+

To resolve, run sat in another directory.

+

New sat Commands

+
    +
  • sat bootprep automates the creation of CFS configurations, the build and +customization of IMS images, and the creation of BOS session templates. For +more information, see SAT Bootprep.
  • +
  • sat slscheck performs a check for consistency between the System Layout +Service (SLS) and the Hardware State Manager (HSM).
  • +
  • sat bmccreds provides a simple interface for interacting with the System +Configuration Service (SCSD) to set BMC Redfish credentials.
  • +
  • sat hwhist displays hardware component history by XName (location) or by +its Field-Replaceable Unit ID (FRUID). This command queries the Hardware +State Manager (HSM) API to obtain this information. Since the sat hwhist +command supports querying for the history of a component by its FRUID, the +FRUID of components has been added to the output of sat hwinv.
  • +
+

Additional Install Automation

+

The following automation has been added to the install script, install.sh:

+
    +
  • Wait for the completion of the sat-config-import Kubernetes job, which is +started when the sat-cfs-install Helm chart is deployed.
  • +
  • Automate the modification of the CFS configuration, which applies to master +management NCNs (for example, ncn-personalization).
  • +
+

Changes to Product Catalog Data Schema

+

The SAT product uploads additional information to the cray-product-catalog +Kubernetes ConfigMap detailing the components it provides, including container +(Docker) images, Helm charts, RPMs, and package repositories.

+

This information is used to support uninstall and activation of SAT product +versions moving forward.

+

Support for Uninstall and Activation of SAT Versions

+

Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.

+

For more information, see Uninstall: Removing a Version of +SAT and Activate: Switching +Between Versions.

+

Improvements to sat status

+

A Subrole column has been added to the output of sat status. This allows you +to easily differentiate between master, worker, and storage nodes in the +management role, for example.

+

Hostname information from SLS has been added to sat status output.

+

Added Support for JSON Output

+

Support for JSON-formatted output has been added to commands which currently +support the --format option, such as hwinv, status, and showrev.

+

Usability Improvements

+

Many usability improvements have been made to multiple sat commands, +mostly related to filtering command output. The following are some highlights:

+
    +
  • Added --fields option to display only specific fields for subcommands which +display tabular reports.
  • +
  • Added ability to filter on exact matches of a field name.
  • +
  • Improved handling of multiple matches of a field name in --filter queries +so that the first match is used, similar to --sort-by.
  • +
  • Added support for --filter, --fields, and --reverse for summaries +displayed by sat hwinv.
  • +
  • Added borders to summary tables generated by sat hwinv.
  • +
  • Improved documentation in the man pages.
  • +
+

Default Log Level Changed

+

The default log level for stderr has been changed from “WARNING” to “INFO”. For +more information, see SAT Logging.

+

More Granular Log Level Configuration Options

+

With the command-line options --loglevel-stderr and --loglevel-file, the log level +can now be configured separately for stderr and the log file.

+

The existing --loglevel option is now an alias for the --loglevel-stderr option.

+

Podman Wrapper Script Improvements

+

The Podman wrapper script is the script installed at /usr/bin/sat on the +master management NCNs by the cray-sat-podman RPM that runs the cray-sat +container in podman. The following subsections detail improvements that were +made to the wrapper script in this release.

+

Mounting of $HOME and Current Directories in cray-sat Container

+

The Podman wrapper script that launches the cray-sat container with podman +has been modified to mount the user’s current directory and home directory into +the cray-sat container to provide access to local files in the container.

+

Podman Wrapper Script Documentation Improvements

+

The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:

+
    +
  • Environment variables that affect execution of the wrapper script
  • +
  • Host files and directories mounted in the container
  • +
+

Fixes to Podman Wrapper Script Output Redirection

+

Fixed issues with redirecting stdout and stderr, and piping output to commands, +such as awk, less, and more.

+

Configurable HTTP Timeout

+

A new sat option has been added to configure the HTTP timeout length for +requests to the API gateway. For more information, refer to sat-man sat.

+

sat bootsys Improvements

+

Many improvements and fixes have been made to sat bootsys. The following are some +highlights:

+
    +
  • Added the --excluded-ncns option, which can be used to omit NCNs +from the platform-services and ncn-power stages in case they are +inaccessible.
  • +
  • Disruptive shutdown stages in sat bootsys shutdown now prompt the user to +continue before proceeding. A new option, --disruptive, will bypass this.
  • +
  • Improvements to Ceph service health checks and restart during the platform-services +stage of sat bootsys boot.
  • +
+

sat xname2nid Improvements

+

sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to +a list of NIDs in those locations.

+

A new --format option has been added to sat xname2nid. It sets the output format to +either “range” (the default) or “NID”. The “range” format displays NIDs in a +compressed range format suitable for use with a workload manager like Slurm.

+

Usage of v2 HSM API

+

The commands which interact with HSM (for example, sat status and sat hwinv) now +use the v2 HSM API.

+

sat diag Limited to HSN Switches

+

sat diag will now only operate against HSN switches by default. These are the +only controllers that support running diagnostics with HMJTD.

+

sat showrev Enhancements

+

A column has been added to the output of sat showrev that indicates whether a +product version is “active”. The definition of “active” varies across products, +and not all products may set an “active” version.

+

For SAT, the active version is the one with its hosted-type package repository in +Nexus set as the member of the group-type package repository in Nexus, +meaning that it will be used when installing the cray-sat-podman RPM.

+

cray-sat Container Image Size Reduction

+

The size of the cray-sat container image has been approximately cut in half by +leveraging multi-stage builds. This also improved the repeatability of the unit +tests by running them in the container.

+

Bug Fixes

+

Minor bug fixes were made in cray-sat and in cray-sat-podman. For full change lists, +refer to each repository’s CHANGELOG.md file.

+

Summary of SAT Changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named ncn-personalization.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-Diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +programs on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and config file options +have been removed. For more information, see Remove Obsolete Configuration +File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation has been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes masters and workers.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+

Summary of SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+

Summary of SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes management cluster (workers and +masters). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Config File Location Change

+

The default location of the SAT config file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own config files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment variable.

+

Additionally, if a config file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies option.
  • +
  • List node accelerators (for example, GPUs) with the --list-node-accels option. The +count of node accelerators is also included for each node.
  • +
  • List node accelerator risers (for example, Redstone modules) with the --list-node-accel-risers +option. The count of node accelerator risers is also included for each node.
  • +
  • List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics +option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use Python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding config-file +option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding config file options were +deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+

Summary of SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command, which supported operations for replacing a switch, has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+

Summary of SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

For more information on each of these commands, see the System Admin Toolkit Command +Overview and the table +of commands in the SAT Authentication section +of this document.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/sitemap.xml b/en-24/sitemap.xml new file mode 100644 index 0000000000..c94c124078 --- /dev/null +++ b/en-24/sitemap.xml @@ -0,0 +1,347 @@ + + + + /docs-sat/en-24/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-24/dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-24/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-24/usage/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-24/usage/change_bos_version/ + 2024-12-11T03:40:00+00:00 + + + + + /docs-sat/en-24/introduction/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-24/dashboards/sat_grafana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-24/cne_install/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-24/usage/sat_bootprep/ + 2024-12-11T03:40:00+00:00 + + + + + /docs-sat/en-24/dashboards/sat_kibana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-24/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-24/categories/ + + + + + + + + /docs-sat/en-24/tags/ + + + + + + + + diff --git a/en-24/tags/index.html b/en-24/tags/index.html new file mode 100644 index 0000000000..12358999a4 --- /dev/null +++ b/en-24/tags/index.html @@ -0,0 +1,703 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-24/tags/index.xml b/en-24/tags/index.xml new file mode 100644 index 0000000000..d47073ce7e --- /dev/null +++ b/en-24/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-24/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-24 + + + diff --git a/en-24/usage/change_bos_version/index.html b/en-24/usage/change_bos_version/index.html new file mode 100644 index 0000000000..4285ed2f2e --- /dev/null +++ b/en-24/usage/change_bos_version/index.html @@ -0,0 +1,682 @@ + + + + + + + + + + + + Change the BOS Version :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Change the BOS Version

+

By default, SAT uses Boot Orchestration Service (BOS) version one. You can +select the BOS version to use for individual commands with the --bos-version +option. For more information on this option, refer to the man page for a specific +command.

+

You can also configure the BOS version to use in the SAT config file. Do this +under the api_version setting in the bos section of the config file. If +the system is using an existing SAT config file from an older version of SAT, +the bos section might not exist. In that case, add the bos section with the +BOS version desired in the api_version setting.

+
    +
  1. +

    Find the SAT config file at ~/.config/sat/sat.toml, and look for a section +like this:

    +
    [bos]
    +api_version = "v1"
    +

    In this example, SAT is using BOS version "v1".

    +
  2. +
  3. +

    Change the line specifying the api_version to the BOS version desired (for +example, "v2").

    +
    [bos]
    +api_version = "v2"
    +
  4. +
  5. +

    If applicable, uncomment the api_version line.

    +

    If the system is using an existing SAT config file from a recent version of +SAT, the api_version line might be commented out like this:

    +
    [bos]
    +# api_version = "v2"
    +

    If the line is commented out, SAT will still use the default BOS +version. To ensure a different BOS version is used, uncomment the +api_version line by removing # at the beginning of the line.

    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-24/usage/index.html b/en-24/usage/index.html new file mode 100644 index 0000000000..ab9a5433f7 --- /dev/null +++ b/en-24/usage/index.html @@ -0,0 +1,688 @@ + + + + + + + + + + + + SAT Usage :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Usage

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-24/usage/index.xml b/en-24/usage/index.xml new file mode 100644 index 0000000000..2372847f87 --- /dev/null +++ b/en-24/usage/index.xml @@ -0,0 +1,26 @@ + + + + SAT Usage on System Admin Toolkit (SAT) + /docs-sat/en-24/usage/ + Recent content in SAT Usage on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-24 + Wed, 11 Dec 2024 03:40:00 +0000 + + + Change the BOS Version + /docs-sat/en-24/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version one. You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. You can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. + + + SAT Bootprep + /docs-sat/en-24/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-24/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates based on a given input file which defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similarly to other SAT commands. ncn-m001# sat-man sat-bootprep SAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session. 
+ + + diff --git a/en-24/usage/sat_bootprep/index.html b/en-24/usage/sat_bootprep/index.html new file mode 100644 index 0000000000..873fbd9a82 --- /dev/null +++ b/en-24/usage/sat_bootprep/index.html @@ -0,0 +1,1319 @@ + + + + + + + + + + + + SAT Bootprep :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Bootprep

+

SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.

+

This automated process centers around the sat bootprep command. Man page +documentation for sat bootprep can be viewed similarly to other SAT commands.

+
ncn-m001# sat-man sat-bootprep
+

SAT Bootprep vs SAT Bootsys

+

sat bootprep is used to create CFS configurations, build and +rename IMS images, and create BOS session templates which tie the +configurations and images together during a BOS session.

+

sat bootsys automates several portions of the boot and shutdown processes, +including (but not limited to) performing BOS operations (such as creating BOS +sessions), powering on and off cabinets, and checking the state of the system +prior to shutdown.

+

Editing a Bootprep Input File

+

The input file provided to sat bootprep is a YAML-formatted file containing +information which CFS, IMS, and BOS use to create configurations, images, and +BOS session templates respectively. Writing and modifying these input files is +the main task associated with using sat bootprep. An input file is composed of +three main sections, one each for configurations, images, and session templates. +These sections may be specified in any order, and any of the sections may be +omitted if desired.

+

Providing a Schema Version

+

The sat bootprep input file is validated against a versioned schema +definition. The input file should specify the version of the schema with which +it is compatible under a schema_version key. For example:

+
---
+schema_version: 1.0.2
+

The current sat bootprep input file schema version can be viewed with the +following command:

+
ncn-m001# sat bootprep view-schema | grep '^version:'
+version: '1.0.2'
+

The sat bootprep run command validates the schema version specified +in the input file. The command also makes sure that the schema version +of the input file is compatible with the schema version understood by the +current version of sat bootprep. For more information on schema version +validation, refer to the schema_version property description in the bootprep +input file schema. For more information on viewing the bootprep input file +schema in either raw form or user-friendly HTML form, see Viewing the Exact +Schema Specification or +Generating User-Friendly Documentation.

+

The default sat bootprep input files provided by the hpc-csm-software-recipe +release distribution already contain the correct schema version.

+

Defining CFS Configurations

+

The CFS configurations are defined under a configurations key. Under this +key, you can list one or more configurations to create. For each +configuration, give a name in addition to the list of layers that +comprise the configuration.

+

Each layer can be defined by a product name and optionally a version number, +commit hash, or branch in the product’s configuration repository. If this +method is used, the layer is created in CFS by looking up relevant configuration +information (including the configuration repository and commit information) from +the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied. However, if it is absent, the version is assumed to be the latest +version found in the cray-product-catalog.

+

Alternatively, a configuration layer can be defined by explicitly referencing +the desired configuration repository. You must then specify the intended version +of the Ansible playbooks by providing a branch name or commit hash with branch +or commit.

+

The following example shows a CFS configuration with two layers. The first +layer is defined in terms of a product name and version, and the second layer +is defined in terms of a Git clone URL and branch:

+
---
+configurations:
+- name: example-configuration
+  layers:
+  - name: example-product
+    playbook: example.yml
+    product:
+      name: example
+      version: 1.2.3
+  - name: another-example-product
+    playbook: another-example.yml
+    git:
+      url: "https://vcs.local/vcs/another-example-config-management.git"
+      branch: main
+

When sat bootprep is run against an input file, a CFS configuration is created +corresponding to each configuration in the configurations section. For +example, the configuration created from an input file with the layers listed +above might look something like the following:

+
{
+    "lastUpdated": "2022-02-07T21:47:49Z",
+    "layers": [
+        {
+            "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "example-product",
+            "playbook": "example.yml"
+        },
+        {
+            "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "another-example-product",
+            "playbook": "another-example.yml"
+        }
+    ],
+    "name": "example-configuration"
+}
+

Defining IMS Images

+

The IMS images are defined under an images key. Under the images key, the +user may define one or more images to be created in a list. Each element of the +list defines a separate IMS image to be built and/or configured. Images must +contain a name key and a base key.

+

The name key defines the name of the resulting IMS image. The base key +defines the base image to be configured or the base recipe to be built and +optionally configured. One of the following keys must be present under the +base key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use a product key to specify an image or recipe provided by a +particular version of a product. Note that this is only possible if the +product provides a single image or recipe.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

Images may also contain the following keys:

+
    +
  • Use a configuration key to specify a CFS configuration with which to +customize the built image. If a configuration is specified, then configuration +groups must also be specified using the configuration_group_names key.
  • +
  • Use a ref_name key to specify a unique name that can refer to this image +within the input file in other images or in session templates. The ref_name +key allows references to images from the input file that have dynamically +generated names as described in +Dynamic Variable Substitutions.
  • +
  • Use a description key to describe the image in the bootprep input file. +Note that this key is not currently used.
  • +
+

Here is an example of an image using an existing IMS recipe as its base. This +example builds an IMS image from that recipe. It then configures it with +a CFS configuration named example-compute-config. The example-compute-config +CFS configuration can be defined under the configurations key in the same +input file, or it can be an existing CFS configuration. Running sat bootprep +against this input file results in an image named example-compute-image.

+
images:
+- name: example-compute-image
+  description: >
+    An example compute node image built from an existing IMS recipe.    
+  base:
+    ims:
+      name: example-compute-image-recipe
+      type: recipe
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

Here is an example showing the definition of two images. The first image is +built from a recipe provided by the cos product. The second image uses the +first image as a base and configures it with a configuration named +example-compute-config. The value of the first image’s ref_name key is used +in the second image’s base.image_ref key to specify it as a dependency. +Running sat bootprep against this input file results in two images, the +first named example-cos-image and the second named example-compute-image.

+
images:
+- name: example-cos-image
+  ref_name: example-cos-image
+  description: >
+    An example image built from a recipe provided by the COS product.    
+  base:
+    product:
+      name: cos
+      version: 2.3.101
+      type: recipe
+- name: example-compute-image
+  description: >
+    An example compute node image configured on top of the example COS image.    
+  base:
+    image_ref: example-cos-image
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

Defining BOS Session Templates

+

The BOS session templates are defined under the session_templates key. Each +session template must provide values for the name, image, configuration, +and bos_parameters keys. The name key defines the name of the resulting BOS +session template. The image key defines the image to use in the BOS session +template. One of the following keys must be present under the image key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

The configuration key defines the CFS configuration specified +in the BOS session template.

+

The bos_parameters key defines parameters that are passed through directly to +the BOS session template. The bos_parameters key should contain a boot_sets +key, and each boot set in the session template should be specified under +boot_sets. Each boot set can contain the following keys, all of +which are optional:

+
    +
  • Use a kernel_parameters key to specify the parameters passed to the kernel on the command line.
  • +
  • Use a network key to specify the network over which the nodes boot.
  • +
  • Use a node_list key to specify the nodes to add to the boot set.
  • +
  • Use a node_roles_groups key to specify the HSM roles to add to the boot set.
  • +
  • Use a node_groups key to specify the HSM groups to add to the boot set.
  • +
  • Use a rootfs_provider key to specify the root file system provider.
  • +
  • Use a rootfs_provider_passthrough key to specify the parameters to add to the rootfs= +kernel parameter.
  • +
+

As mentioned above, the parameters under bos_parameters are passed through +directly to BOS. For more information on the properties of a BOS boot set, +refer to BOS Session Templates in the Cray +System Management Documentation.

+

Here is an example of a BOS session template that refers to an existing IMS +image by name:

+
session_templates:
+- name: example-session-template
+  image:
+    ims:
+      name: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

Here is an example of a BOS session template that refers to an image from the +input file by its ref_name. This requires that an image defined in the input +file specifies example-image as the value of its ref_name key.

+
session_templates:
+- name: example-session-template
+  image:
+    image_ref: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

HPC CSM Software Recipe Variable Substitutions

+

The HPC CSM Software Recipe provides a manifest defining the versions of each +HPC software product included in the recipe. These product versions can be used +in the sat bootprep input file with Jinja2 template syntax.

+

Selecting an HPC CSM Software Recipe Version

+

By default, the sat bootprep command uses the product versions from the +latest installed version of the HPC CSM Software Recipe. However, you can +override this with the --recipe-version command line argument to sat bootprep run.

+

For example, to explicitly select the 22.11.0 version of the HPC CSM Software +Recipe, specify --recipe-version 22.11.0:

+
ncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+

Values Supporting Jinja2 Template Rendering

+

The entire sat bootprep input file is not rendered by the Jinja2 template +engine. Jinja2 template rendering of the input file is performed individually +for each supported value. The values of the following keys support rendering as +a Jinja2 template:

+
    +
  • The name key of each configuration under the configurations key.
  • +
  • The following keys of each layer under the layers key in a +configuration: +
      +
    • name
    • +
    • git.branch
    • +
    • product.version
    • +
    • product.branch
    • +
    +
  • +
  • The following keys of each image under the images key: +
      +
    • name
    • +
    • base.product.version
    • +
    • configuration
    • +
    +
  • +
  • The following keys of each session template under the +session_templates key: +
      +
    • name
    • +
    • configuration
    • +
    +
  • +
+

You can use Jinja2 built-in filters in values of any of the keys listed above. +In addition, Python string methods can be called on the string variables.

+

Viewing HPC CSM Software Recipe Variables

+

HPC CSM Software Recipe variables are available, and you can use them in the values +of the keys listed above. View these variables by cloning the hpc-csm-software-recipe +repository from VCS and accessing the product_vars.yaml file on the branch that +corresponds to the targeted version of the HPC CSM Software Recipe.

+
    +
  1. +

    Set up a shell script to access the password for the crayvcs user:

    +
    ncn-m001# cat > vcs-creds-helper.sh <<EOF
    +#!/bin/bash
    +kubectl get secret -n services vcs-user-credentials -o jsonpath={.data.vcs_password} | base64 -d
    +EOF
    +
  2. +
  3. +

    Ensure vcs-creds-helper.sh is executable:

    +
    ncn-m001# chmod u+x vcs-creds-helper.sh
    +
  4. +
  5. +

    Set the GIT_ASKPASS environment variable to the path to the +vcs-creds-helper.sh script:

    +
    ncn-m001# export GIT_ASKPASS="$PWD/vcs-creds-helper.sh"
    +
  6. +
  7. +

    Clone the hpc-csm-software-recipe repository:

    +
    ncn-m001# git clone https://crayvcs@api-gw-service-nmn.local/vcs/cray/hpc-csm-software-recipe.git
    +
  8. +
  9. +

    Change the directory to the hpc-csm-software-recipe repository:

    +
    ncn-m001# cd hpc-csm-software-recipe
    +
  10. +
  11. +

    View the versions of the HPC CSM Software Recipe on the system:

    +
    ncn-m001# git branch -r
    +
  12. +
  13. +

    Check out the branch of the hpc-csm-software-recipe repository that corresponds to +the targeted HPC CSM Software Recipe version. For example, for recipe version +22.11.0:

    +
    ncn-m001# git checkout cray/hpc-csm-software-recipe/22.11.0
    +
  14. +
  15. +

    View the contents of the file product_vars.yaml in the clone of the +repository:

    +
    ncn-m001# cat product_vars.yaml
    +
  16. +
+

The variables defined in the product_vars.yaml file can be used in the values +that support Jinja2 templates. A variable is specified by a dot-separated path, +with each component of the path representing a key in the YAML file. For +example, a version of the COS product appears as follows in the +product_vars.yaml file:

+
cos:
+  version: 2.4.76
+

This COS version can be used by specifying cos.version within a value in the +input file.

+

HPC CSM Software Recipe Variable Substitution Example

+

The following example bootprep input file shows how a COS version can be +used in a bootprep input file that creates a CFS configuration for computes. +Only one layer is shown for brevity.

+
---
+configurations:
+- name: compute-{{recipe.version}}
+  layers:
+  - name: cos-compute-integration-{{cos.version}}
+    playbook: cos-compute.yaml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: integration-{{cos.version}}
+

Note: When the value of a key in the bootprep input file begins with a Jinja2 +expression (for example, "{{cos.version}}"), the value must be quoted to pass YAML syntax checking.

+

Jinja2 expressions can also use filters and Python’s built-in string methods to +manipulate the variable values. For example, suppose only the major and minor +components of a COS version are to be used in the branch name for the COS +layer of the CFS configuration. You can use the split string method to +achieve this as follows:

+
---
+configurations:
+- name: compute-{{recipe.version}}
+  layers:
+  - name: cos-compute-integration-{{cos.version}}
+    playbook: cos-compute.yaml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: integration-{{cos.version.split('.')[0]}}-{{cos.version.split('.')[1]}}
+

Dynamic Variable Substitutions

+

Additional variables are available besides the product version variables +provided by the HPC CSM Software Recipe. (For more information, see HPC +CSM Software Recipe Variable Substitutions.) +These additional variables are dynamic because their values are +determined at run-time based on the context in which they appear. Available +dynamic variables include the following:

+
    +
  • The variable base.name can be used in the name of an image under the +images key. The value of this variable is the name of the IMS image or +recipe used as the base of this image.
  • +
  • The variable image.name can be used in the name of a session template +under the session_templates key. The value of this variable is the name of +the IMS image used in this session template.
  • +
+

These variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:

+
    +
  • You want to build an image from a recipe provided by a product and use the +name of the recipe in the name of the resulting image.
  • +
  • You want to use the name of the image in the name of a session template, and +the image is generated as described in the previous use case.
  • +
+

Example Bootprep Input Files

+

This section provides an example bootprep input file. It also gives +instructions for obtaining the default bootprep input files delivered +with a release of the HPC CSM Software Recipe.

+

Example Bootprep Input File

+

The following bootprep input file provides an example of using most of the +features described in previous sections. It is not intended to be a complete +bootprep file for the entire CSM product.

+
---
+configurations:
+- name: compute-{{recipe.version}}
+  layers:
+  - name: cos-compute-integration-{{cos.version}}
+    playbook: site.yml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: integration-{{cos.version}}
+  - name: cpe-pe_deploy-integration-{{cpe.version}}
+    playbook: pe_deploy.yml
+    product:
+      name: cpe
+      version: "{{cpe.version}}"
+      branch: integration-{{cpe.version}}
+
+images:
+- name: "{{base.name}}"
+  ref_name: base_cos_image
+  base:
+    product:
+      name: cos
+      type: recipe
+      version: "{{cos.version}}"
+
+- name: compute-{{base.name}}
+  ref_name: compute_image
+  base:
+    image_ref: base_cos_image
+  configuration: compute-{{recipe.version}}
+  configuration_group_names:
+  - Compute
+
+session_templates:
+- name: compute-{{recipe.version}}
+  image:
+    image_ref: compute_image
+  configuration: compute-{{recipe.version}}
+  bos_parameters:
+    boot_sets:
+      compute:
+        kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+        node_roles_groups:
+        - Compute
+        rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+

Accessing Default Bootprep Input Files

+

Default bootprep input files are delivered by the HPC CSM Software Recipe +product. You can access these files by cloning the hpc-csm-software-recipe +repository.

+

To do this, follow steps 1-7 of the procedure in Viewing HPC CSM Software Recipe +Variables. Then, access the files in the +bootprep directory of that repository:

+
ncn-m001# ls bootprep/
+

Generating an Example Bootprep Input File

+

The sat bootprep generate-example command was not updated for +recent bootprep schema changes. It is recommended that you instead use the +default bootprep input files described in Accessing Default Bootprep Input +Files. The sat bootprep generate-example command will be updated in a future release of SAT.

+

Editing HPC CSM Software Recipe Defaults

+

You might need to edit the default bootprep input files delivered by the HPC +CSM Software Recipe for your system. Here are some examples of how to edit +the files.

+

Editing Default Branch Names

+

Before running sat bootprep, HPE recommends reading the bootprep input files +and paying specific attention to the branch parameters. Some HPE Cray EX +products require system-specific changes on a working branch of VCS. For these +products, the default bootprep input files assume certain naming conventions for +the VCS branches. The files refer to a particular branch of a product’s +configuration management repository.

+

Thus, it is important to confirm that the bootprep input files delivered by the +HPC CSM Software Recipe match the actual system branch names. For example, the +COS product’s CFS configuration layer is defined as follows in the default +management-bootprep.yaml bootprep input file.

+
- name: cos-ncn-integration-{{cos.version}}
+  playbook: ncn.yml
+  product:
+    name: cos
+    version: "{{cos.version}}"
+    branch: integration-{{cos.version}}
+

The default file assumes that system-specific Ansible configuration changes +for the COS product in VCS are stored in a branch named +integration-{{cos.version}}. If the version being installed is COS 2.4.99, +sat bootprep looks for a branch named integration-2.4.99 from which to +create CFS configuration layers.

+

You can create VCS working branches that are not the default bootprep input file +branch names. A simple example of this is using cne-install to update working +VCS branches. If you use cne-install to update working VCS branches (namely in +the update_working_branches stage), you create or update the branches specified +by the -B WORKING_BRANCH command line option. For example, consider the +following cne-install command.

+
ncn-m001# ./cne-install install \
+    -B integration \
+    -s deploy_products \
+    -e update_working_branches
+

Products installed with this cne-install example use the working branch +integration for system-specific changes to VCS. The branch specified by the +-B option must match the branch specified in the bootprep input file.

+

In another example, to use the branch integration for COS instead of +integration-{{cos.version}}, edit the bootprep input file so it reads as +follows.

+
- name: cos-ncn-integration-{{cos.version}}
+  playbook: ncn.yml
+  product:
+    name: cos
+    version: "{{cos.version}}"
+    branch: integration
+

Editing Default Management CFS Configuration Names

+

The default bootprep input file for management CFS configurations +(management-bootprep.yaml) creates configurations that have names specified +within the input file. For example, in the bootprep input files included in the +22.11 HPC CSM Software Recipe, the following configurations are named:

+
    +
  • ncn-personalization
  • +
  • ncn-image-customization
  • +
+

These default management CFS configuration names might be acceptable for your +system. However, it is possible to create other names. sat bootprep creates +whatever configurations are specified in the input file. For example, to +create an NCN node personalization configuration named +ncn-personalization-test, edit the file as follows.

+
configurations:
+- name: ncn-personalization-test
+  layers:
+  ...
+

For management configurations, use sat status to identify the current +desired configuration for each of the management nodes.

+
ncn-m001# sat status --fields xname,role,subrole,desiredconfig --filter role=management
++----------------+------------+---------+---------------------+
+| xname          | Role       | SubRole | Desired Config      |
++----------------+------------+---------+---------------------+
+| x3000c0s1b0n0  | Management | Master  | ncn-personalization |
+| x3000c0s3b0n0  | Management | Master  | ncn-personalization |
+| x3000c0s5b0n0  | Management | Master  | ncn-personalization |
+| x3000c0s7b0n0  | Management | Worker  | ncn-personalization |
+| x3000c0s9b0n0  | Management | Worker  | ncn-personalization |
+| x3000c0s11b0n0 | Management | Worker  | ncn-personalization |
+| x3000c0s13b0n0 | Management | Worker  | ncn-personalization |
+| x3000c0s17b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s19b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s21b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s25b0n0 | Management | Worker  | ncn-personalization |
++----------------+------------+---------+---------------------+
+

To overwrite the desired configuration using sat bootprep, ensure the bootprep +input file specifies to create a configuration with the same name +(ncn-personalization in the example above). To create a different configuration, +ensure the bootprep input file specifies to create a configuration with a +different name than the desired configuration (different than ncn-personalization +in the example above).

+

Upgrading a Single Product and Overriding its Default Version

+

When working with a given HPC CSM Software Recipe, it might be necessary to +upgrade a single HPE Cray EX product past the default version given in the +recipe. However, you might still want to use the other default product versions +contained in that recipe. To do this, first upgrade the single product. For +more information, refer to the upgrade instructions in that product’s +documentation.

+

After the product is upgraded, you must override its default version in subsequent +runs of sat bootprep. The following process explains how to do this. In this +example, all the default product versions from the 22.11 software recipe are +used except for COS. The COS default product version is overridden to version +2.4.199 instead, and the CFS configurations in management-bootprep.yaml are +created.

+
    +
  1. +

    Ensure you have a local copy of the default bootprep input files.

    +

    For more information, see Accessing Default Bootprep Input +Files.

    +
  2. +
  3. +

    Edit the product_vars.yaml file to change the default product version.

    +
    ncn-m001# vim product_vars.yaml
    +
  4. +
  5. +

    Confirm the new product version in the edited product_vars.yaml file.

    +
    ncn-m001# grep -A1 cos: product_vars.yaml
    +cos:
    +  version: 2.4.199
    +
  6. +
  7. +

    Use the --vars-file option when running sat bootprep to override the +default product version.

    +

    You must run this command from the directory containing the product_vars.yaml +file. The product_vars.yaml file must also be specified when using the +--vars-file option. It is not sufficient to just edit the file.

    +
    ncn-m001# sat bootprep run --vars-file product_vars.yaml bootprep/management-bootprep.yaml
    +

    Note: This example is specific to creating the configurations defined in +management-bootprep.yaml. Review what configurations, images, or session templates +you intend to create by viewing the input file.

    +
  8. +
+

Viewing Built-in Generated Documentation

+

The contents of the YAML input files described above must conform to a schema +which defines the structure of the data. The schema definition is written using +the JSON Schema format. (Although the format is named “JSON Schema”, the schema +itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.

+

Viewing the Exact Schema Specification

+

To view the exact schema specification, run sat bootprep view-schema.

+
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+...
+title: Bootprep Input File
+description: >
+  A description of the set of CFS configurations to create, the set of IMS
+  images to create and optionally customize with the defined CFS configurations,
+  and the set of BOS session templates to create that reference the defined
+  images and configurations.
+type: object
+additionalProperties: false
+properties:
+  ...
+

Generating User-Friendly Documentation

+

The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature is included +that generates user-friendly HTML documentation for the input file schema. This +HTML documentation can be browsed with your preferred web browser.

+
    +
  1. +

    Create a documentation tarball using sat bootprep.

    +
    ncn-m001# sat bootprep generate-docs
    +INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
    +

    An alternate output directory can be specified with the --output-dir +option. The generated tarball is always named bootprep-schema-docs.tar.gz.

    +
    ncn-m001# sat bootprep generate-docs --output-dir /tmp
    +INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
    +
  2. +
  3. +

    From another machine, copy the tarball to a local directory.

    +
    another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
    +
  4. +
  5. +

    Extract the contents of the tarball and open the contained index.html.

    +
    another-machine$ tar xzvf bootprep-schema-docs.tar.gz
    +x bootprep-schema-docs/
    +x bootprep-schema-docs/index.html
    +x bootprep-schema-docs/schema_doc.css
    +x bootprep-schema-docs/schema_doc.min.js
    +another-machine$ open bootprep-schema-docs/index.html
    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/404.html b/en-25/404.html new file mode 100644 index 0000000000..8ecbe94b97 --- /dev/null +++ b/en-25/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-25/categories/index.html b/en-25/categories/index.html new file mode 100644 index 0000000000..825636410e --- /dev/null +++ b/en-25/categories/index.html @@ -0,0 +1,1142 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-25/categories/index.xml b/en-25/categories/index.xml new file mode 100644 index 0000000000..3d4e90be62 --- /dev/null +++ b/en-25/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-25/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + + + diff --git a/en-25/dashboards/index.html b/en-25/dashboards/index.html new file mode 100644 index 0000000000..0b47986efe --- /dev/null +++ b/en-25/dashboards/index.html @@ -0,0 +1,1127 @@ + + + + + + + + + + + + SAT Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Dashboards

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-25/dashboards/index.xml b/en-25/dashboards/index.xml new file mode 100644 index 0000000000..b2009f4665 --- /dev/null +++ b/en-25/dashboards/index.xml @@ -0,0 +1,26 @@ + + + + SAT Dashboards on System Admin Toolkit (SAT) + /docs-sat/en-25/dashboards/ + Recent content in SAT Dashboards on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Grafana Dashboards + /docs-sat/en-25/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT Kibana Dashboards + /docs-sat/en-25/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. 
+ + + diff --git a/en-25/dashboards/sat_grafana_dashboards/index.html b/en-25/dashboards/sat_grafana_dashboards/index.html new file mode 100644 index 0000000000..5320f516aa --- /dev/null +++ b/en-25/dashboards/sat_grafana_dashboards/index.html @@ -0,0 +1,1232 @@ + + + + + + + + + + + + SAT Grafana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Grafana Dashboards

+

The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.

+

Grafana can be accessed via web browser at the following URL:

+
    +
  • https://sma-grafana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com

+

For more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the +SMA product documentation.

+

For more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to “Fabric Telemetry +Kafka Topics” in the SMA product documentation.

+ +

There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Dashboard NameDisplay Type
Fabric CongestionChart Panels
Fabric RFC3635Chart Panels
Fabric ErrorsTabular Format
Fabric Port StateTabular Format
+

The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.

+

SAT Grafana Interval and Locations Options

+

Shows the Interval and Locations Options for the available telemetry.

+

Grafana Interval and Locations Options

+

The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.

+

For more information, refer to Grafana Templates and Variables.

+

The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.

+

The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.

+

Grafana Fabric Congestion Dashboard

+

Grafana Fabric Congestion Dashboard

+

SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. They also make it possible to drill down +to view data for specific ports on specific switches.

+

This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.

+

The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.

+

The port type of a link is reported as a port state “subtype” event when defined at port initialization.

+

Grafana Fabric Errors Dashboard

+

Grafana HSN Errors Dashboard

+

This dashboard reports error counters in a tabular format in three panels.

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

Unlike other dashboards, the locations presented are all locations in the system rather than only those having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.

+

Grafana Fabric Port State Dashboard

+

Grafana Fabric Port State Dashboard

+

There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.

+

The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.

+

The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.

+

Grafana Fabric RFC3635 Dashboard

+

Grafana Fabric RFC3635 Dashboard

+

For more information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.

+

Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/dashboards/sat_kibana_dashboards/index.html b/en-25/dashboards/sat_kibana_dashboards/index.html new file mode 100644 index 0000000000..2402c2d0b4 --- /dev/null +++ b/en-25/dashboards/sat_kibana_dashboards/index.html @@ -0,0 +1,1424 @@ + + + + + + + + + + + + SAT Kibana Dashboards :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Kibana Dashboards

+

Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.

+

Kibana can be accessed via web browser at the following URL:

+
    +
  • https://sma-kibana.cmn.<site-domain>
  • +
+

The value of site-domain can be obtained as follows:

+
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+    base64 -d | grep "external:"
+

That command will produce the following output, for example:

+
    external: EXAMPLE_DOMAIN.com
+

This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com

+

For more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product +documentation.

+

Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DashboardShort DescriptionLong DescriptionKibana Visualization and Search Name
sat-aerAER correctedCorrected Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-corrected Search: sat-aer-corrected
sat-aerAER fatalFatal Advanced Error Reporting messages from PCI Express devices on each node.Visualization: aer-fatal Search: sat-aer-fatal
sat-atomATOM failuresApplication Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged.sat-atom-failed
sat-atomATOM admindownApplication Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch.sat-atom-admindown
sat-heartbeatHeartbeat loss eventsHeartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system.sat-heartbeat
sat-kernelKernel assertionsThe kernel software performs a failed assertion when some condition represents a serious fault. The node goes down.sat-kassertions
sat-kernelKernel panicsThe kernel panics when something is seriously wrong. The node goes down.sat-kernel-panic
sat-kernelLustre bugs (LBUGs)The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down.sat-lbug
sat-kernelCPU stallsCPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric.sat-cpu-stall
sat-kernelOut of memoryAn Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided.sat-oom
sat-mceMCEMachine Check Exceptions (MCE) are errors detected at the processor level.sat-mce
sat-rasdaemonrasdaemon errorsErrors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future.sat-rasdaemon-error
sat-rasdaemonrasdaemon messagesAll messages from the rasdaemon service on nodes.sat-rasdaemon
+

Disable Search Highlighting in Kibana Dashboard

+

By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.

+

The Kibana Dashboard should be open on your system.

+
    +
  1. +

    Navigate to Management

    +
  2. +
  3. +

    Navigate to Advanced Settings in the Kibana section, below the Elasticsearch section

    +
  4. +
  5. +

    Scroll down to the Discover section

    +
  6. +
  7. +

    Change Highlight results from on to off

    +
  8. +
  9. +

    Click Save to save changes

    +
  10. +
+

AER Kibana Dashboard

+

The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.

+

View the AER Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-aer dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.

    +
  8. +
+

ATOM Kibana Dashboard

+

The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.

+

View the ATOM Kibana Dashboard

+

HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-atom dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

Heartbeat Kibana Dashboard

+

The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd +pods are responsible for monitoring nodes in the system for heartbeat loss.

+

View the Heartbeat Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-heartbeat dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible +for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.

    +
  8. +
+

Kernel Kibana Dashboard

+

The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.

+

View the Kernel Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-kernel dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.

    +
  8. +
+

MCE Kibana Dashboard

+

The MCE Dashboard displays CPU detected processor-level hardware errors.

+

View the MCE Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-mce dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.

    +
  8. +
+

RAS Daemon Kibana Dashboard

+

The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon +service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including +PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages +presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one +for only messages of severity emerg or err and another for all messages from rasdaemon.

+

View the RAS Daemon Kibana Dashboard

+
    +
  1. +

    Go to the dashboard section.

    +
  2. +
  3. +

    Select sat-rasdaemon dashboard.

    +
  4. +
  5. +

    Choose the time range of interest.

    +
  6. +
  7. +

    View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.

    +
  8. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/external_system/index.html b/en-25/external_system/index.html new file mode 100644 index 0000000000..acd3e12d72 --- /dev/null +++ b/en-25/external_system/index.html @@ -0,0 +1,1311 @@ + + + + + + + + + + + + SAT on an External System :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT on an External System

+

SAT can optionally be installed and configured on an external system to interact +with CSM over the CAN.

+

Limitations

+

Most SAT subcommands work by accessing APIs which are reachable via the CAN. +However, certain SAT commands depend on host-based functionality on the +management NCNs and will not work from an external system. This includes the +following:

+
    +
  • The platform-services and ncn-power stages of sat bootsys
  • +
  • The local host information displayed by the --local option of sat showrev
  • +
+

Installing SAT on an external system is not an officially supported configuration. +These instructions are provided “as-is” with the hope that they can be useful for +users who desire additional flexibility.

+

Certain additional steps may need to be taken to install and configure SAT +depending on the configuration of the external system in use. These additional +steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this +documentation. This section covers only the steps needed to configure SAT to +use externally-accessible API endpoints exposed by CSM.

+

Install and Configure SAT

+

Prerequisites

+
    +
  • The external system must be on the Customer Access Network (CAN).
  • +
  • Python 3.7 or newer is installed on the system.
  • +
  • kubectl, openssh, git, and curl are installed on the external system.
  • +
  • The root CA certificates used when installing CSM have been added to the +external system’s trust store such that authenticated TLS connections can be +made to the CSM REST API gateway. For more information, refer to Certificate +Authority in the Cray System Management Documentation.
  • +
+

Procedure

+
    +
  1. +

    Create a Python virtual environment.

    +
    $ SAT_VENV_PATH="$(pwd)/venv"
    +$ python3 -m venv ${SAT_VENV_PATH}
    +$ . ${SAT_VENV_PATH}/bin/activate
    +
  2. +
  3. +

    Clone the SAT source code.

    +

    To use SAT version 3.21, this example clones the release/3.21 branch of +Cray-HPE/sat.

    +
    (venv) $ git clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git
    +
  4. +
  5. +

    Set up the SAT CSM Python dependencies to be installed from their source code.

    +

    SAT CSM Python dependency packages are not currently distributed publicly as +source packages or binary distributions. They must be installed from +their source code hosted on GitHub. Also, to install the cray-product-catalog +Python package, you must first clone it locally. Use the following steps to +modify the SAT CSM Python dependencies so they can be installed from their source +code.

    +
      +
    1. +

      Clone the source code for cray-product-catalog.

      +
      (venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
      +
    2. +
    3. +

      In the cray-product-catalog directory, create a file named .version +that contains the version of cray-product-catalog.

      +
      (venv) $ echo 1.6.0 > cray-product-catalog/.version
      +
    4. +
    5. +

      Open the “locked” requirements file in a text editor.

      +
      (venv) $ vim sat/requirements.lock.txt
      +
    6. +
    7. +

      Update the line containing cray-product-catalog so that it reflects the +local path to cray-product-catalog.

      +

      It should read as follows.

      +
      ./cray-product-catalog
      +
    8. +
    9. +

      For versions of SAT newer than 3.19, change the line containing csm-api-client +to read as follows.

      +
      csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +
    10. +
    11. +

      (Optional) Confirm that requirements.lock.txt is modified as expected.

      +

      Note: For versions newer than 3.19, you will see both cray-product-catalog +and csm-api-client. For version 3.19 and older, you will only see +cray-product-catalog.

      +
      (venv) $ grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
      +./cray-product-catalog
      +csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +
    12. +
    +
  6. +
  7. +

    Install the modified SAT dependencies.

    +
    (venv) $ pip install -r sat/requirements.lock.txt
    +...
    +
  8. +
  9. +

    Install the SAT Python package.

    +
    (venv) $ pip install ./sat
    +...
    +
  10. +
  11. +

    (Optional) Add the sat virtual environment to the user’s PATH environment +variable.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the +appropriate profile path.

    +

    If the virtual environment is not added to the user’s PATH environment +variable, then source ${SAT_VENV_PATH}/bin/activate will need to be run before +running any SAT commands.

    +
    (venv) $ deactivate
    +$ echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
    +$ source ~/.bash_profile
    +
  12. +
  13. +

    Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config +on the external system.

    +

    Note that this file contains credentials to authenticate against the Kubernetes +API as the administrative user, so it should be treated as sensitive.

    +
    $ mkdir -p ~/.kube
    +$ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
    +admin.conf                                       100% 5566   3.0MB/s   00:00
    +
  14. +
  15. +

    Add a new entry for the hostname kubernetes to the external system’s +/etc/hosts file.

    +

    The kubernetes hostname should correspond to the CAN IP address on ncn-m001. +On CSM 1.2, this can be determined by querying the IP address of the bond0.cmn0 +interface.

    +
    $ ssh ncn-m001 ip addr show bond0.cmn0
    +13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
    +inet 10.102.1.11/24 brd 10.102.1.255 scope global bond0.cmn0
    +   valid_lft forever preferred_lft forever
    +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
    +   valid_lft forever preferred_lft forever
    +$ IP_ADDRESS=10.102.1.11
    +

    On CSM versions prior to 1.2, the CAN IP can be determined by querying the +IP address of the vlan007 interface.

    +
    $ ssh ncn-m001 ip addr show vlan007
    +13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
    +inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
    +   valid_lft forever preferred_lft forever
    +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
    +   valid_lft forever preferred_lft forever
    +$ IP_ADDRESS=10.102.1.10
    +

    Once the IP address is determined, add an entry to /etc/hosts mapping the +IP address to the hostname kubernetes.

    +
    $ echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
    +10.102.1.11 kubernetes
    +
  16. +
  17. +

    Modify ~/.kube/config to set the cluster server address.

    +

    The value of the server key for the kubernetes cluster under the clusters +section should be set to https://kubernetes:6443.

    +
    ---
    +clusters:
    +- cluster:
    +    certificate-authority-data: REDACTED
    +    server: https://kubernetes:6443
    +  name: kubernetes
    +...
    +
  18. +
  19. +

    Confirm that kubectl can access the CSM Kubernetes cluster.

    +
    $ kubectl get nodes
    +NAME       STATUS   ROLES    AGE    VERSION
    +ncn-m001   Ready    master   135d   v1.19.9
    +ncn-m002   Ready    master   136d   v1.19.9
    +ncn-m003   Ready    master   136d   v1.19.9
    +ncn-w001   Ready    <none>   136d   v1.19.9
    +ncn-w002   Ready    <none>   136d   v1.19.9
    +ncn-w003   Ready    <none>   136d   v1.19.9
    +
  20. +
  21. +

    Use sat init to create a configuration file for SAT.

    +
    $ sat init
    +INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
    +
  22. +
  23. +

    Copy the platform CA certificates from the management NCN and configure the +certificates for use with SAT.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the +appropriate profile path.

    +
    $ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
    +$ echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
    +$ source ~/.bash_profile
    +
  24. +
  25. +

    Edit the SAT configuration file to set the API and S3 hostnames.

    +

    Externally available API endpoints are given domain names in PowerDNS, so the +endpoints in the configuration file should each be set to +subdomain.system-name.site-domain, where system-name and site-domain are +replaced with the values specified during csi config init, and subdomain +is the DNS name for the externally available service. For more information, +refer to Externally Exposed Services in the Cray System Management +Documentation.

    +

    The API gateway has the subdomain api, and S3 has the subdomain s3. The +S3 endpoint runs on port 8080. The following options should be set in the +SAT configuration file.

    +
    [api_gateway]
    +host = "api.system-name.site-domain"
    +
    +[s3]
    +endpoint = "http://s3.system-name.site-domain:8080"
    +
  26. +
  27. +

    Edit the SAT configuration file to specify the Keycloak user which will be +accessing the REST API.

    +
    [api_gateway]
    +username = "user"
    +
  28. +
  29. +

    Run sat auth. Enter your password when prompted.

    +

    The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For more +information on editing Role Mappings, see Create Internal User Accounts +in the Keycloak Shasta Realm in the +Cray System Management Documentation. +For more information on authentication types and authentication credentials, +see SAT Command Authentication.

    +
    $ sat auth
    +Password for user:
    +Succeeded!
    +
  30. +
  31. +

    Create the S3 credential files and ensure they are readable only by the current user.

    +
    $ touch ~/.config/sat/s3_access_key \
    +    ~/.config/sat/s3_secret_key
    +
    $ chmod 600 ~/.config/sat/s3_access_key \
    +    ~/.config/sat/s3_secret_key
    +
  32. +
  33. +

    Write the credentials to local files using kubectl.

    +

    Generate S3 credentials and write them to local files so the SAT user can +access S3 storage. In order to use the SAT S3 bucket, the user must generate +the S3 access key and secret key and write them to local files. SAT uses +S3 storage for several purposes, most importantly to store the site-specific +information set with sat setrev.

    +
    $ kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    ~/.config/sat/s3_access_key
    +
    $ kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    ~/.config/sat/s3_secret_key
    +
  34. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/img/Fabric_PortState_Locations_UI.png b/en-25/img/Fabric_PortState_Locations_UI.png new file mode 100644 index 0000000000..704511ebce Binary files /dev/null and b/en-25/img/Fabric_PortState_Locations_UI.png differ diff --git a/en-25/img/Grafana_Fabric_Congestion.png b/en-25/img/Grafana_Fabric_Congestion.png new file mode 100644 index 0000000000..dbf481d94c Binary files /dev/null and b/en-25/img/Grafana_Fabric_Congestion.png differ diff --git a/en-25/img/Grafana_HSN_Errors.png b/en-25/img/Grafana_HSN_Errors.png new file mode 100644 index 0000000000..f43b7d02a6 Binary files /dev/null and b/en-25/img/Grafana_HSN_Errors.png differ diff --git a/en-25/img/Grafana_rfc3635.png b/en-25/img/Grafana_rfc3635.png new file mode 100644 index 0000000000..dff176c82d Binary files /dev/null and b/en-25/img/Grafana_rfc3635.png differ diff --git a/en-25/img/SAT_Grafana_Fabric_Vars.png b/en-25/img/SAT_Grafana_Fabric_Vars.png new file mode 100644 index 0000000000..194d75b124 Binary files /dev/null and b/en-25/img/SAT_Grafana_Fabric_Vars.png differ diff --git a/en-25/index.html b/en-25/index.html new file mode 100644 index 0000000000..2fdb51c9d3 --- /dev/null +++ b/en-25/index.html @@ -0,0 +1,1139 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ + + + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/index.json b/en-25/index.json new file mode 100644 index 0000000000..277a61fe6d --- /dev/null +++ b/en-25/index.json @@ -0,0 +1,176 @@ +[ +{ + "uri": "/docs-sat/en-25/usage/", + "title": "SAT Usage", + "tags": [], + "description": "", + "content": "SAT Usage SAT Bootprep SAT and IUF Change the BOS Version " +}, +{ + "uri": "/docs-sat/en-25/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide Introduction to SAT About System Admin Toolkit (SAT) SAT Command Overview SAT Command Authentication Command Prompt Conventions in SAT SAT in CSM SAT Dependencies SAT Installation Install and Upgrade Framework IUF Stage Details for SAT Post-Installation Procedures SAT Upgrade Install and Upgrade Framework IUF Stage Details for SAT Post-Upgrade Procedures SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT Downgrade: Switch Between SAT Versions SAT on an External System Limitations Install and Configure SAT Authenticate SAT Commands Generate SAT S3 Credentials SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards SAT Usage SAT Bootprep SAT and IUF Change the BOS Version SAT Release Notes Changes in SAT Version 2.x SAT Changes in Shasta Version 1.x " +}, +{ + "uri": "/docs-sat/en-25/dashboards/", + "title": "SAT Dashboards", + "tags": [], + "description": "", + "content": "SAT Dashboards SAT Kibana Dashboards SAT Grafana Dashboards " +}, +{ + "uri": "/docs-sat/en-25/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. 
The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nThis document does not replicate install, upgrade, or deployment procedures detailed in the Cray System Management Documentation. This document provides details regarding software and configuration content specific to SAT which is needed when installing, upgrading, or deploying a SAT release. The Cray System Management Documentation will indicate when sections of this document should be referred to for detailed information.\nIUF will perform the following tasks for a release of SAT.\nIUF deliver-product stage: Uploads SAT configuration content to VCS Uploads SAT information to the CSM product catalog Uploads SAT content to Nexus repositories IUF update-vcs-config stage: Updates the VCS integration branch with new SAT configuration content if a working branch is specified IUF update-cfs-config stage: Creates a new CFS configuration for management nodes with new SAT configuration content IUF prepare-images stage: Creates updated management NCN and managed node images with new SAT content IUF management-nodes-rollout stage: Boots management NCNs with an image containing new SAT content IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF section of the Cray System Management Documentation describes how to use these tools directly if it is desirable to use them instead of IUF.\nIUF Stage Details for SAT This section describes SAT details that an administrator must be aware of before running IUF stages. Entries are prefixed with Information if no administrative action is required or Action if an administrator needs to perform tasks outside of IUF.\nupdate-vcs-config Information: This stage is only run if a VCS working branch is specified for SAT. 
By default, SAT does not create or specify a VCS working branch.\nupdate-cfs-config Information: This stage only applies to the management configuration and not to the managed configuration.\nprepare-images Information: This stage only applies to management images and not to managed images.\nPost-Installation Procedures After installing SAT with IUF, you must complete the following SAT configuration procedures before using SAT:\nAuthenticate SAT Commands Generate SAT S3 Credentials Set System Revision Information Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. Authenticate SAT Commands To run SAT commands on the manager NCNs, you must first set up authentication to the API gateway. The admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For more information on editing Role Mappings, see Create Internal User Accounts in the Keycloak Shasta Realm in the Cray System Management Documentation. For more information on authentication types and authentication credentials, see SAT Command Authentication.\nPrerequisites The sat CLI has been installed following the IUF section of the Cray System Management Documentation. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway.\nGenerate a default SAT configuration file, if one does not exist.\nncn-m001# sat init Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. Note: If the config file already exists, it will print out the following error.\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. 
Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the config file.\nusername = \u0026#34;crayadmin\u0026#34; Run sat auth. Enter your password when prompted.\nncn-m001# sat auth Password for crayadmin: Succeeded! Other sat commands are now authenticated to make requests to the API gateway.\nncn-m001# sat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes master node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see Set System Revision Information).\nPrerequisites The SAT CLI has been installed following the IUF section of the Cray System Management Documentation. The SAT configuration file has been created (See Authenticate SAT Commands). CSM has been installed and verified. 
Procedure Ensure the files are readable only by root.\nncn-m001# touch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key ncn-m001# chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key ncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\nGet the SAT configuration file\u0026rsquo;s endpoint value.\nNote: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\nncn-m001# grep endpoint ~/.config/sat/sat.toml # endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nncn-m001# kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs https://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, change the SAT configuration file\u0026rsquo;s endpoint value to match the secret\u0026rsquo;s.\nCopy SAT configurations to each manager node on the system.\nncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done Note: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. 
Therefore, the list of hosts above is ncn-m002 and ncn-m003.\nSet System Revision Information HPE service representatives use system revision information data to identify systems in support cases.\nPrerequisites SAT authentication has been set up. See Authenticate SAT Commands. S3 credentials have been generated. See Generate SAT S3 Credentials. Procedure Set System Revision Information.\nRun sat setrev and follow the prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date Tip: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, \u0026ldquo;System type\u0026rdquo; is EX-1C.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... 
Verify System Revision Information.\nRun sat showrev and verify the output shown in the \u0026ldquo;System Revision Information table.\u0026rdquo;\nThe following example shows sample table output.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... 
| | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ " +}, +{ + "uri": "/docs-sat/en-25/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Changes in SAT Version 2.x Changes in SAT 2.5 Changes in SAT 2.4 Changes in SAT 2.3 Changes in SAT 2.2 SAT Changes in Shasta Version 1.x SAT Changes in Shasta v1.5 SAT Changes in Shasta v1.4.1 SAT Changes in Shasta v1.4 SAT Changes in Shasta v1.3.2 SAT Changes in Shasta v1.3 " +}, +{ + "uri": "/docs-sat/en-25/usage/change_bos_version/", + "title": "Change the BOS Version", + "tags": [], + "description": "", + "content": "Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command.\nYou can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. If the system is using an existing SAT config file from an older version of SAT, the bos section might not exist. 
In that case, add the bos section with the BOS version desired in the api_version setting.\nFind the SAT config file at ~/.config/sat/sat.toml, and look for a section like this:\n[bos] api_version = \u0026#34;v2\u0026#34; In this example, SAT is using BOS version \u0026quot;v2\u0026quot;.\nChange the line specifying the api_version to the BOS version desired (for example, \u0026quot;v1\u0026quot;).\n[bos] api_version = \u0026#34;v1\u0026#34; If applicable, uncomment the api_version line.\nIf the system is using an existing SAT config file from a recent version of SAT, the api_version line might be commented out like this:\n[bos] # api_version = \u0026#34;v2\u0026#34; If the line is commented out, SAT will still use the default BOS version. To ensure a different BOS version is used, uncomment the api_version line by removing # at the beginning of the line.\n" +}, +{ + "uri": "/docs-sat/en-25/release_notes/sat_2.2_release_notes/", + "title": "Changes in SAT 2.2", + "tags": [], + "description": "", + "content": "Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022.\nThis version of the SAT product included:\nVersion 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components:\nVersion 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release.\nKnown Issues in SAT 2.2 sat Command Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, the sat command will not be found. For example:\n(CONTAINER-ID) sat-container:~ # sat status bash: sat: command not found This can be resolved temporarily in one of two ways. 
/sat/venv/bin/ may be prepended to the $PATH environment variable:\n(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH (CONTAINER-ID) sat-container:~ # sat status Or, the file /sat/venv/bin/activate may be sourced:\n(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate (CONTAINER-ID) sat-container:~ # sat status Tab Completion Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, tab completion for sat commands does not work.\nThis can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:\nsource /etc/bash_completion.d/sat-completion.bash OCI Runtime Permission Error when Running sat in Root Directory sat commands will not work if the current directory is /. For example:\nncn-m001:/ # sat --help Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error To resolve, run sat in another directory.\nDuplicate Mount Error when Running sat in Config Directory sat commands will not work if the current directory is ~/.config/sat. For example:\nncn-m001:~/.config/sat # sat --help Error: /root/.config/sat: duplicate mount destination To resolve, run sat in another directory.\nNew sat Commands sat bootprep automates the creation of CFS configurations, the build and customization of IMS images, and the creation of BOS session templates. For more information, see SAT Bootprep. sat slscheck performs a check for consistency between the System Layout Service (SLS) and the Hardware State Manager (HSM). sat bmccreds provides a simple interface for interacting with the System Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist displays hardware component history by XName (location) or by its Field-Replaceable Unit ID (FRUID). This command queries the Hardware State Manager (HSM) API to obtain this information. 
Since the sat hwhist command supports querying for the history of a component by its FRUID, the FRUID of components has been added to the output of sat hwinv. Additional Install Automation The following automation has been added to the install script, install.sh:\nWait for the completion of the sat-config-import Kubernetes job, which is started when the sat-cfs-install Helm chart is deployed. Automate the modification of the CFS configuration, which applies to master management NCNs (for example, ncn-personalization). Changes to Product Catalog Data Schema The SAT product uploads additional information to the cray-product-catalog Kubernetes ConfigMap detailing the components it provides, including container (Docker) images, Helm charts, RPMs, and package repositories.\nThis information is used to support uninstall and downgrade of SAT product versions moving forward.\nSupport for Uninstall and Downgrade of SAT Versions Beginning with the 2.2 release, SAT now provides partial support for the uninstall and downgrade of the SAT product stream.\nFor more information, see Uninstall: Remove a Version of SAT and Downgrade: Switch Between SAT Versions.\nImprovements to sat status A Subrole column has been added to the output of sat status. This allows you to easily differentiate between master, worker, and storage nodes in the management role, for example.\nHostname information from SLS has been added to sat status output.\nAdded Support for JSON Output Support for JSON-formatted output has been added to commands which currently support the --format option, such as hwinv, status, and showrev.\nUsability Improvements Many usability improvements have been made to multiple sat commands, mostly related to filtering command output. The following are some highlights:\nAdded --fields option to display only specific fields for subcommands which display tabular reports. Added ability to filter on exact matches of a field name. 
Improved handling of multiple matches of a field name in --filter queries so that the first match is used, similar to --sort-by. Added support for --filter, --fields, and --reverse for summaries displayed by sat hwinv. Added borders to summary tables generated by sat hwinv. Improved documentation in the man pages. Default Log Level Changed The default log level for stderr has been changed from \u0026ldquo;WARNING\u0026rdquo; to \u0026ldquo;INFO\u0026rdquo;. For more information, see Update SAT Logging.\nMore Granular Log Level Configuration Options With the command-line options --loglevel-stderr and --loglevel-file, the log level can now be configured separately for stderr and the log file.\nThe existing --loglevel option is now an alias for the --loglevel-stderr option.\nPodman Wrapper Script Improvements The Podman wrapper script is the script installed at /usr/bin/sat on the master management NCNs by the cray-sat-podman RPM that runs the cray-sat container in podman. The following subsections detail improvements that were made to the wrapper script in this release.\nMounting of $HOME and Current Directories in cray-sat Container The Podman wrapper script that launches the cray-sat container with podman has been modified to mount the user\u0026rsquo;s current directory and home directory into the cray-sat container to provide access to local files in the container.\nPodman Wrapper Script Documentation Improvements The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:\nEnvironment variables that affect execution of the wrapper script Host files and directories mounted in the container Fixes to Podman Wrapper Script Output Redirection Fixed issues with redirecting stdout and stderr, and piping output to commands, such as awk, less, and more.\nConfigurable HTTP Timeout A new sat option has been added to configure the HTTP timeout length for requests to the API gateway. 
For more information, refer to sat-man sat.\nsat bootsys Improvements Many improvements and fixes have been made to sat bootsys. The following are some highlights:\nAdded the --excluded-ncns option, which can be used to omit NCNs from the platform-services and ncn-power stages in case they are inaccessible. Disruptive shutdown stages in sat bootsys shutdown now prompt the user to continue before proceeding. A new option, --disruptive, will bypass this. Improvements to Ceph service health checks and restart during the platform-services stage of sat bootsys boot. sat xname2nid Improvements sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to a list of NIDs in those locations.\nA new --format option has been added to sat xname2nid. It sets the output format to either \u0026ldquo;range\u0026rdquo; (the default) or \u0026ldquo;NID\u0026rdquo;. The \u0026ldquo;range\u0026rdquo; format displays NIDs in a compressed range format suitable for use with a workload manager like Slurm.\nUsage of v2 HSM API The commands which interact with HSM (for example, sat status and sat hwinv) now use the v2 HSM API.\nsat diag Limited to HSN Switches sat diag will now only operate against HSN switches by default. These are the only controllers that support running diagnostics with HMJTD.\nsat showrev Enhancements A column has been added to the output of sat showrev that indicates whether a product version is \u0026ldquo;active\u0026rdquo;. The definition of \u0026ldquo;active\u0026rdquo; varies across products, and not all products may set an \u0026ldquo;active\u0026rdquo; version.\nFor SAT, the active version is the one with its hosted-type package repository in Nexus set as the member of the group-type package repository in Nexus, meaning that it will be used when installing the cray-sat-podman RPM.\ncray-sat Container Image Size Reduction The size of the cray-sat container image has been approximately cut in half by leveraging multi-stage builds. 
This also improved the repeatability of the unit tests by running them in the container.\nBug Fixes Minor bug fixes were made in cray-sat and in cray-sat-podman. For full change lists, refer to each repository\u0026rsquo;s CHANGELOG.md file.\n" +}, +{ + "uri": "/docs-sat/en-25/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview.\nSix Kibana Dashboards are included with SAT. They provide organized output for system health information.\nAER Kibana Dashboard ATOM Kibana Dashboard Heartbeat Kibana Dashboard Kernel Kibana Dashboard MCE Kibana Dashboard RAS Daemon Kibana Dashboard Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.\nGrafana Fabric Congestion Dashboard Grafana Fabric Errors Dashboard Grafana Fabric Port State Dashboard Grafana Fabric RFC3635 Dashboard In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs. For more information, see SAT in CSM. 
Older versions of CSM do not have the sat command automatically available, and SAT must be installed as a separate product.\nSAT Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on Kubernetes manager nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. Non-interactive: Running a sat command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. 
The following two examples show the same action, checking the system status, using interactive and non-interactive modes.\nInteractive ncn-m001# sat bash (CONTAINER-ID)sat-container# sat status Non-interactive ncn-m001# sat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, then use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following example.\nncn-m001# sat-man status A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed either with man sat or man sat-podman from the manager node.\nncn-m001# man sat ncn-m001# man sat-podman SAT Command Authentication Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes manager node where SAT commands are run.\nFor more information on authentication requests, see System Security and Authentication in the Cray System Management Documentation. 
The following is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bmccreds Requires authentication to the API gateway. sat-bmccreds Set BMC passwords. sat bootprep Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. sat-bootprep Prepare to boot nodes with images and configurations. sat bootsys Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwhist Requires authentication to the API gateway. sat-hwhist Report hardware component history. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat jobstat Requires authentication to the API gateway. sat-jobstat Check the status of jobs and applications. sat k8s Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. sat-k8s Report on Kubernetes replica sets that have co-located (on the same node) replicas. 
sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node XNames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat slscheck Requires authentication to the API gateway. sat-slscheck Perform a cross-check between SLS and HSM. sat status Requires authentication to the API gateway. sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. sat-xname2nid Translate node and node BMC XNames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, you must run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the config file at ~/.config/sat/sat.toml. The name of the currently logged in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. 
Subsequent sat commands will determine the username the same way as sat auth described above and will use the token for that username if it has been obtained and saved by sat auth.\nCommand Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.\nThe root or super-user account always has the # character at the end of the prompt and has the host name of the host in the prompt. Any non-root account is indicated with account@hostname\u0026gt;. A user account that is neither root nor crayadm is referred to as user. The command prompt inside the SAT container environment is indicated with the string as follows. It also has the \u0026ldquo;#\u0026rdquo; character at the end of the prompt. Command Prompt Meaning ncn-m001# Run on one of the Kubernetes Manager servers. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) Here are examples of the sat status command used by an administrator.\nncn-m001# sat status ncn-m001# sat bash (CONTAINER_ID) sat-container# sat status SAT in CSM In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs, but it is still possible to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:\nAn entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.\nThe sat-install-utility container image is only available with the full SAT product stream. This container image provides uninstall and downgrade functionality when used with the prodmgr command. 
(In SAT 2.3 and older, SAT was only available to install as a separate product stream. Because these versions were packaged with sat-install-utility, it is still possible to uninstall these versions of SAT.)\nThe docs-sat RPM package is only available with the full SAT product stream.\nThe sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is only available with the full SAT product stream.\nIf the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS configurations that apply to management NCNs (for example, management-23.5.0) should not include a SAT layer.\nThe SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, it does the following:\nModifies the sat.toml configuration file which contains the username so that it is only readable by root.\nModifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed because the names of the files within the tokens directory contain the username.\nRegardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other users. These permission changes only apply to files created by previous installations of SAT. In the current version of SAT all files and directories are created with the appropriate permissions.\nSAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX (Shasta) software stack. The following list shows these dependencies for each subcommand. 
Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Compute Rolling Upgrade Service (CRUS) Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 COS Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diags Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat jobstat PBS HPE State Checker sat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat slscheck CSM Hardware State Manager (HSM) Kubernetes S3 sat status CSM Hardware State Manager (HSM) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, +{ + "uri": "/docs-sat/en-25/dashboards/sat_grafana_dashboards/", + "title": "SAT Grafana Dashboards", + "tags": [], + "description": "", + "content": "SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. 
The messages are displayed based on severity.\nGrafana can be accessed via web browser at the following URL:\nhttps://sma-grafana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com\nFor more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the SMA product documentation.\nFor more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to \u0026ldquo;Fabric Telemetry Kafka Topics\u0026rdquo; in the SMA product documentation.\nNavigate SAT Grafana Dashboards There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display telemetry in a tabular format.\nDashboard Name Display Type Fabric Congestion Chart Panels Fabric RFC3635 Chart Panels Fabric Errors Tabular Format Fabric Port State Tabular Format The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. The interval setting is not used for tabular dashboards.\nSAT Grafana Interval and Locations Options Shows the Interval and Locations Options for the available telemetry.\nThe value of the Interval option sets the time resolution of the received telemetry. This works a bit like a histogram, with the available telemetry in an interval of time going into a \u0026ldquo;bucket\u0026rdquo; and averaging out to a single point on the chart or table. 
The special value auto will choose an interval based on the time range selected.\nFor more information, refer to Grafana Templates and Variables.\nThe Locations option allows restriction of the telemetry shown by locations, either individual links or all links in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, which always has entries for all links and switches, although the errors shown are restricted to the selected time range.\nThe chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart\u0026rsquo;s legend or the trace on the chart.\nGrafana Fabric Congestion Dashboard SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in the system and assess the past and present health of the high-speed network. It also allows the ability to drill down to view data for specific ports on specific switches.\nThis dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, local, and global and correspond to the link\u0026rsquo;s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to \u0026ldquo;all\u0026rdquo;) selected.\nThe metric values for links of a given port type are similar in value to each other but very distinct from the values of other types. If the values for different port types are all plotted together, the values for links with lower values are indistinguishable from zero when plotted.\nThe port type of a link is reported as a port state \u0026ldquo;subtype\u0026rdquo; event when defined at port initialization.\nGrafana Fabric Errors Dashboard This dashboard reports error counters in a tabular format in three panels.\nThere is no Interval option because this parameter is not used to set a coarseness of the data. 
Only a single value is presented that displays the most recent value in the time range.\nUnlike other dashboards, the locations presented are all locations in the system rather than having telemetry within the time range selected. However, the values are taken from telemetry within the time range.\nGrafana Fabric Port State Dashboard There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value is presented that displays the most recent value in the time range.\nThe Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours results in all states for all links in the system being shown.\nThe three columns named, group, switch, and port are not port state events, but extra information included with all port state events.\nGrafana Fabric RFC3635 Dashboard For more information on performance counters, refer to Definitions of Managed Objects for the Ethernet-like Interface Types, an Internet standards document.\nBecause these metrics are counters that only increase over time, the values plotted are the change in the counter\u0026rsquo;s value over the interval setting.\n" +}, +{ + "uri": "/docs-sat/en-25/usage/sat_and_iuf/", + "title": "SAT and IUF", + "tags": [], + "description": "", + "content": "SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep.\nVariable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. 
The default variables of the HPC CSM Software Recipe are available in a product_vars.yaml file. To override the default variables, specify any site variables in a site_vars.yaml file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order.\nIUF Session Variables IUF also has special session variables internal to the iuf command that override any matching entries. Session variables are the set of product and version combinations being installed by the current IUF activity, and they are found inside IUF\u0026rsquo;s internal session_vars.yaml file. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.\nSAT Variable Limitations When using sat bootprep outside of IUF, you might encounter problems substituting variables into the default bootprep input files. Complex variables like \u0026quot;{{ working_branch }}\u0026quot; cannot be completely resolved outside of IUF and its internal session variables. Thus, the default product_vars.yaml file is unusable with only the sat bootprep command when variables like \u0026quot;{{ working_branch }}\u0026quot; are used. To work around this limitation if you are substituting complex variables, use the internal IUF session_vars.yaml file with sat bootprep and the default bootprep input files.\nFind the session_vars.yaml file from the most recent IUF activity on the system.\nThis process is documented in the upgrade prerequisites procedure of the Cray System Management Documentation. For more information, see steps 1-6 of Stage 0.3 - Option 2.\nUse the session_vars.yaml file to substitute variables into the default bootprep input files.\nncn-m001# sat bootprep run --vars-file session_vars.yaml Limit SAT Bootprep Run into Stages The sat bootprep run command uses information from the bootprep input files to create CFS configurations, IMS images, and BOS session templates. 
To restrict this creation into separate stages, use the --limit option and list whether you want to create configurations, images, session_templates, or some combination of these. IUF uses the --limit option in this way to install, upgrade, and deploy products on a system in stages. For example, to create only CFS configurations, run the following command used by the IUF update-cfs-config stage:\nncn-m001# sat bootprep run --limit configurations example-bootprep-input-file.yaml INFO: Validating given input file example-bootprep-input-file.yaml INFO: Input file successfully validated against schema INFO: Creating 3 CFS configurations ... INFO: Skipping creation of IMS images based on value of --limit option. INFO: Skipping creation of BOS session templates based on value of --limit option. To create only IMS images and BOS session templates, run the following command used by the IUF prepare-images stage:\nncn-m001# sat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml INFO: Validating given input file example-bootprep-input-file.yaml INFO: Input file successfully validated against schema INFO: Skipping creation of CFS configurations based on value of --limit option. 
" +}, +{ + "uri": "/docs-sat/en-25/release_notes/sat_2.3_release_notes/", + "title": "Changes in SAT 2.3", + "tags": [], + "description": "", + "content": "Changes in SAT 2.3 The 2.3.4 version of the SAT product includes:\nVersion 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None.\nCurrent Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share.\nFiles in the current working directory must be specified using relative paths to that directory, because the current working directory is always mounted on /sat/share. Absolute paths should be avoided, and paths that are outside of $HOME or $PWD are never accessible to the container environment.\nThe home directory is still mounted on the same path inside the container as it is on the host.\nChanges to sat bootsys The following options were added to sat bootsys.\n--bos-limit --recursive The --bos-limit option passes a given limit string to a BOS session. The --recursive option specifies a slot or other higher-level component in the limit string.\nChanges to sat bootprep The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS jobs after sat bootprep is run. 
Jobs are no longer deleted by default.\nChanges to sat status sat status now includes information about nodes\u0026rsquo; CFS configuration statuses, such as desired configuration, configuration status, and error count.\nThe output of sat status now splits different component types into different report tables.\nThe following options were added to sat status.\n--hsm-fields, --sls-fields, --cfs-fields --bos-template The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns according to specified CSM services.\nThe --bos-template option filters the status report according to the specified session template\u0026rsquo;s boot sets.\nCompatibility with CSM 1.2 The following components were modified to be compatible with CSM 1.2.\nsat-cfs-install container image and Helm chart sat-install-utility container image SAT product installer GPG Checking The sat-ncn Ansible role provided by sat-cfs-install was modified to enable GPG checks on packages while leaving GPG checks disabled on repository metadata.\nSecurity Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed Python dependency versions.\nBug Fixes Minor bug fixes were made in each of the repositories. For full change lists, refer to each repository’s CHANGELOG.md file.\nThe known issues listed under the SAT 2.2 release were fixed.\n" +}, +{ + "uri": "/docs-sat/en-25/dashboards/sat_kibana_dashboards/", + "title": "SAT Kibana Dashboards", + "tags": [], + "description": "", + "content": "SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. 
Viewing system data in this way breaks down the complexity of large data volumes into easily understood information.\nKibana can be accessed via web browser at the following URL:\nhttps://sma-kibana.cmn.\u0026lt;site-domain\u0026gt; The value of site-domain can be obtained as follows:\nncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=\u0026#39;{.data.customizations\\.yaml}\u0026#39; | \\ base64 -d | grep \u0026#34;external:\u0026#34; That command will produce the following output, for example:\nexternal: EXAMPLE_DOMAIN.com This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com\nFor more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product documentation.\nAdditional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this table.\nDashboard Short Description Long Description Kibana Visualization and Search Name sat-aer AER corrected Corrected Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-corrected Search: sat-aer-corrected sat-aer AER fatal Fatal Advanced Error Reporting messages from PCI Express devices on each node. Visualization: aer-fatal Search: sat-aer-fatal sat-atom ATOM failures Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. sat-atom-failed sat-atom ATOM admindown Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. sat-atom-admindown sat-heartbeat Heartbeat loss events Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. sat-heartbeat sat-kernel Kernel assertions The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. 
sat-kassertions sat-kernel Kernel panics The kernel panics when something is seriously wrong. The node goes down. sat-kernel-panic sat-kernel Lustre bugs (LBUGs) The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. sat-lbug sat-kernel CPU stalls CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. sat-cpu-stall sat-kernel Out of memory An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. sat-oom sat-mce MCE Machine Check Exceptions (MCE) are errors detected at the processor level. sat-mce sat-rasdaemon rasdaemon errors Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. sat-rasdaemon-error sat-rasdaemon rasdaemon messages All messages from the rasdaemon service on nodes. sat-rasdaemon Disable Search Highlighting in Kibana Dashboard By default, search highlighting is enabled. 
This procedure describes how to disable search highlighting.\nThe Kibana Dashboard should be open on your system.\nNavigate to Management\nNavigate to Advanced Settings in the Kibana section, below the Elasticsearch section\nScroll down to the Discover section\nChange Highlight results from on to off\nClick Save to save changes\nAER Kibana Dashboard The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors are split up into separate visualizations depending on whether they are fatal or corrected errors.\nView the AER Kibana Dashboard Go to the dashboard section.\nSelect sat-aer dashboard.\nChoose the time range of interest.\nView the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nATOM Kibana Dashboard The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health checks and application test failures. Some test failures are of possible interest even though a node is not marked admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide clues if a node otherwise fails. They might also show application problems.\nView the ATOM Kibana Dashboard HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.\nGo to the dashboard section.\nSelect sat-atom dashboard.\nChoose the time range of interest.\nView any nodes marked admindown and any ATOM test failures. These failures occur during health checks and application test failures. Test failures marked admindown are important to note. 
View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nHeartbeat Kibana Dashboard The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss.\nView the Heartbeat Kibana Dashboard Go to the dashboard section.\nSelect sat-heartbeat dashboard.\nChoose the time range of interest.\nView the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.\nKernel Kibana Dashboard The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using too much memory.\nView the Kernel Kibana Dashboard Go to the dashboard section.\nSelect sat-kernel dashboard.\nChoose the time range of interest.\nView the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nMCE Kibana Dashboard The MCE Dashboard displays CPU-detected processor-level hardware errors.\nView the MCE Kibana Dashboard Go to the dashboard section.\nSelect sat-mce dashboard.\nChoose the time range of interest.\nView the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\nRAS Daemon Kibana Dashboard The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one for only messages of severity emerg or err and another for all messages from rasdaemon.\nView the RAS Daemon Kibana Dashboard Go to the dashboard section.\nSelect sat-rasdaemon dashboard.\nChoose the time range of interest.\nView the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. 
If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.\n" +}, +{ + "uri": "/docs-sat/en-25/usage/sat_bootprep/", + "title": "SAT Bootprep", + "tags": [], + "description": "", + "content": "SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands.\nncn-m001# sat-man sat-bootprep The sat bootprep command helps the Install and Upgrade Framework (IUF) install, upgrade, and deploy products on systems managed by CSM. Outside of IUF, it is uncommon to use sat bootprep. For more information on this relationship, see SAT and IUF. For more information on IUF, see the IUF section of the Cray System Management Documentation.\nSAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.\nsat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.\nEdit a Bootprep Input File The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. An input file is composed of three main sections, one each for configurations, images, and session templates. 
These sections may be specified in any order, and any of the sections may be omitted if desired.\nProvide a Schema Version The sat bootprep input file is validated against a versioned schema definition. The input file should specify the version of the schema with which it is compatible under a schema_version key. For example:\n--- schema_version: 1.0.2 The current sat bootprep input file schema version can be viewed with the following command:\nncn-m001# sat bootprep view-schema | grep \u0026#39;^version:\u0026#39; version: \u0026#39;1.0.2\u0026#39; The sat bootprep run command validates the schema version specified in the input file. The command also makes sure that the schema version of the input file is compatible with the schema version understood by the current version of sat bootprep. For more information on schema version validation, refer to the schema_version property description in the bootprep input file schema. For more information on viewing the bootprep input file schema in either raw form or user-friendly HTML form, see View SAT Bootprep Schema.\nThe default HPC CSM Software Recipe bootprep input files provided by the hpc-csm-software-recipe release distribution already contain the correct schema version.\nDefine CFS Configurations The CFS configurations are defined under a configurations key. Under this key, you can list one or more configurations to create. For each configuration, give a name in addition to the list of layers that comprise the configuration.\nEach layer can be defined by a product name and optionally a version number, commit hash, or branch in the product\u0026rsquo;s configuration repository. If this method is used, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be supplied. 
However, if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.\nAlternatively, a configuration layer can be defined by explicitly referencing the desired configuration repository. You must then specify the intended version of the Ansible playbooks by providing a branch name or commit hash with branch or commit.\nThe following example shows a CFS configuration with two layers. The first layer is defined in terms of a product name and version, and the second layer is defined in terms of a Git clone URL and branch:\n--- configurations: - name: example-configuration layers: - name: example-product playbook: example.yml product: name: example version: 1.2.3 - name: another-example-product playbook: another-example.yml git: url: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34; branch: main When sat bootprep is run against an input file, a CFS configuration is created corresponding to each configuration in the configurations section. 
For example, the configuration created from an input file with the layers listed above might look something like the following:\n{ \u0026#34;lastUpdated\u0026#34;: \u0026#34;2022-02-07T21:47:49Z\u0026#34;, \u0026#34;layers\u0026#34;: [ { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;example.yml\u0026#34; }, { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;another example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;another-example.yml\u0026#34; } ], \u0026#34;name\u0026#34;: \u0026#34;example-configuration\u0026#34; } Define IMS Images The IMS images are defined under an images key. Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name key and a base key.\nThe name key defines the name of the resulting IMS image. The base key defines the base image to be configured or the base recipe to be built and optionally configured. One of the following keys must be present under the base key:\nUse an ims key to specify an existing image or recipe in IMS. Use a product key to specify an image or recipe provided by a particular version of a product. If a product provides more than one image or recipe, a filter string prefix must be specified to select one. Use an image_ref key to specify another image from the input file using its ref_name. Images may also contain the following keys:\nUse a configuration key to specify a CFS configuration with which to customize the built image. 
If a configuration is specified, then configuration groups must also be specified using the configuration_group_names key. Use a ref_name key to specify a unique name that can refer to this image within the input file in other images or in session templates. The ref_name key allows references to images from the input file that have dynamically generated names as described in Dynamic Variable Substitutions. Use a description key to describe the image in the bootprep input file. Note that this key is not currently used. Here is an example of an image using an existing IMS recipe as its base. This example builds an IMS image from that recipe. It then configures it with a CFS configuration named example-compute-config. The example-compute-config CFS configuration can be defined under the configurations key in the same input file, or it can be an existing CFS configuration. Running sat bootprep against this input file results in an image named example-compute-image.\nimages: - name: example-compute-image description: \u0026gt; An example compute node image built from an existing IMS recipe. base: ims: name: example-compute-image-recipe type: recipe configuration: example-compute-config configuration_group_names: - Compute Here is an example showing the definition of two images. The first image is built from a recipe provided by the cos product. The second image uses the first image as a base and configures it with a configuration named example-compute-config. The value of the first image\u0026rsquo;s ref_name key is used in the second image\u0026rsquo;s base.image_ref key to specify it as a dependency. Running sat bootprep against this input file results in two images, the first named example-cos-image and the second named example-compute-image.\nimages: - name: example-cos-image ref_name: example-cos-image description: \u0026gt; An example image built from a recipe provided by the COS product. 
base: product: name: cos version: 2.3.101 type: recipe - name: example-compute-image description: \u0026gt; An example image built from a recipe provided by the COS product. base: image_ref: example-cos-image configuration: example-compute-config configuration_group_names: - Compute Here is an example of three IMS images built from the Kubernetes image and the Ceph storage image provided by the csm product. This example uses a filter string prefix to select from the multiple images provided by the CSM product. The first two IMS images in the example find any image from the specified csm product version whose name starts with secure-kubernetes. The third image in the example finds any csm image whose name starts with secure-storage-ceph. All three images are then configured with a configuration named example-management-config. Running sat bootprep against this input file results in three IMS images named worker-example-csm-image, master-example-csm-image, and storage-example-csm-image.\nimages: - name: worker-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-kubernetes configuration: example-management-config configuration_group_names: - Management_Worker - name: master-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-kubernetes configuration: example-management-config configuration_group_names: - Management_Master - name: storage-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-storage-ceph configuration: example-management-config configuration_group_names: - Management_Storage Define BOS Session Templates The BOS session templates are defined under the session_templates key. Each session template must provide values for the name, image, configuration, and bos_parameters keys. The name key defines the name of the resulting BOS session template. The image key defines the image to use in the BOS session template. 
One of the following keys must be present under the image key:\nUse an ims key to specify an existing image or recipe in IMS. Use an image_ref key to specify another image from the input file using its ref_name. The configuration key defines the CFS configuration specified in the BOS session template.\nThe bos_parameters key defines parameters that are passed through directly to the BOS session template. The bos_parameters key should contain a boot_sets key, and each boot set in the session template should be specified under boot_sets. Each boot set can contain the following keys, all of which are optional:\nUse a kernel_parameters key to specify the parameters passed to the kernel on the command line. Use a network key to specify the network over which the nodes boot. Use a node_list key to specify the nodes to add to the boot set. Use a node_roles_groups key to specify the HSM roles to add to the boot set. Use a node_groups key to specify the HSM groups to add to the boot set. Use a rootfs_provider key to specify the root file system provider. Use a rootfs_provider_passthrough key to specify the parameters to add to the rootfs= kernel parameter. As mentioned above, the parameters under bos_parameters are passed through directly to BOS. For more information on the properties of a BOS boot set, refer to BOS Session Templates in the Cray System Management Documentation.\nHere is an example of a BOS session template that refers to an existing IMS image by name:\nsession_templates: - name: example-session-template image: ims: name: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 Here is an example of a BOS session template that refers to an image from the input file by its ref_name. 
This requires that an image defined in the input file specifies example-image as the value of its ref_name key.\nsession_templates: - name: example-session-template image: image_ref: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 HPC CSM Software Recipe Variable Substitutions The sat bootprep command takes any variables you provide and substitutes them into the input file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order. When you provide values through a variable file, sat bootprep substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe provides default variables in a product_vars.yaml variable file. This file defines information about each HPC software product included in the recipe.\nYou will primarily substitute variables into the default HPC CSM Software Recipe bootprep input files through IUF. However, variable files can also be given to sat bootprep directly from IUF\u0026rsquo;s use of the recipe. If you do use variables directly with sat bootprep, you might encounter some limitations. For more information on SAT variable limitations, see SAT and IUF. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.\nSelect an HPC CSM Software Recipe Version You can view a listing of the default HPC CSM Software Recipe variables and their values by running sat bootprep list-vars. For more information on options that can be used with the list-vars subcommand, refer to the man page for the sat bootprep subcommand.\nBy default, the sat bootprep command uses the variables from the latest installed version of the HPC CSM Software Recipe. 
However, you can override this with the --recipe-version command line argument to sat bootprep run.\nFor example, to explicitly select the 22.11.0 version of the HPC CSM Software Recipe default variables, specify --recipe-version 22.11.0:\nncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml Values Supporting Jinja2 Template Rendering The entire sat bootprep input file is not rendered by the Jinja2 template engine. Jinja2 template rendering of the input file is performed individually for each supported value. The values of the following keys in the bootprep input file support rendering as a Jinja2 template and thus support variables:\nThe name key of each configuration under the configurations key. The following keys of each layer under the layers key in a configuration: name playbook git.branch product.version product.branch The following keys of each image under the images key: name base.product.version configuration The following keys of each session template under the session_templates key: name configuration You can use Jinja2 built-in filters in values of any of the keys listed above. In addition, Python string methods can be called on the string variables.\nHyphens in HPC CSM Software Recipe Variables Variable names with hyphens are not allowed in Jinja2 expressions because they are parsed as an arithmetic expression instead of a single variable. To support product names with hyphens, sat bootprep converts hyphens to underscores in all top-level keys of the default HPC CSM Software Recipe variables. It also converts any variables sourced from the command line or any variable files you provide directly. When referring to a variable with hyphens in the bootprep input file, keep this in mind. 
For example, to refer to the product version variable for slingshot-host-software in the bootprep input file, write \u0026quot;{{slingshot_host_software.version}}\u0026quot;.\nHPC CSM Software Recipe Variable Substitution Example The following example bootprep input file shows how a variable of a COS version can be used in an input file that creates a CFS configuration for computes. Only one layer is shown for brevity.\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: cos-compute-{{cos.working_branch}} playbook: cos-compute.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: \u0026#34;{{cos.working_branch}}\u0026#34; Note: When the value of a key in the bootprep input file is a Jinja2 expression, it must be quoted to pass YAML syntax checking.\nJinja2 expressions can also use filters and Python\u0026rsquo;s built-in string methods to manipulate the variable values. For example, suppose only the major and minor components of a COS version are to be used in the branch name for the COS layer of the CFS configuration. You can use the split string method to achieve this as follows:\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: cos-compute-{{cos.working_branch}} playbook: cos-compute.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: integration-{{cos.version.split(\u0026#39;.\u0026#39;)[0]}}-{{cos.version.split(\u0026#39;.\u0026#39;)[1]}} Dynamic Variable Substitutions Additional variables are available besides the default variables provided by the HPC CSM Software Recipe. (For more information, see HPC CSM Software Recipe Variable Substitutions.) These additional variables are dynamic because their values are determined at run-time based on the context in which they appear. 
Available dynamic variables include the following:\nThe variable base.name can be used in the name of an image under the images key. The value of this variable is the name of the IMS image or recipe used as the base of this image.\nThe variable image.name can be used in the name of a session template under the session_templates key. The value of this variable is the name of the IMS image used in this session template.\nNote: The name of a session template is restricted to 45 characters. Keep this in mind when using image.name in the name of a session template.\nThese variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:\nYou want to build an image from a recipe provided by a product and use the name of the recipe in the name of the resulting image. You want to use the name of the image in the name of a session template, and the image is generated as described in the previous use case. Example Bootprep Input Files This section provides an example bootprep input file. It also gives instructions for obtaining the default bootprep input files delivered with a release of the HPC CSM Software Recipe.\nExample Bootprep Input File The following bootprep input file provides an example of using most of the features described in previous sections. 
It is not intended to be a complete bootprep file for the entire CSM product.\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: cos-compute-{{cos.working_branch}} playbook: cos-compute.yml product: name: cos version: \u0026#34;{{cos.version}}\u0026#34; branch: \u0026#34;{{cos.working_branch}}\u0026#34; - name: cpe-pe_deploy-{{cpe.working_branch}} playbook: pe_deploy.yml product: name: cpe version: \u0026#34;{{cpe.version}}\u0026#34; branch: \u0026#34;{{cpe.working_branch}}\u0026#34; images: - name: \u0026#34;{{default.note}}{{base.name}}{{default.suffix}}\u0026#34; ref_name: base_cos_image base: product: name: cos type: recipe version: \u0026#34;{{cos.version}}\u0026#34; - name: \u0026#34;compute-{{base.name}}\u0026#34; ref_name: compute_image base: image_ref: base_cos_image configuration: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; configuration_group_names: - Compute session_templates: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; image: image_ref: compute_image configuration: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; bos_parameters: boot_sets: compute: kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN} node_roles_groups: - Compute rootfs_provider_passthrough: \u0026#34;dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0\u0026#34; Access Default Bootprep Input Files Default bootprep input files are delivered by the HPC CSM Software Recipe product. You can access these files by cloning the hpc-csm-software-recipe repository, as described in the Accessing sat bootprep files process of the Cray System Management Documentation. Find the default input files in the bootprep directory of the cloned repository:\nncn-m001# ls bootprep/ Generate an Example Bootprep Input File The sat bootprep generate-example command was not updated for recent bootprep schema changes. 
It is recommended that you instead use the default bootprep input files described in Access Default Bootprep Input Files. The sat bootprep generate-example command will be updated in a future release of SAT.\nSummary of SAT Bootprep Results The sat bootprep run command uses information from the bootprep input file to create CFS configurations, IMS images, and BOS session templates. For easy reference, the command also includes output summarizing the final creation results. The following example shows a sample table output.\nncn-m001# sat bootprep run ... ################################################################################ CFS configurations ################################################################################ +------------------+ | name | +------------------+ | example-config-1 | | example-config-2 | +------------------+ ################################################################################ IMS images ################################################################################ +---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ | name | preconfigured_image_id | final_image_id | configuration | configuration_group_names | +---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ | example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute | +---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ ################################################################################ BOS session templates ################################################################################ +------------------+----------------+ | name | configuration | +------------------+----------------+ | example-template | example-config | 
+------------------+----------------+ View SAT Bootprep Schema The contents of the YAML input files used by sat bootprep must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named \u0026ldquo;JSON Schema\u0026rdquo;, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.\nView the Exact Schema Specification To view the exact schema specification, run sat bootprep view-schema.\nncn-m001# sat bootprep view-schema --- $schema: \u0026#34;https://json-schema.org/draft/2020-12/schema\u0026#34; ... title: Bootprep Input File description: \u0026gt; A description of the set of CFS configurations to create, the set of IMS images to create and optionally customize with the defined CFS configurations, and the set of BOS session templates to create that reference the defined images and configurations. type: object additionalProperties: false properties: ... Generate User-Friendly Documentation The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature is included with sat bootprep that generates user-friendly HTML documentation for the input file schema. This HTML documentation can be browsed with your preferred web browser.\nCreate a documentation tarball using sat bootprep.\nncn-m001# sat bootprep generate-docs INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz An alternate output directory can be specified with the --output-dir option. 
The generated tarball is always named bootprep-schema-docs.tar.gz.\nncn-m001# sat bootprep generate-docs --output-dir /tmp INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz From another machine, copy the tarball to a local directory.\nanother-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz . Extract the contents of the tarball and open the contained index.html.\nanother-machine$ tar xzvf bootprep-schema-docs.tar.gz x bootprep-schema-docs/ x bootprep-schema-docs/index.html x bootprep-schema-docs/schema_doc.css x bootprep-schema-docs/schema_doc.min.js another-machine$ open bootprep-schema-docs/index.html " +}, +{ + "uri": "/docs-sat/en-25/release_notes/sat_2.4_release_notes/", + "title": "Changes in SAT 2.4", + "tags": [], + "description": "", + "content": "Changes in SAT 2.4 The 2.4.13 version of the SAT product includes:\nVersion 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT:\nsat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. For more information, see SAT in CSM.\nSAT Installation Improvements The SAT install.sh script no longer uses a sat-cfs-install Helm chart and container image to upload its Ansible content to the sat-config-management repository in VCS. Instead, it uses Podman to run the cf-gitea-import container directly. 
Some of the benefits of this change include the following:\nFewer container images that need to be managed by the SAT product Simplified SAT installation without Helm charts or Loftsman manifests Reduced SAT installation time Decoupling of cray-sat container image and cray-sat-podman package Decoupling of cray-sat Container Image and cray-sat-podman Package In older SAT releases, the sat wrapper script that was provided by the cray-sat-podman package installed on Kubernetes master NCNs included a hard-coded version of the cray-sat container image. As a result, every new version of the cray-sat image required a corresponding new version of the cray-sat-podman package.\nIn this release, this tight coupling of the cray-sat-podman package and the cray-sat container image was removed. The sat wrapper script provided by the cray-sat-podman package now looks for the version of the cray-sat container image in the /opt/cray/etc/sat/version file. This file is populated with the correct version of the cray-sat container image by the SAT layer of the CFS configuration that is applied to management NCNs. If the version file does not exist, the wrapper script defaults to the version of the cray-sat container image delivered with the latest version of CSM installed on the system.\nImproved NCN Personalization Automation The steps for performing NCN personalization as part of the SAT installation were moved out of the install.sh script and into a new update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release distribution. The new script provides additional flexibility in how it modifies the NCN personalization CFS configuration for SAT. 
It can modify an existing CFS configuration by name, a CFS configuration being built in a JSON file, or an existing CFS configuration that applies to certain components.\nNew sat bootprep Features The following new features were added to the sat bootprep command:\nVariable substitutions using Jinja2 templates in certain fields of the sat bootprep input file\nFor more information, see HPC CSM Software Recipe Variable Substitutions and Dynamic Variable Substitutions.\nSchema version validation in the sat bootprep input files\nFor more information, see Provide a Schema Version.\nAbility to look up images and recipes provided by products\nFor more information, see Define IMS Images.\nThe schema of the sat bootprep input files was also changed to support these new features:\nThe base recipe or image used by an image in the input file should now be specified under a base key instead of under an ims key. The old ims key is deprecated. To specify an image that depends on another image in the input file, the dependent image should specify the dependency under base.image_ref. You should no longer use the IMS name of the image on which it depends. The image used by a session template should now be specified under image.ims.name, image.ims.id, or image.image_ref. Specifying a string value directly under the image key is deprecated. For more information on defining IMS images and BOS session templates in the sat bootprep input file, see Define IMS Images and Define BOS Session Templates.\nAdded Blade Swap Support to sat swap The sat swap command was updated to support swapping compute and UAN blades with sat swap blade. 
This functionality is described in the following processes of the Cray System Management Documentation:\nAdding a Liquid-cooled blade to a System Using SAT Removing a Liquid-cooled blade from a System Using SAT Replace a Compute Blade Using SAT Swap a Compute Blade with a Different System Using SAT Support for BOS v2 A new v2 version of the Boot Orchestration Service (BOS) is available in CSM 1.3.0. SAT has added support for BOS v2. This impacts the following commands that interact with BOS:\nsat bootprep sat bootsys sat status By default, SAT uses BOS v1. However, you can choose the BOS version you want to use. For more information, see Change the BOS Version.\nAdded BOS Fields to sat status When using BOS v2, sat status outputs additional fields. These fields show the most recent BOS session, session template, booted image, and boot status for each node. An additional --bos-fields option was added to limit the output of sat status to these fields. The fields are not displayed when using BOS v1.\nOpen Source Repositories This is the first release of SAT built from open source code repositories. As a result, build infrastructure was changed to use an external Jenkins instance, and artifacts are now published to an external Artifactory instance. These changes should not impact the functionality of the SAT product in any way.\nSecurity CVE Mitigation The paramiko Python package version was updated from 2.9.2 to 2.10.1 to mitigate CVE-2022-24302. The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to mitigate CVE-2022-36087. Restricted Permissions on SAT Config Files and Directories SAT stores information used to authenticate to the API gateway with Keycloak. Token files are stored in the ~/.config/sat/tokens/ directory. Those files have always had permissions appropriately set to restrict them to be readable only by the user.\nKeycloak usernames used to authenticate to the API gateway are stored in the SAT config file at ~/.config/sat/sat.toml. 
Keycloak usernames are also used in the file names of tokens stored in ~/.config/sat/tokens. As an additional security measure, SAT now restricts the permissions of the SAT config file to be readable and writable only by the user. It also restricts the tokens directory and the entire SAT config directory ~/.config/sat to be accessible only by the user. This prevents other users on the system from viewing Keycloak usernames used to authenticate to the API gateway.\nBug Fixes Fixed an issue where sat init did not print a message confirming a new configuration file was created. Fixed an issue where sat showrev exited with a traceback if the file /opt/cray/etc/site_info.yaml existed but was empty. This could occur if the user exited sat setrev with Ctrl-C. Fixed outdated information in the sat bootsys man page, and added a description of the command stages. " +}, +{ + "uri": "/docs-sat/en-25/external_system/", + "title": "SAT on an External System", + "tags": [], + "description": "", + "content": "SAT on an External System SAT can optionally be installed and configured on an external system to interact with CSM over the CAN.\nLimitations Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on host-based functionality on the management NCNs and will not work from an external system. This includes the following:\nThe platform-services and ncn-power stages of sat bootsys The local host information displayed by the --local option of sat showrev Installing SAT on an external system is not an officially supported configuration. These instructions are provided \u0026ldquo;as-is\u0026rdquo; with the hope that they can be useful to users who desire additional flexibility.\nCertain additional steps may need to be taken to install and configure SAT depending on the configuration of the external system in use. 
These additional steps may include provisioning virtual machines, installing packages, or configuring TLS certificates, and these steps are outside the scope of this documentation. This section covers only the steps needed to configure SAT to use externally-accessible API endpoints exposed by CSM.\nInstall and Configure SAT Prerequisites The external system must be on the Customer Access Network (CAN). Python 3.7 or newer is installed on the system. kubectl, openssh, git, and curl are installed on the external system. The root CA certificates used when installing CSM have been added to the external system\u0026rsquo;s trust store such that authenticated TLS connections can be made to the CSM REST API gateway. For more information, refer to Certificate Authority in the Cray System Management Documentation. Procedure Create a Python virtual environment.\n$ SAT_VENV_PATH=\u0026#34;$(pwd)/venv\u0026#34; $ python3 -m venv ${SAT_VENV_PATH} $ . ${SAT_VENV_PATH}/bin/activate Clone the SAT source code.\nTo use SAT version 3.21, this example clones the release/3.21 branch of Cray-HPE/sat.\n(venv) $ git clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git Set up the SAT CSM Python dependencies to be installed from their source code.\nSAT CSM Python dependency packages are not currently distributed publicly as source packages or binary distributions. They must be installed from their source code hosted on GitHub. Also, to install the cray-product-catalog Python package, you must first clone it locally. 
Use the following steps to modify the SAT CSM Python dependencies so they can be installed from their source code.\nClone the source code for cray-product-catalog.\n(venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog In the cray-product-catalog directory, create a file named .version that contains the version of cray-product-catalog.\n(venv) $ echo 1.6.0 \u0026gt; cray-product-catalog/.version Open the \u0026ldquo;locked\u0026rdquo; requirements file in a text editor.\n(venv) $ vim sat/requirements.lock.txt Update the line containing cray-product-catalog so that it reflects the local path to cray-product-catalog.\nIt should read as follows.\n./cray-product-catalog For versions of SAT newer than 3.19, change the line containing csm-api-client to read as follows.\ncsm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 (Optional) Confirm that requirements.lock.txt is modified as expected.\nNote: For versions newer than 3.19, you will see both cray-product-catalog and csm-api-client. For version 3.19 and older, you will only see cray-product-catalog.\n(venv) $ grep -E \u0026#39;cray-product-catalog|csm-api-client\u0026#39; sat/requirements.lock.txt ./cray-product-catalog csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 Install the modified SAT dependencies.\n(venv) $ pip install -r sat/requirements.lock.txt ... Install the SAT Python package.\n(venv) $ pip install ./sat ... 
(Optional) Add the sat virtual environment to the user\u0026rsquo;s PATH environment variable.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\nIf the virtual environment is not added to the user\u0026rsquo;s PATH environment variable, then source ${SAT_VENV_PATH}/bin/activate will need to be run before running any SAT commands.\n(venv) $ deactivate $ echo export PATH=\\\u0026#34;${SAT_VENV_PATH}/bin:${PATH}\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile $ source ~/.bash_profile Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config on the external system.\nNote that this file contains credentials to authenticate against the Kubernetes API as the administrative user, so it should be treated as sensitive.\n$ mkdir -p ~/.kube $ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config admin.conf 100% 5566 3.0MB/s 00:00 Add a new entry for the hostname kubernetes to the external system\u0026rsquo;s /etc/hosts file.\nThe kubernetes hostname should correspond to the CAN IP address on ncn-m001. 
On CSM 1.2, this can be determined by querying the IP address of the bond0.cmn0 interface.\n$ ssh ncn-m001 ip addr show bond0.cmn0 13: bond0.cmn0@bond0: \u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever $ IP_ADDRESS=10.102.1.11 On CSM versions prior to 1.2, the CAN IP can be determined by querying the IP address of the vlan007 interface.\n$ ssh ncn-m001 ip addr show vlan007 13: vlan007@bond0: \u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever $ IP_ADDRESS=10.102.1.10 Once the IP address is determined, add an entry to /etc/hosts mapping the IP address to the hostname kubernetes.\n$ echo \u0026#34;${IP_ADDRESS} kubernetes\u0026#34; | sudo tee -a /etc/hosts 10.102.1.11 kubernetes Modify ~/.kube/config to set the cluster server address.\nThe value of the server key for the kubernetes cluster under the clusters section should be set to https://kubernetes:6443.\n--- clusters: - cluster: certificate-authority-data: REDACTED server: https://kubernetes:6443 name: kubernetes ... 
Confirm that kubectl can access the CSM Kubernetes cluster.\n$ kubectl get nodes NAME STATUS ROLES AGE VERSION ncn-m001 Ready master 135d v1.19.9 ncn-m002 Ready master 136d v1.19.9 ncn-m003 Ready master 136d v1.19.9 ncn-w001 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w002 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w003 Ready \u0026lt;none\u0026gt; 136d v1.19.9 Use sat init to create a configuration file for SAT.\n$ sat init INFO: Configuration file \u0026#34;/home/user/.config/sat/sat.toml\u0026#34; generated. Copy the platform CA certificates from the management NCN and configure the certificates for use with SAT.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\n$ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt . $ echo export REQUESTS_CA_BUNDLE=\\\u0026#34;$(realpath platform-ca-certs.crt)\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile $ source ~/.bash_profile Edit the SAT configuration file to set the API and S3 hostnames.\nExternally available API endpoints are given domain names in PowerDNS, so the endpoints in the configuration file should each be set to subdomain.system-name.site-domain, where system-name and site-domain are replaced with the values specified during csi config init, and subdomain is the DNS name for the externally available service. For more information, refer to Externally Exposed Services in the Cray System Management Documentation.\nThe API gateway has the subdomain api, and S3 has the subdomain s3. The S3 endpoint runs on port 8080. The following options should be set in the SAT configuration file.\n[api_gateway] host = \u0026#34;api.system-name.site-domain\u0026#34; [s3] endpoint = \u0026#34;http://s3.system-name.site-domain:8080\u0026#34; Edit the SAT configuration file to specify the Keycloak user which will be accessing the REST API.\n[api_gateway] username = \u0026#34;user\u0026#34; Run sat auth. 
Enter your password when prompted.\nThe admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For more information on editing Role Mappings, see Create Internal User Accounts in the Keycloak Shasta Realm in the Cray System Management Documentation. For more information on authentication types and authentication credentials, see SAT Command Authentication.\n$ sat auth Password for user: Succeeded! Ensure the files are readable only by the current user.\n$ touch ~/.config/sat/s3_access_key \\ ~/.config/sat/s3_secret_key $ chmod 600 ~/.config/sat/s3_access_key \\ ~/.config/sat/s3_secret_key Write the credentials to local files using kubectl.\nGenerate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the user must generate the S3 access key and secret keys and write them to a local file. SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev.\n$ kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ ~/.config/sat/s3_access_key $ kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ ~/.config/sat/s3_secret_key " +}, +{ + "uri": "/docs-sat/en-25/release_notes/sat_2.5_release_notes/", + "title": "Changes in SAT 2.5", + "tags": [], + "description": "", + "content": "Changes in SAT 2.5 The 2.5.22 version of the SAT product includes:\nVersion 3.21.9 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows you to access application and job data through the command line. 
It provides a table summarizing information for all jobs on the system.\nChanges to sat bootprep A list-vars subcommand was added to sat bootprep.\nIt lists the variables available for use in bootprep input files at runtime.\nA --limit option was added to sat bootprep run.\nIt restricts the creation of CFS configurations, IMS images, and BOS session templates into separate stages. For more information, see Limit SAT Bootprep Run into Stages.\nsat bootprep now prompts individually for each CFS configuration that already exists.\nsat bootprep can now filter images provided by a product by using a prefix.\nThis is useful when specifying the base of an image in a bootprep input file. For more information, see Define IMS Images.\nTo support product names with hyphens, sat bootprep now converts hyphens to underscores within variables.\nFor more information, see Hyphens in HPC CSM Software Recipe Variables.\nIn sat bootprep input files, you can now render the value of the playbook property of CFS configuration layers with Jinja2 templates.\nFor more information, see Values Supporting Jinja2 Template Rendering.\nOutput was added to sat bootprep run that summarizes the CFS configurations, IMS images, and BOS session templates created.\nFor more information, see Summary of SAT Bootprep Results.\nImprovements were made to the sat bootprep output when CFS configuration and BOS session templates are created.\nChanges to sat bootsys A reboot subcommand was added to sat bootsys. It uses BOS to reboot nodes in the bos-operations stage. The --staged-session option was added to sat bootsys. It can be used to create staged BOS sessions. For more information, refer to Staging Changes with BOS in the Cray System Management Documentation. Changes to Other sat Commands When switching SAT versions with prodmgr, a version is no longer set as \u0026ldquo;active\u0026rdquo; in the product catalog. The \u0026ldquo;active\u0026rdquo; field was also removed from the output of sat showrev. 
Improvements were made to the performance of sat status when using BOS version two. New Install and Upgrade Framework The new Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products with the help of sat bootprep on HPE Cray EX systems managed by Cray System Management (CSM). IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nBecause IUF now handles NCN personalization, information about this process was removed from the SAT documentation. Other sections in the documentation were also revised to support the new Install and Upgrade Framework. For example, the SAT Installation and SAT Upgrade sections of this guide now provide details on software and configuration content specific to SAT. The Cray System Management Documentation will indicate when these sections should be referred to for detailed information.\nFor more information on the relationship between sat bootprep and IUF, see SAT and IUF.\nNew Default BOS Version By default, SAT now uses version two of the Boot Orchestration Service (BOS). This change to BOS v2 impacts the following commands that interact with BOS:\nsat bootprep sat bootsys sat status If needed, you can choose the BOS version you want to use. For more information, see Change the BOS Version.\nSecurity Updated the version of certifi in the sat python package and CLI from 2021.10.8 to 2022.12.7 to resolve CVE-2022-23491. Updated the version of certifi in the sat-install-utility container image from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491. Updated the version of oauthlib from 3.2.1 to 3.2.2 to resolve CVE-2022-36087. Updated the version of cryptography from 36.0.1 to 39.0.1 to resolve CVE-2023-23931. 
Bug Fixes Fixed a bug that prevented sat init from creating a configuration file in the current directory when not prefixed with ./. Fixed a bug in which sat status failed with a traceback when using BOS version two and reported components whose most recent image did not exist. Fixed a build issue where the sat container could contain a different version of kubectl than the version found in CSM. Fixed error handling and improved command messages for sat bootprep and sat swap blade. " +}, +{ + "uri": "/docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/", + "title": "SAT Changes in Shasta v1.3.2", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. 
Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\n" +}, +{ + "uri": "/docs-sat/en-25/release_notes/shasta_1.3_release_notes/", + "title": "SAT Changes in Shasta v1.3", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the Authenticate SAT Commands section of this document.\n" +}, +{ + "uri": "/docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/", + "title": "SAT Changes in Shasta v1.4.1", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. 
" +}, +{ + "uri": "/docs-sat/en-25/release_notes/shasta_1.4_release_notes/", + "title": "SAT Changes in Shasta v1.4", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes management cluster (workers and masters). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. 
Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Config File Location Change The default location of the SAT config file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own config files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a config file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (for example, GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. List node accelerator risers (for example, Redstone modules) with the --list-node-accel-risers option. The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. 
For more information, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API. Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. 
A command-line option, --bos-templates, and a corresponding config-file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding config file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log. This change simplifies mounting this file into the sat container running under Podman.\n" +}, +{ + "uri": "/docs-sat/en-25/release_notes/shasta_1.5_release_notes/", + "title": "SAT Changes in Shasta v1.5", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named ncn-personalization.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. 
This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-Diags product. The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic programs on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and config file options have been removed. For more information, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. 
This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes masters and workers.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. " +}, +{ + "uri": "/docs-sat/en-25/uninstall_and_downgrade/", + "title": "SAT Uninstall and Downgrade", + "tags": [], + "description": "", + "content": "SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT This procedure can be used to uninstall a version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. 
Procedure Use sat showrev to list versions of SAT.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+-------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+-------------------+-----------------------+ | sat | 2.3.3 | - | - | | sat | 2.2.10 | - | - | +--------------+-----------------+-------------------+-----------------------+ Use prodmgr to uninstall a version of SAT.\nThis command will do three things:\nRemove all hosted-type package repositories associated with the given version of SAT. Group-type repositories are not removed. Remove all container images associated with the given version of SAT. Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up in the output of sat showrev. ncn-m001# prodmgr uninstall sat 2.2.10 Repository sat-2.2.10-sle-15sp2 has been removed. Removed Docker image cray/cray-sat:3.9.0 Removed Docker image cray/sat-cfs-install:1.0.2 Removed Docker image cray/sat-install-utility:1.4.0 Deleted sat-2.2.10 from product catalog. Downgrade: Switch Between SAT Versions This procedure can be used to downgrade the active version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be switched. Older versions must be switched manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. 
Procedure Use sat showrev to list versions of SAT.\nncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+--------------------+-----------------------+ | sat | 2.3.3 | - | - | | sat | 2.2.10 | - | - | +--------------+-----------------+--------------------+-----------------------+ Use prodmgr to switch to a different version of SAT.\nThis command will do two things:\nFor all hosted-type package repositories associated with this version of SAT, set them as the sole member of their corresponding group-type repository. For example, switching to SAT version 2.2.10 sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group. Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are associated with NCNs with the role \u0026ldquo;Management\u0026rdquo; and subrole \u0026ldquo;Master\u0026rdquo; (for example, the CFS configuration management-23.5.0). Specifically, it will ensure that the layer refers to the version of SAT CFS configuration content associated with the version of SAT to which you are switching. ncn-m001# prodmgr activate sat 2.5.15 Repository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4. 
Updated CFS configurations: [management-23.5.0] Apply the modified CFS configuration to the management NCNs.\nAt this point, Nexus package repositories have been modified to set a particular package repository as active, but the SAT package may not have been updated on management NCNs.\nTo ensure that management NCNs have been updated to use the active SAT version, follow the Procedure to Apply CFS Configuration.\nProcedure to Apply CFS Configuration Set an environment variable that refers to the name of the CFS configuration to be applied to the management NCNs.\nncn-m001# export CFS_CONFIG_NAME=\u0026#34;management-23.5.0\u0026#34; Note: Refer to the output from the prodmgr activate command to find the name of the modified CFS configuration. If more than one CFS configuration was modified, use the first one.\nINFO: Successfully saved CFS configuration \u0026#34;management-23.5.0\u0026#34; Obtain the name of the CFS configuration layer for SAT and save it in an environment variable:\nncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \\ | jq -r \u0026#39;.layers | map(select(.cloneUrl | contains(\u0026#34;sat-config-management.git\u0026#34;)))[0].name\u0026#39;) Create a CFS session that executes only the SAT layer of the given CFS configuration.\nThe --configuration-limit option limits the configuration session to run only the SAT layer of the configuration.\nncn-m001# cray cfs sessions create --name \u0026#34;sat-session-${CFS_CONFIG_NAME}\u0026#34; --configuration-name \\ \u0026#34;${CFS_CONFIG_NAME}\u0026#34; --configuration-limit \u0026#34;${SAT_LAYER_NAME}\u0026#34; Monitor the progress of the CFS session.\nSet an environment variable to the name of the Ansible container within the pod for the CFS session:\nncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \\ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \\ -o json | jq -r \u0026#39;.items[0].spec.containers | map(select(.name | 
contains(\u0026#34;ansible\u0026#34;))) | .[0].name\u0026#39;) Next, get the logs for the Ansible container.\nncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \\ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} Ansible plays, which are run by the CFS session, will install SAT on all the master management NCNs on the system. A summary of results can be found at the end of the log output. The following example shows a successful session.\n... PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Note: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\nVerify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNote: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nncn-m001# sat --version sat 3.7.0 Note: Upon first running sat, you may see additional output while the sat container image is downloaded. This will occur the first time sat is run on each manager NCN. For example, if you run sat for the first time on ncn-m001 and then for the first time on ncn-m002, you will see this additional output both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... 
sat 3.7.0 Stop the typescript.\nncn-m001# exit SAT version x.y.z is now installed and configured:\nThe SAT RPM package is installed on the associated NCNs. Note on Procedure to Apply CFS Configuration The previous procedure is not always necessary because the CFS Batcher service automatically detects configuration changes and will automatically create new sessions to apply configuration changes according to certain rules. For more information on these rules, refer to Configuration Management with the CFS Batcher in the Cray System Management Documentation.\nThe main scenario in which the CFS batcher will not automatically re-apply the SAT layer is when the commit hash of the sat-config-management git repository has not changed between SAT versions. The previous procedure ensures the configuration is re-applied in all cases, and it is harmless if the batcher has already applied an updated configuration.\n" +}, +{ + "uri": "/docs-sat/en-25/upgrade/", + "title": "SAT Upgrade", + "tags": [], + "description": "", + "content": "SAT Upgrade Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nThis document does not replicate install, upgrade, or deployment procedures detailed in the Cray System Management Documentation. This document provides details regarding software and configuration content specific to SAT which is needed when installing, upgrading, or deploying a SAT release. 
The Cray System Management Documentation will indicate when sections of this document should be referred to for detailed information.\nIUF will perform the following tasks for a release of SAT.\nIUF deliver-product stage: Uploads SAT configuration content to VCS Uploads SAT information to the CSM product catalog Uploads SAT content to Nexus repositories IUF update-vcs-config stage: Updates the VCS integration branch with new SAT configuration content if a working branch is specified IUF update-cfs-config stage: Creates a new CFS configuration for management nodes with new SAT configuration content IUF prepare-images stage: Creates updated management NCN and managed node images with new SAT content IUF management-nodes-rollout stage: Boots management NCNs with an image containing new SAT content IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF section of the Cray System Management Documentation describes how to use these tools directly if it is desirable to use them instead of IUF.\nIUF Stage Details for SAT This section describes SAT details that an administrator must be aware of before running IUF stages. Entries are prefixed with Information if no administrative action is required or Action if an administrator needs to perform tasks outside of IUF.\nupdate-vcs-config Information: This stage is only run if a VCS working branch is specified for SAT. By default, SAT does not create or specify a VCS working branch.\nupdate-cfs-config Information: This stage only applies to the management configuration and not to the managed configuration.\nprepare-images Information: This stage only applies to management images and not to managed images.\nPost-Upgrade Procedures After upgrading SAT with IUF, it is recommended that you complete the following procedures before using SAT:\nRemove Obsolete Configuration File Sections Update SAT Logging Set System Revision Information Notes on the Procedures Ellipses (...) 
in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being upgraded. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. Remove Obsolete Configuration File Sections After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nUpdate SAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO level are now logged at the DEBUG level.\nThese changes take effect automatically. However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\nncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml [logging] ... 
stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap Set System Revision Information HPE service representatives use system revision information data to identify systems in support cases.\nPrerequisites SAT authentication has been set up during installation. See Authenticate SAT Commands. S3 credentials have been generated during installation. See Generate SAT S3 Credentials. Notes on the Procedure This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or earlier.\nProcedure Set System Revision Information.\nRun sat setrev and follow the prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date Tip: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, \u0026ldquo;System type\u0026rdquo; is EX-1C.\nncn-m001# sat setrev -------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. 
Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Verify System Revision Information.\nRun sat showrev and verify the output shown in the \u0026ldquo;System Revision Information table.\u0026rdquo;\nThe following example shows sample table output.\nncn-m001# sat showrev ################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... 
| | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ " +}, +{ + "uri": "/docs-sat/en-25/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-25/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-25/index.xml b/en-25/index.xml new file mode 100644 index 0000000000..57669d4497 --- /dev/null +++ b/en-25/index.xml @@ -0,0 +1,145 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-25/ + Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + Wed, 11 Dec 2024 03:40:01 +0000 + + + SAT Installation + /docs-sat/en-25/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/install/ + SAT Installation Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product. 
+ + + Change the BOS Version + /docs-sat/en-25/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. You can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. + + + Changes in SAT 2.2 + /docs-sat/en-25/release_notes/sat_2.2_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.2_release_notes/ + Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022. This version of the SAT product included: Version 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components: Version 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release. + + + Introduction to SAT + /docs-sat/en-25/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview. 
+ + + SAT Grafana Dashboards + /docs-sat/en-25/dashboards/sat_grafana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/dashboards/sat_grafana_dashboards/ + SAT Grafana Dashboards The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through Redfish. The messages are displayed based on severity. Grafana can be accessed via web browser at the following URL: https://sma-grafana.cmn.&lt;site-domain&gt; The value of site-domain can be obtained as follows: ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath=&#39;{.data.customizations\.yaml}&#39; | \ base64 -d | grep &#34;external:&#34; That command will produce the following output, for example: + + + SAT and IUF + /docs-sat/en-25/usage/sat_and_iuf/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/sat_and_iuf/ + SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep. Variable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. + + + Changes in SAT 2.3 + /docs-sat/en-25/release_notes/sat_2.3_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.3_release_notes/ + Changes in SAT 2.3 The 2.3.4 version of the SAT product includes: Version 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None. 
Current Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share. + + + SAT Kibana Dashboards + /docs-sat/en-25/dashboards/sat_kibana_dashboards/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/dashboards/sat_kibana_dashboards/ + SAT Kibana Dashboards Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in this way breaks down the complexity of large data volumes into easily understood information. + + + SAT Bootprep + /docs-sat/en-25/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands. ncn-m001# sat-man sat-bootprep The sat bootprep command helps the Install and Upgrade Framework (IUF) install, upgrade, and deploy products on systems managed by CSM. + + + Changes in SAT 2.4 + /docs-sat/en-25/release_notes/sat_2.4_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.4_release_notes/ + Changes in SAT 2.4 The 2.4.13 version of the SAT product includes: Version 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. 
Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT: sat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. + + + SAT on an External System + /docs-sat/en-25/external_system/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/external_system/ + SAT on an External System SAT can optionally be installed and configured on an external system to interact with CSM over the CAN. Limitations Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on host-based functionality on the management NCNs and will not work from an external system. This includes the following: The platform-services and ncn-power stages of sat bootsys The local host information displayed by the --local option of sat showrev Installing SAT on an external system is not an officially supported configuration. + + + Changes in SAT 2.5 + /docs-sat/en-25/release_notes/sat_2.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/sat_2.5_release_notes/ + Changes in SAT 2.5 The 2.5.22 version of the SAT product includes: Version 3.21.9 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows you to access application and job data through the command line. It provides a table summarizing information for all jobs on the system. + + + SAT Changes in Shasta v1.3.2 + /docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/ + SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI. 
The following sections detail the changes in this release. sat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable. The sat swap switch command is equivalent to sat switch. + + + SAT Changes in Shasta v1.3 + /docs-sat/en-25/release_notes/shasta_1.3_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.3_release_notes/ + SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI. This version of the sat CLI contained the following commands: auth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the Authenticate SAT Commands section of this document. + + + SAT Changes in Shasta v1.4.1 + /docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/ + SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1. This version of the SAT product included: Version 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release. New Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames: sat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API. 
+ + + SAT Changes in Shasta v1.4 + /docs-sat/en-25/release_notes/shasta_1.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.4_release_notes/ + SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4. This version of the SAT product included the following components: Version 3.4.0 of the sat python package and CLI It also added the following new component: Version 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release. + + + SAT Changes in Shasta v1.5 + /docs-sat/en-25/release_notes/shasta_1.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.5_release_notes/ + SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5. This version of the SAT product included: Version 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component: Version 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release. Install Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. + + + SAT Uninstall and Downgrade + /docs-sat/en-25/uninstall_and_downgrade/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/uninstall_and_downgrade/ + SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT This procedure can be used to uninstall a version of SAT. Prerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure Use sat showrev to list versions of SAT. 
ncn-m001# sat showrev --products --filter product_name=sat ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+-------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+-------------------+-----------------------+ | sat | 2. + + + SAT Upgrade + /docs-sat/en-25/upgrade/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/upgrade/ + SAT Upgrade Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product. + + + diff --git a/en-25/install/index.html b/en-25/install/index.html new file mode 100644 index 0000000000..16ca47dc3a --- /dev/null +++ b/en-25/install/index.html @@ -0,0 +1,1440 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install and Upgrade Framework

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.

+

IUF will perform the following tasks for a release of SAT.

+
    +
  • IUF deliver-product stage: +
      +
    • Uploads SAT configuration content to VCS
    • +
    • Uploads SAT information to the CSM product catalog
    • +
    • Uploads SAT content to Nexus repositories
    • +
    +
  • +
  • IUF update-vcs-config stage: +
      +
    • Updates the VCS integration branch with new SAT configuration content if a +working branch is specified
    • +
    +
  • +
  • IUF update-cfs-config stage: +
      +
    • Creates a new CFS configuration for management nodes with new SAT configuration content
    • +
    +
  • +
  • IUF prepare-images stage: +
      +
    • Creates updated management NCN and managed node images with new SAT content
    • +
    +
  • +
  • IUF management-nodes-rollout stage: +
      +
    • Boots management NCNs with an image containing new SAT content
    • +
    +
  • +
+

IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.

+

IUF Stage Details for SAT

+

This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.

+

update-vcs-config

+

Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.

+

update-cfs-config

+

Information: This stage only applies to the management configuration and +not to the managed configuration.

+

prepare-images

+

Information: This stage only applies to management images and not to +managed images.

+

Post-Installation Procedures

+

After installing SAT with IUF, you must complete the following SAT configuration +procedures before using SAT:

+ +

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
+

Authenticate SAT Commands

+

To run SAT commands on the manager NCNs, you must first set up authentication +to the API gateway. The admin account used to authenticate with sat auth +must be enabled in Keycloak and must have its assigned role set to admin. +For more information on editing Role Mappings, see Create Internal User Accounts +in the Keycloak Shasta Realm in the Cray System Management +Documentation. For more information on +authentication types and authentication credentials, see SAT Command +Authentication.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and +authenticate to the API gateway.

+
    +
  1. +

    Generate a default SAT configuration file, if one does not exist.

    +
    ncn-m001# sat init
    +Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the config file already exists, the command prints the following +error.

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway +section of the config file.

    +
    username = "crayadmin"
    +
  4. +
  5. +

    Run sat auth. Enter your password when prompted.

    +
    ncn-m001# sat auth
    +Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    Other sat commands are now authenticated to make requests to the API gateway.

    +
    ncn-m001# sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access +S3 storage. In order to use the SAT S3 bucket, the System Administrator must +generate the S3 access key and secret key and write them to a local file. This +must be done on every Kubernetes master node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the +site-specific information set with sat setrev (see Set System Revision +Information).

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Ensure the files are readable only by root.

    +
    ncn-m001# touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    Write the credentials to local files using kubectl.

    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      Get the SAT configuration file’s endpoint value.

      +

      Note: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      ncn-m001# grep endpoint ~/.config/sat/sat.toml
      +# endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      Get the sat-s3-credentials secret’s endpoint value.

      +
      ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, change the SAT configuration file’s endpoint value to +match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    Copy SAT configurations to each manager node on the system.

    +
    ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    Note: Depending on how many manager nodes are on the system, the list of +manager nodes may be different. This example assumes three manager nodes, where +the configuration files must be copied from ncn-m001 to ncn-m002 and +ncn-m003. Therefore, the list of hosts above is ncn-m002 and ncn-m003.

    +
  8. +
+

Set System Revision Information

+

HPE service representatives use system revision information data to identify +systems in support cases.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    Set System Revision Information.

    +

    Run sat setrev and follow the prompts to set the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Verify System Revision Information.

    +

    Run sat showrev and verify the output shown in the “System Revision Information” table.

    +

    The following example shows sample table output.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/introduction/index.html b/en-25/introduction/index.html new file mode 100644 index 0000000000..60ec49377a --- /dev/null +++ b/en-25/introduction/index.html @@ -0,0 +1,1606 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview.

+

Six Kibana Dashboards are included with SAT. They provide organized output for system health information.

+ +

Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.

+ +

In CSM 1.3 and newer, the sat command is automatically available on all the +Kubernetes NCNs. For more information, see SAT in CSM. Older +versions of CSM do not have the sat command automatically available, and SAT +must be installed as a separate product.

+

SAT Command Overview

+

This section describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instructions for using the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on +Kubernetes manager nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes manager node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using interactive and non-interactive modes.

+

Interactive

+
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+

Non-interactive

+
ncn-m001# sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, then use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful when calling sat from a script, or when running a single sat command as a part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes manager node, use sat-man on the manager node as shown in the following +example.

+
ncn-m001# sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+
ncn-m001# man sat
+
ncn-m001# man sat-podman
+

SAT Command Authentication

+

Some SAT subcommands make requests to the Shasta services through the API +gateway and thus require authentication to the API gateway in order to function. +Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured. In order to use the SAT S3 bucket, the System Administrator must +generate the S3 access key and secret key and write them to a local file. This +must be done on every Kubernetes manager node where SAT commands are run.

+

For more information on authentication requests, see System Security and +Authentication in the Cray System Management +Documentation. The following is a table +describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bmccredsRequires authentication to the API gateway.sat-bmccredsSet BMC passwords.
sat bootprepRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install.sat-bootprepPrepare to boot nodes with images and configurations.
sat bootsysRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shut down the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwhistRequires authentication to the API gateway.sat-hwhistReport hardware component history.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat jobstatRequires authentication to the API gateway.sat-jobstatCheck the status of jobs and applications.
sat k8sRequires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install.sat-k8sReport on Kubernetes replica sets that have co-located (on the same node) replicas.
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node XNames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat slscheckRequires authentication to the API gateway.sat-slscheckPerform a cross-check between SLS and HSM.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC XNames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, you must run the sat auth +command. This command will prompt for a password on the command line. The +username value is obtained from the following locations, in order from highest +precedence to lowest precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the config file at +~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file +will be obtained and saved to ~/.config/sat/tokens. Subsequent sat commands +will determine the username the same way as sat auth described above and will +use the token for that username if it has been obtained and saved by sat auth.

+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.

+
    +
  • The root or super-user account always has the # character at the end of the prompt and has the host name of the +host in the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A user account that is neither root nor crayadm is +referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the string shown in the table below. It also has the “#” +character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + +
Command PromptMeaning
ncn-m001#Run on one of the Kubernetes Manager servers. (Non-interactive)
(CONTAINER_ID) sat-container#Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

Here are examples of the sat status command used by an administrator.

+
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+

SAT in CSM

+

In CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes NCNs, but it is still possible +to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the +sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:

+
    +
  • +

    An entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product +stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.

    +
  • +
  • +

    The sat-install-utility container image is only available with the full SAT product stream. This container image +provides uninstall and downgrade functionality when used with the prodmgr command. (In SAT 2.3 and older, SAT was +only available to install as a separate product stream. Because these versions were packaged with +sat-install-utility, it is still possible to uninstall these versions of SAT.)

    +
  • +
  • +

    The docs-sat RPM package is only available with the full SAT product stream.

    +
  • +
  • +

    The sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is +only available with the full SAT product stream.

    +
  • +
+

If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS +configurations that apply to management NCNs (for example, management-23.5.0) should not include a SAT layer.

+

The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the +Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, it +does the following:

+
    +
  • +

    Modifies the sat.toml configuration file which contains the username so that it is only readable by root.

    +
  • +
  • +

    Modifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed +because the names of the files within the tokens directory contain the username.

    +
  • +
+

Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other +users. These permission changes only apply to files created by previous installations of SAT. In the current version of +SAT all files and directories are created with the appropriate permissions.

+

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX (Shasta) software stack. The following list shows these dependencies +for each subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bmccreds

+

CSM

+
    +
  • System Configuration Service (SCSD)
  • +
+

sat bootprep

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Image Management Service (IMS)
  • +
  • Version Control Service (VCS)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Compute Rolling Upgrade Service (CRUS)
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

COS

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diags

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwhist

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat jobstat

+

PBS

+
    +
  • HPE State Checker
  • +
+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat slscheck

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat status

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/index.html b/en-25/release_notes/index.html new file mode 100644 index 0000000000..adc938f153 --- /dev/null +++ b/en-25/release_notes/index.html @@ -0,0 +1,1163 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Release Notes

+

Changes in SAT Version 2.x

+ +

SAT Changes in Shasta Version 1.x

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/index.xml b/en-25/release_notes/index.xml new file mode 100644 index 0000000000..6942ad2ee6 --- /dev/null +++ b/en-25/release_notes/index.xml @@ -0,0 +1,75 @@ + + + + SAT Release Notes on System Admin Toolkit (SAT) + /docs-sat/en-25/release_notes/ + Recent content in SAT Release Notes on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + Wed, 11 Dec 2024 03:40:01 +0000 + + + Changes in SAT 2.2 + /docs-sat/en-25/release_notes/sat_2.2_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.2_release_notes/ + Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022. This version of the SAT product included: Version 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components: Version 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release. + + + Changes in SAT 2.3 + /docs-sat/en-25/release_notes/sat_2.3_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.3_release_notes/ + Changes in SAT 2.3 The 2.3.4 version of the SAT product includes: Version 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None. Current Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share. 
+ + + Changes in SAT 2.4 + /docs-sat/en-25/release_notes/sat_2.4_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-25/release_notes/sat_2.4_release_notes/ + Changes in SAT 2.4 The 2.4.13 version of the SAT product includes: Version 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT: sat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. + + + Changes in SAT 2.5 + /docs-sat/en-25/release_notes/sat_2.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/sat_2.5_release_notes/ + Changes in SAT 2.5 The 2.5.22 version of the SAT product includes: Version 3.21.9 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows you to access application and job data through the command line. It provides a table summarizing information for all jobs on the system. + + + SAT Changes in Shasta v1.3.2 + /docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/ + SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI. The following sections detail the changes in this release. sat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable. The sat swap switch command is equivalent to sat switch. 
+ + + SAT Changes in Shasta v1.3 + /docs-sat/en-25/release_notes/shasta_1.3_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.3_release_notes/ + SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI. This version of the sat CLI contained the following commands: auth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the Authenticate SAT Commands section of this document. + + + SAT Changes in Shasta v1.4.1 + /docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/ + SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1. This version of the SAT product included: Version 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release. New Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames: sat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API. + + + SAT Changes in Shasta v1.4 + /docs-sat/en-25/release_notes/shasta_1.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.4_release_notes/ + SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4. 
This version of the SAT product included the following components: Version 3.4.0 of the sat python package and CLI It also added the following new component: Version 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release. + + + SAT Changes in Shasta v1.5 + /docs-sat/en-25/release_notes/shasta_1.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/release_notes/shasta_1.5_release_notes/ + SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5. This version of the SAT product included: Version 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component: Version 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release. Install Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. + + + diff --git a/en-25/release_notes/sat_2.2_release_notes/index.html b/en-25/release_notes/sat_2.2_release_notes/index.html new file mode 100644 index 0000000000..58d8a1cf51 --- /dev/null +++ b/en-25/release_notes/sat_2.2_release_notes/index.html @@ -0,0 +1,1269 @@ + + + + + + + + + + + + Changes in SAT 2.2 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.2

+

SAT 2.2.16 was released on February 25th, 2022.

+

This version of the SAT product included:

+
    +
  • Version 3.14.0 of the sat python package and CLI
  • +
  • Version 1.6.4 of the sat-podman wrapper script
  • +
  • Version 1.0.4 of the sat-cfs-install container image and Helm chart
  • +
+

It also added the following new components:

+
    +
  • Version 1.4.3 of the sat-install-utility container image
  • +
  • Version 2.0.2 of the cfs-config-util container image
  • +
+

The following sections detail the changes in this release.

+

Known Issues in SAT 2.2

+

sat Command Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, the sat +command will not be found. For example:

+
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+

This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be +prepended to the $PATH environment variable:

+
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+

Or, the file /sat/venv/bin/activate may be sourced:

+
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+

Tab Completion Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, tab completion +for sat commands does not work.

+

This can be resolved temporarily by sourcing the file +/etc/bash_completion.d/sat-completion.bash:

+
source /etc/bash_completion.d/sat-completion.bash
+

OCI Runtime Permission Error when Running sat in Root Directory

+

sat commands will not work if the current directory is /. For example:

+
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+

To resolve, run sat in another directory.

+

Duplicate Mount Error when Running sat in Config Directory

+

sat commands will not work if the current directory is ~/.config/sat. +For example:

+
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+

To resolve, run sat in another directory.

+

New sat Commands

+
    +
  • sat bootprep automates the creation of CFS configurations, the build and +customization of IMS images, and the creation of BOS session templates. For +more information, see SAT Bootprep.
  • +
  • sat slscheck performs a check for consistency between the System Layout +Service (SLS) and the Hardware State Manager (HSM).
  • +
  • sat bmccreds provides a simple interface for interacting with the System +Configuration Service (SCSD) to set BMC Redfish credentials.
  • +
  • sat hwhist displays hardware component history by XName (location) or by +its Field-Replaceable Unit ID (FRUID). This command queries the Hardware +State Manager (HSM) API to obtain this information. Since the sat hwhist +command supports querying for the history of a component by its FRUID, the +FRUID of components has been added to the output of sat hwinv.
  • +
+

Additional Install Automation

+

The following automation has been added to the install script, install.sh:

+
    +
  • Wait for the completion of the sat-config-import Kubernetes job, which is +started when the sat-cfs-install Helm chart is deployed.
  • +
  • Automate the modification of the CFS configuration, which applies to master +management NCNs (for example, ncn-personalization).
  • +
+

Changes to Product Catalog Data Schema

+

The SAT product uploads additional information to the cray-product-catalog +Kubernetes ConfigMap detailing the components it provides, including container +(Docker) images, Helm charts, RPMs, and package repositories.

+

This information is used to support uninstall and downgrade of SAT product +versions moving forward.

+

Support for Uninstall and Downgrade of SAT Versions

+

Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and downgrade of the SAT product stream.

+

For more information, see +Uninstall: Remove a Version of SAT and +Downgrade: Switch Between SAT Versions.

+

Improvements to sat status

+

A Subrole column has been added to the output of sat status. This allows you +to easily differentiate between master, worker, and storage nodes in the +management role, for example.

+

Hostname information from SLS has been added to sat status output.

+

Added Support for JSON Output

+

Support for JSON-formatted output has been added to commands which currently +support the --format option, such as hwinv, status, and showrev.

+

Usability Improvements

+

Many usability improvements have been made to multiple sat commands, +mostly related to filtering command output. The following are some highlights:

+
    +
  • Added --fields option to display only specific fields for subcommands which +display tabular reports.
  • +
  • Added ability to filter on exact matches of a field name.
  • +
  • Improved handling of multiple matches of a field name in --filter queries +so that the first match is used, similar to --sort-by.
  • +
  • Added support for --filter, --fields, and --reverse for summaries +displayed by sat hwinv.
  • +
  • Added borders to summary tables generated by sat hwinv.
  • +
  • Improved documentation in the man pages.
  • +
+

Default Log Level Changed

+

The default log level for stderr has been changed from “WARNING” to “INFO”. For +more information, see Update SAT Logging.

+

More Granular Log Level Configuration Options

+

With the command-line options --loglevel-stderr and --loglevel-file, the log +level can now be configured separately for stderr and the log file.

+

The existing --loglevel option is now an alias for the --loglevel-stderr +option.

+

Podman Wrapper Script Improvements

+

The Podman wrapper script is the script installed at /usr/bin/sat on the +master management NCNs by the cray-sat-podman RPM that runs the cray-sat +container in podman. The following subsections detail improvements that were +made to the wrapper script in this release.

+

Mounting of $HOME and Current Directories in cray-sat Container

+

The Podman wrapper script that launches the cray-sat container with podman +has been modified to mount the user’s current directory and home directory into +the cray-sat container to provide access to local files in the container.

+

Podman Wrapper Script Documentation Improvements

+

The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:

+
    +
  • Environment variables that affect execution of the wrapper script
  • +
  • Host files and directories mounted in the container
  • +
+

Fixes to Podman Wrapper Script Output Redirection

+

Fixed issues with redirecting stdout and stderr, and piping output to +commands, such as awk, less, and more.

+

Configurable HTTP Timeout

+

A new sat option has been added to configure the HTTP timeout length for +requests to the API gateway. For more information, refer to sat-man sat.

+

sat bootsys Improvements

+

Many improvements and fixes have been made to sat bootsys. The following are +some highlights:

+
    +
  • Added the --excluded-ncns option, which can be used to omit NCNs +from the platform-services and ncn-power stages in case they are +inaccessible.
  • +
  • Disruptive shutdown stages in sat bootsys shutdown now prompt the user to +continue before proceeding. A new option, --disruptive, will bypass this.
  • +
  • Improvements to Ceph service health checks and restart during the +platform-services stage of sat bootsys boot.
  • +
+

sat xname2nid Improvements

+

sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to +a list of NIDs in those locations.

+

A new --format option has been added to sat xname2nid. It sets the output +format to either “range” (the default) or “NID”. The “range” format displays NIDs +in a compressed range format suitable for use with a workload manager like Slurm.

+

Usage of v2 HSM API

+

The commands which interact with HSM (for example, sat status and sat hwinv) +now use the v2 HSM API.

+

sat diag Limited to HSN Switches

+

sat diag will now only operate against HSN switches by default. These are the +only controllers that support running diagnostics with HMJTD.

+

sat showrev Enhancements

+

A column has been added to the output of sat showrev that indicates whether a +product version is “active”. The definition of “active” varies across products, +and not all products may set an “active” version.

+

For SAT, the active version is the one with its hosted-type package repository +in Nexus set as the member of the group-type package repository in Nexus, +meaning that it will be used when installing the cray-sat-podman RPM.

+

cray-sat Container Image Size Reduction

+

The size of the cray-sat container image has been approximately cut in half by +leveraging multi-stage builds. This also improved the repeatability of the unit +tests by running them in the container.

+

Bug Fixes

+

Minor bug fixes were made in cray-sat and in cray-sat-podman. For full +change lists, refer to each repository’s CHANGELOG.md file.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/sat_2.3_release_notes/index.html b/en-25/release_notes/sat_2.3_release_notes/index.html new file mode 100644 index 0000000000..a524d83463 --- /dev/null +++ b/en-25/release_notes/sat_2.3_release_notes/index.html @@ -0,0 +1,1147 @@ + + + + + + + + + + + + Changes in SAT 2.3 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.3

+

The 2.3.4 version of the SAT product includes:

+
    +
  • Version 3.15.4 of the sat python package and CLI
  • +
  • Version 1.6.11 of the sat-podman wrapper script
  • +
  • Version 1.2.0 of the sat-cfs-install container image
  • +
  • Version 2.0.0 of the sat-cfs-install Helm chart
  • +
  • Version 1.5.0 of the sat-install-utility container image
  • +
  • Version 2.0.3 of the cfs-config-util container image
  • +
+

New sat Commands

+

None.

+

Current Working Directory in SAT Container

+

When running sat commands, the current working directory is now mounted in the +container as /sat/share, and the current working directory within the container +is also /sat/share.

+

Files in the current working directory must be specified using relative paths to +that directory, because the current working directory is always mounted on +/sat/share. Absolute paths should be avoided, and paths that are outside of +$HOME or $PWD are never accessible to the container environment.

+

The home directory is still mounted on the same path inside the container as it +is on the host.

+

Changes to sat bootsys

+

The following options were added to sat bootsys.

+
    +
  • --bos-limit
  • +
  • --recursive
  • +
+

The --bos-limit option passes a given limit string to a BOS session. The +--recursive option allows a slot or other higher-level component to be specified in the +limit string.

+

Changes to sat bootprep

+

The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS +jobs after sat bootprep is run. Jobs are no longer deleted by default.

+

Changes to sat status

+

sat status now includes information about nodes’ CFS configuration statuses, +such as desired configuration, configuration status, and error count.

+

The output of sat status now splits different component types into different +report tables.

+

The following options were added to sat status.

+
    +
  • --hsm-fields, --sls-fields, --cfs-fields
  • +
  • --bos-template
  • +
+

The --hsm-fields, --sls-fields, and --cfs-fields options limit the output +columns according to specified CSM services.

+

The --bos-template option filters the status report according to the specified +session template’s boot sets.

+

Compatibility with CSM 1.2

+

The following components were modified to be compatible with CSM 1.2.

+
    +
  • sat-cfs-install container image and Helm chart
  • +
  • sat-install-utility container image
  • +
  • SAT product installer
  • +
+

GPG Checking

+

The sat-ncn Ansible role provided by sat-cfs-install was modified to enable +GPG checks on packages while leaving GPG checks disabled on repository metadata.

+

Security

+

Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and +refreshed Python dependency versions.

+

Bug Fixes

+

Minor bug fixes were made in each of the repositories. For full change lists, +refer to each repository’s CHANGELOG.md file.

+

The known issues listed under the SAT 2.2 release +were fixed.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/sat_2.4_release_notes/index.html b/en-25/release_notes/sat_2.4_release_notes/index.html new file mode 100644 index 0000000000..2f98bcb08e --- /dev/null +++ b/en-25/release_notes/sat_2.4_release_notes/index.html @@ -0,0 +1,1237 @@ + + + + + + + + + + + + Changes in SAT 2.4 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.4

+

The 2.4.13 version of the SAT product includes:

+
    +
  • Version 3.19.3 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.5.5 of the sat-install-utility container image.
  • +
  • Version 3.3.1 of the cfs-config-util container image.
  • +
+

Because of installation refactoring efforts, the following two components +are no longer delivered with SAT:

+
    +
  • sat-cfs-install container image
  • +
  • sat-cfs-install Helm chart
  • +
+

Inclusion of SAT in CSM

+

A version of the cray-sat container image is now included in CSM. For more +information, see SAT in CSM.

+

SAT Installation Improvements

+

The SAT install.sh script no longer uses a sat-cfs-install Helm chart and +container image to upload its Ansible content to the sat-config-management +repository in VCS. Instead, it uses Podman to run the cf-gitea-import container +directly. Some of the benefits of this change include the following:

+ +

Decoupling of cray-sat Container Image and cray-sat-podman Package

+

In older SAT releases, the sat wrapper script that was provided by the +cray-sat-podman package installed on Kubernetes master NCNs included a +hard-coded version of the cray-sat container image. As a result, every new +version of the cray-sat image required a corresponding new version of the +cray-sat-podman package.

+

In this release, this tight coupling of the cray-sat-podman package and the +cray-sat container image was removed. The sat wrapper script provided +by the cray-sat-podman package now looks for the version of the cray-sat +container image in the /opt/cray/etc/sat/version file. This file is populated +with the correct version of the cray-sat container image by the SAT layer of +the CFS configuration that is applied to management NCNs. If the version file +does not exist, the wrapper script defaults to the version of the cray-sat +container image delivered with the latest version of CSM installed on the system.

+

Improved NCN Personalization Automation

+

The steps for performing NCN personalization as part of the SAT installation +were moved out of the install.sh script and into a new +update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release +distribution. The new script provides additional flexibility in how it modifies +the NCN personalization CFS configuration for SAT. It can modify an existing CFS +configuration by name, a CFS configuration being built in a JSON file, or an +existing CFS configuration that applies to certain components.

+

New sat bootprep Features

+

The following new features were added to the sat bootprep command:

+ +

The schema of the sat bootprep input files was also changed to support these +new features:

+
    +
  • The base recipe or image used by an image in the input file should now be +specified under a base key instead of under an ims key. The old ims +key is deprecated.
  • +
  • To specify an image that depends on another image in the input file, the +dependent image should specify the dependency under base.image_ref. +You should no longer use the IMS name of the image on which it depends.
  • +
  • The image used by a session template should now be specified under +image.ims.name, image.ims.id, or image.image_ref. Specifying a string +value directly under the image key is deprecated.
  • +
+

For more information on defining IMS images and BOS session templates in the +sat bootprep input file, see Define IMS Images +and Define BOS Session Templates.

+

Added Blade Swap Support to sat swap

+

The sat swap command was updated to support swapping compute and UAN blades +with sat swap blade. This functionality is described in the following processes +of the Cray System Management Documentation:

+
    +
  • Adding a Liquid-cooled blade to a System Using SAT
  • +
  • Removing a Liquid-cooled blade from a System Using SAT
  • +
  • Replace a Compute Blade Using SAT
  • +
  • Swap a Compute Blade with a Different System Using SAT
  • +
+

Support for BOS v2

+

A new v2 version of the Boot Orchestration Service (BOS) is available in CSM +1.3.0. SAT has added support for BOS v2. This impacts the following commands +that interact with BOS:

+
    +
  • sat bootprep
  • +
  • sat bootsys
  • +
  • sat status
  • +
+

By default, SAT uses BOS v1. However, you can choose the BOS version you want +to use. For more information, see Change the BOS Version.

+

Added BOS Fields to sat status

+

When using BOS v2, sat status outputs additional fields. These fields show +the most recent BOS session, session template, booted image, and boot status for +each node. An additional --bos-fields option was added to limit the output of +sat status to these fields. The fields are not displayed when using BOS v1.

+

Open Source Repositories

+

This is the first release of SAT built from open source code repositories. +As a result, build infrastructure was changed to use an external Jenkins instance, +and artifacts are now published to an external Artifactory instance. These +changes should not impact the functionality of the SAT product in any way.

+

Security

+

CVE Mitigation

+
    +
  • The paramiko Python package version was updated from 2.9.2 to 2.10.1 to +mitigate CVE-2022-24302.
  • +
  • The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to +mitigate CVE-2022-36087.
  • +
+

Restricted Permissions on SAT Config Files and Directories

+

SAT stores information used to authenticate to the API gateway with Keycloak. +Token files are stored in the ~/.config/sat/tokens/ directory. Those files +have always had permissions appropriately set to restrict them to be readable +only by the user.

+

Keycloak usernames used to authenticate to the API gateway are stored in the +SAT config file at ~/.config/sat/sat.toml. Keycloak usernames are also used in +the file names of tokens stored in ~/.config/sat/tokens. As an additional +security measure, SAT now restricts the permissions of the SAT config file +to be readable and writable only by the user. It also restricts the tokens +directory and the entire SAT config directory ~/.config/sat to be accessible +only by the user. This prevents other users on the system from viewing +Keycloak usernames used to authenticate to the API gateway.

+

Bug Fixes

+
    +
  • Fixed an issue where sat init did not print a message confirming a new +configuration file was created.
  • +
  • Fixed an issue where sat showrev exited with a traceback if the file +/opt/cray/etc/site_info.yaml existed but was empty. This could occur if the +user exited sat setrev with Ctrl-C.
  • +
  • Fixed outdated information in the sat bootsys man page, and added a +description of the command stages.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/sat_2.5_release_notes/index.html b/en-25/release_notes/sat_2.5_release_notes/index.html new file mode 100644 index 0000000000..1b0d96f8d4 --- /dev/null +++ b/en-25/release_notes/sat_2.5_release_notes/index.html @@ -0,0 +1,1202 @@ + + + + + + + + + + + + Changes in SAT 2.5 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.5

+

The 2.5.22 version of the SAT product includes:

+
    +
  • Version 3.21.9 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.6.2 of the sat-install-utility container image.
  • +
  • Version 3.3.1 of the cfs-config-util container image.
  • +
+

New sat Commands

+

sat jobstat allows you to access application and job data through the command +line. It provides a table summarizing information for all jobs on the system.

+

Changes to sat bootprep

+
    +
  • +

    A list-vars subcommand was added to sat bootprep.

    +

    It lists the variables available for use in bootprep input files at runtime.

    +
  • +
  • +

    A --limit option was added to sat bootprep run.

    +

    It separates the creation of CFS configurations, IMS images, and BOS session +templates into stages, so that a run can be limited to any one of them. For more information, see +Limit SAT Bootprep Run into Stages.

    +
  • +
  • +

    sat bootprep now prompts individually for each CFS configuration that +already exists.

    +
  • +
  • +

    sat bootprep can now filter images provided by a product by using a prefix.

    +

    This is useful when specifying the base of an image in a bootprep input +file. For more information, see +Define IMS Images.

    +
  • +
  • +

    To support product names with hyphens, sat bootprep now converts hyphens to +underscores within variables.

    +

    For more information, see +Hyphens in HPC CSM Software Recipe Variables.

    +
  • +
  • +

    In sat bootprep input files, you can now render the value of the playbook +property of CFS configuration layers with Jinja2 templates.

    +

    For more information, see +Values Supporting Jinja2 Template Rendering.

    +
  • +
  • +

    Output was added to sat bootprep run that summarizes the CFS configurations, +IMS images, and BOS session templates created.

    +

    For more information, see +Summary of SAT Bootprep Results.

    +
  • +
  • +

    Improvements were made to the sat bootprep output when CFS configurations +and BOS session templates are created.

    +
  • +
+

Changes to sat bootsys

+
    +
  • A reboot subcommand was added to sat bootsys. It uses BOS to reboot +nodes in the bos-operations stage.
  • +
  • The --staged-session option was added to sat bootsys. It can be used to +create staged BOS sessions. For more information, refer to Staging Changes +with BOS in the Cray System Management Documentation.
  • +
+

Changes to Other sat Commands

+
    +
  • When switching SAT versions with prodmgr, a version is no longer set as +“active” in the product catalog. The “active” field was also removed from the +output of sat showrev.
  • +
  • Improvements were made to the performance of sat status when using BOS +version two.
  • +
+

New Install and Upgrade Framework

+

The new Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products with the help of sat bootprep on HPE Cray EX +systems managed by Cray System Management (CSM). IUF capabilities are described +in detail in the IUF section +of the Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

Because IUF now handles NCN personalization, information about this process was +removed from the SAT documentation. Other sections in the documentation were +also revised to support the new Install and Upgrade Framework. For example, the +SAT Installation and SAT Upgrade sections of this +guide now provide details on software and configuration content specific to SAT. +The Cray System Management Documentation +will indicate when these sections should be referred to for detailed information.

+

For more information on the relationship between sat bootprep and IUF, see +SAT and IUF.

+

New Default BOS Version

+

By default, SAT now uses version two of the Boot Orchestration Service (BOS). +This change to BOS v2 impacts the following commands that interact with BOS:

+
    +
  • sat bootprep
  • +
  • sat bootsys
  • +
  • sat status
  • +
+

If needed, you can choose the BOS version you want to use. For more information, +see Change the BOS Version.

+

Security

+
    +
  • Updated the version of certifi in the sat python package and CLI from +2021.10.8 to 2022.12.7 to resolve CVE-2022-23491.
  • +
  • Updated the version of certifi in the sat-install-utility container image +from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491.
  • +
  • Updated the version of oauthlib from 3.2.1 to 3.2.2 to resolve CVE-2022-36087.
  • +
  • Updated the version of cryptography from 36.0.1 to 39.0.1 to resolve +CVE-2023-23931.
  • +
+

Bug Fixes

+
    +
  • Fixed a bug that prevented sat init from creating a configuration file in +the current directory when not prefixed with ./.
  • +
  • Fixed a bug in which sat status failed with a traceback when using BOS +version two and reporting on components whose most recent image did not exist.
  • +
  • Fixed a build issue where the sat container could contain a different +version of kubectl than the version found in CSM.
  • +
  • Fixed error handling and improved command messages for sat bootprep and +sat swap blade.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/shasta_1.3.2_release_notes/index.html b/en-25/release_notes/shasta_1.3.2_release_notes/index.html new file mode 100644 index 0000000000..4792575636 --- /dev/null +++ b/en-25/release_notes/shasta_1.3.2_release_notes/index.html @@ -0,0 +1,1089 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.3.2 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command, which supported operations for replacing a switch, has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/shasta_1.3_release_notes/index.html b/en-25/release_notes/shasta_1.3_release_notes/index.html new file mode 100644 index 0000000000..c1c504a9e7 --- /dev/null +++ b/en-25/release_notes/shasta_1.3_release_notes/index.html @@ -0,0 +1,1093 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.3 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

For more information on each of these commands, see the +SAT Command Overview and the table +of commands in the Authenticate SAT Commands +section of this document.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/shasta_1.4.1_release_notes/index.html b/en-25/release_notes/shasta_1.4.1_release_notes/index.html new file mode 100644 index 0000000000..37bf95ae9b --- /dev/null +++ b/en-25/release_notes/shasta_1.4.1_release_notes/index.html @@ -0,0 +1,1098 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.4.1 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/shasta_1.4_release_notes/index.html b/en-25/release_notes/shasta_1.4_release_notes/index.html new file mode 100644 index 0000000000..d8fdb361bd --- /dev/null +++ b/en-25/release_notes/shasta_1.4_release_notes/index.html @@ -0,0 +1,1200 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.4 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes management cluster (workers and +masters). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Config File Location Change

+

The default location of the SAT config file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own config files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment +variable.

+

Additionally, if a config file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies +option.
  • +
  • List node accelerators (for example, GPUs) with the --list-node-accels option. +The count of node accelerators is also included for each node.
  • +
  • List node accelerator risers (for example, Redstone modules) with the +--list-node-accel-risers option. The count of node accelerator risers is also +included for each node.
  • +
  • List High-Speed Node Network Interface Cards (HSN NICs) with the +--list-node-hsn-nics option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding config-file +option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding config file options were +deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/release_notes/shasta_1.5_release_notes/index.html b/en-25/release_notes/shasta_1.5_release_notes/index.html new file mode 100644 index 0000000000..d5321b1d80 --- /dev/null +++ b/en-25/release_notes/shasta_1.5_release_notes/index.html @@ -0,0 +1,1154 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.5 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named ncn-personalization.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-Diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +programs on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and config file options +have been removed. For more information, see +Remove Obsolete Configuration File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then +displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation have been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes masters and workers.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/sitemap.xml b/en-25/sitemap.xml new file mode 100644 index 0000000000..148822c842 --- /dev/null +++ b/en-25/sitemap.xml @@ -0,0 +1,513 @@ + + + + /docs-sat/en-25/usage/ + 2024-12-11T03:40:01+00:00 + + + + + + + /docs-sat/en-25/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-25/dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-25/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-25/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-25/usage/change_bos_version/ + 2024-12-11T03:40:01+00:00 + + + + + /docs-sat/en-25/release_notes/sat_2.2_release_notes/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-25/introduction/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-25/dashboards/sat_grafana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-25/usage/sat_and_iuf/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/sat_2.3_release_notes/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-25/dashboards/sat_kibana_dashboards/ + 2024-12-11T03:40:00+00:00 + + + + + + + /docs-sat/en-25/usage/sat_bootprep/ + 2024-12-11T03:40:01+00:00 + + + + + /docs-sat/en-25/release_notes/sat_2.4_release_notes/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-25/external_system/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-25/release_notes/sat_2.5_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/shasta_1.3.2_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/shasta_1.3_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/shasta_1.4.1_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/shasta_1.4_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/release_notes/shasta_1.5_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/uninstall_and_downgrade/ + 2024-12-11T03:40:01+00:00 + + + + 
/docs-sat/en-25/upgrade/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-25/categories/ + + + + + + + + /docs-sat/en-25/tags/ + + + + + + + + diff --git a/en-25/tags/index.html b/en-25/tags/index.html new file mode 100644 index 0000000000..472edf8b6d --- /dev/null +++ b/en-25/tags/index.html @@ -0,0 +1,1142 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-25/tags/index.xml b/en-25/tags/index.xml new file mode 100644 index 0000000000..2be1589e57 --- /dev/null +++ b/en-25/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-25/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + + + diff --git a/en-25/uninstall_and_downgrade/index.html b/en-25/uninstall_and_downgrade/index.html new file mode 100644 index 0000000000..ac935ff3f7 --- /dev/null +++ b/en-25/uninstall_and_downgrade/index.html @@ -0,0 +1,1247 @@ + + + + + + + + + + + + SAT Uninstall and Downgrade :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Uninstall and Downgrade

+

Uninstall: Remove a Version of SAT

+

This procedure can be used to uninstall a version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be uninstalled with prodmgr.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+-------------------+-----------------------+
    +| product_name | product_version | images            | image_recipes         |
    ++--------------+-----------------+-------------------+-----------------------+
    +| sat          | 2.3.3           | -                 | -                     |
    +| sat          | 2.2.10          | -                 | -                     |
    ++--------------+-----------------+-------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to uninstall a version of SAT.

    +

    This command will do three things:

    +
      +
    • Remove all hosted-type package repositories associated with the given version of SAT. Group-type +repositories are not removed.
    • +
    • Remove all container images associated with the given version of SAT.
    • +
    • Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up +in the output of sat showrev.
    • +
    +
    ncn-m001# prodmgr uninstall sat 2.2.10
    +Repository sat-2.2.10-sle-15sp2 has been removed.
    +Removed Docker image cray/cray-sat:3.9.0
    +Removed Docker image cray/sat-cfs-install:1.0.2
    +Removed Docker image cray/sat-install-utility:1.4.0
    +Deleted sat-2.2.10 from product catalog.
    +
  4. +
+

Downgrade: Switch Between SAT Versions

+

This procedure can be used to downgrade the active version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be switched. Older versions must be switched manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    Use sat showrev to list versions of SAT.

    +
    ncn-m001# sat showrev --products --filter product_name=sat
    +###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------------------+-----------------------+
    +| product_name | product_version | images             | image_recipes         |
    ++--------------+-----------------+--------------------+-----------------------+
    +| sat          | 2.3.3           | -                  | -                     |
    +| sat          | 2.2.10          | -                  | -                     |
    ++--------------+-----------------+--------------------+-----------------------+
    +
  2. +
  3. +

    Use prodmgr to switch to a different version of SAT.

    +

    This command will do two things:

    +
      +
    • For all hosted-type package repositories associated with this version of SAT, set them as the sole member +of their corresponding group-type repository. For example, switching to SAT version 2.2.10 +sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group.
    • +
    • Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are +associated with NCNs with the role “Management” and subrole “Master” (for example, the CFS configuration +management-23.5.0). Specifically, it will ensure that the layer refers to the version of SAT CFS +configuration content associated with the version of SAT to which you are switching.
    • +
    +
    ncn-m001# prodmgr activate sat 2.5.15
    +Repository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4.
    +Updated CFS configurations: [management-23.5.0]
    +
  4. +
  5. +

    Apply the modified CFS configuration to the management NCNs.

    +

    At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.

    +

    To ensure that management NCNs have been updated to use the active SAT +version, follow the Procedure to Apply CFS Configuration.

    +
  6. +
+

Procedure to Apply CFS Configuration

+
    +
  1. +

    Set an environment variable that refers to the name of the CFS configuration +to be applied to the management NCNs.

    +
    ncn-m001# export CFS_CONFIG_NAME="management-23.5.0"
    +

    Note: Refer to the output from the prodmgr activate command to find +the name of the modified CFS configuration. If more than one CFS configuration +was modified, use the first one.

    +
    INFO: Successfully saved CFS configuration "management-23.5.0"
    +
  2. +
  3. +

    Obtain the name of the CFS configuration layer for SAT and save it in an +environment variable:

    +
    ncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
    +    | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
    +
  4. +
  5. +

    Create a CFS session that executes only the SAT layer of the given CFS +configuration.

    +

    The --configuration-limit option limits the configuration session to run +only the SAT layer of the configuration.

    +
    ncn-m001# cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
    +    "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
    +
  6. +
  7. +

    Monitor the progress of the CFS session.

    +

    Set an environment variable to the name of the Ansible container within the pod +for the CFS session:

    +
    ncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \
    +    -o json | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
    +

    Next, get the logs for the Ansible container.

    +
    ncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output. The following example shows a successful session.

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Note: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  8. +
  9. +

    Verify that SAT was successfully configured.

    +

    If sat is configured, the --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    Note: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    ncn-m001# sat --version
    +sat 3.7.0
    +

    Note: Upon first running sat, you may see additional output while the sat +container image is downloaded. This will occur the first time sat is run on +each manager NCN. For example, if you run sat for the first time on ncn-m001 +and then for the first time on ncn-m002, you will see this additional output +both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  10. +
  11. +

    Stop the typescript.

    +
    ncn-m001# exit
    +
  12. +
+

SAT version x.y.z is now installed and configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Note on Procedure to Apply CFS Configuration

+

The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.

+

The main scenario in which the CFS batcher will not automatically re-apply the +SAT layer is when the commit hash of the sat-config-management git repository +has not changed between SAT versions. The previous procedure ensures the +configuration is re-applied in all cases, and it is harmless if the batcher has +already applied an updated configuration.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/upgrade/index.html b/en-25/upgrade/index.html new file mode 100644 index 0000000000..090e582f57 --- /dev/null +++ b/en-25/upgrade/index.html @@ -0,0 +1,1297 @@ + + + + + + + + + + + + SAT Upgrade :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Upgrade

+

Install and Upgrade Framework

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.

+

IUF will perform the following tasks for a release of SAT.

+
    +
  • IUF deliver-product stage: +
      +
    • Uploads SAT configuration content to VCS
    • +
    • Uploads SAT information to the CSM product catalog
    • +
    • Uploads SAT content to Nexus repositories
    • +
    +
  • +
  • IUF update-vcs-config stage: +
      +
    • Updates the VCS integration branch with new SAT configuration content if a +working branch is specified
    • +
    +
  • +
  • IUF update-cfs-config stage: +
      +
    • Creates a new CFS configuration for management nodes with new SAT configuration content
    • +
    +
  • +
  • IUF prepare-images stage: +
      +
    • Creates updated management NCN and managed node images with new SAT content
    • +
    +
  • +
  • IUF management-nodes-rollout stage: +
      +
    • Boots management NCNs with an image containing new SAT content
    • +
    +
  • +
+

IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.

+

IUF Stage Details for SAT

+

This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.

+

update-vcs-config

+

Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.

+

update-cfs-config

+

Information: This stage only applies to the management configuration and +not to the managed configuration.

+

prepare-images

+

Information: This stage only applies to management images and not to +managed images.

+

Post-Upgrade Procedures

+

After upgrading SAT with IUF, it is recommended that you complete the following +procedures before using SAT:

+ +

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being upgraded.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
+

Remove Obsolete Configuration File Sections

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used. +In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+

Update SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO +level are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
    +
  • sat bootsys shutdown --stage session-checks
  • +
  • sat sensors
  • +
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
  • sat swap
  • +
+

Set System Revision Information

+

HPE service representatives use system revision information data to identify +systems in support cases.

+

Prerequisites

+ +

Notes on the Procedure

+

This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) +or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or +earlier.

+

Procedure

+
    +
  1. +

    Set System Revision Information.

    +

    Run sat setrev and follow the prompts to set the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, set “System type” to EX-1C for such a system.

    +
    ncn-m001# sat setrev
    +--------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Verify System Revision Information.

    +

    Run sat showrev and verify the output shown in the “System Revision Information” table.

    +

    The following example shows sample table output.

    +
    ncn-m001# sat showrev
    +################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/usage/change_bos_version/index.html b/en-25/usage/change_bos_version/index.html new file mode 100644 index 0000000000..9534270522 --- /dev/null +++ b/en-25/usage/change_bos_version/index.html @@ -0,0 +1,1118 @@ + + + + + + + + + + + + Change the BOS Version :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Change the BOS Version

+

By default, SAT uses Boot Orchestration Service (BOS) version two (v2). You can +select the BOS version to use for individual commands with the --bos-version +option. For more information on this option, refer to the man page for a specific +command.

+

You can also configure the BOS version to use in the SAT config file. Do this +under the api_version setting in the bos section of the config file. If +the system is using an existing SAT config file from an older version of SAT, +the bos section might not exist. In that case, add the bos section with the +BOS version desired in the api_version setting.

+
    +
  1. +

    Find the SAT config file at ~/.config/sat/sat.toml, and look for a section +like this:

    +
    [bos]
    +api_version = "v2"
    +

    In this example, SAT is using BOS version "v2".

    +
  2. +
  3. +

    Change the line specifying the api_version to the BOS version desired (for +example, "v1").

    +
    [bos]
    +api_version = "v1"
    +
  4. +
  5. +

    If applicable, uncomment the api_version line.

    +

    If the system is using an existing SAT config file from a recent version of +SAT, the api_version line might be commented out like this:

    +
    [bos]
    +# api_version = "v2"
    +

    If the line is commented out, SAT will still use the default BOS +version. To ensure a different BOS version is used, uncomment the +api_version line by removing # at the beginning of the line.

    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/usage/index.html b/en-25/usage/index.html new file mode 100644 index 0000000000..331e2a4931 --- /dev/null +++ b/en-25/usage/index.html @@ -0,0 +1,1125 @@ + + + + + + + + + + + + SAT Usage :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Usage

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-25/usage/index.xml b/en-25/usage/index.xml new file mode 100644 index 0000000000..0543a94a35 --- /dev/null +++ b/en-25/usage/index.xml @@ -0,0 +1,33 @@ + + + + SAT Usage on System Admin Toolkit (SAT) + /docs-sat/en-25/usage/ + Recent content in SAT Usage on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-25 + Wed, 11 Dec 2024 03:40:01 +0000 + + + Change the BOS Version + /docs-sat/en-25/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. You can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. + + + SAT and IUF + /docs-sat/en-25/usage/sat_and_iuf/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/sat_and_iuf/ + SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep. Variable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. + + + SAT Bootprep + /docs-sat/en-25/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-25/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. 
The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands. ncn-m001# sat-man sat-bootprep The sat bootprep command helps the Install and Upgrade Framework (IUF) install, upgrade, and deploy products on systems managed by CSM. + + + diff --git a/en-25/usage/sat_and_iuf/index.html b/en-25/usage/sat_and_iuf/index.html new file mode 100644 index 0000000000..4b27a0f6a1 --- /dev/null +++ b/en-25/usage/sat_and_iuf/index.html @@ -0,0 +1,1144 @@ + + + + + + + + + + + + SAT and IUF :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT and IUF

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM with the help of +sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. +For more information on IUF, see the +IUF section of +the Cray System Management Documentation. +For more information on sat bootprep, see SAT Bootprep.

+

Variable Substitutions

+

Both IUF and sat bootprep allow variable substitutions into the default HPC +CSM Software Recipe bootprep input files. The default variables of the HPC +CSM Software Recipe are available in a product_vars.yaml file. To override +the default variables, specify any site variables in a site_vars.yaml file. +Variables are sourced from the command line, any variable files directly +provided, and the HPC CSM Software Recipe files used, in that order.

+

IUF Session Variables

+

IUF also has special session variables internal to the iuf command that +override any matching entries. Session variables are the set of product and +version combinations being installed by the current IUF activity, and they are +found inside IUF’s internal session_vars.yaml file. For more information on +IUF and variable substitutions, see the +IUF section of +the Cray System Management Documentation.

+

SAT Variable Limitations

+

When using sat bootprep outside of IUF, you might encounter problems +substituting variables into the default bootprep input files. Complex variables +like "{{ working_branch }}" cannot be completely resolved outside of IUF and +its internal session variables. Thus, the default product_vars.yaml file is +unusable with only the sat bootprep command when variables like +"{{ working_branch }}" are used. To work around this limitation if you are +substituting complex variables, use the internal IUF session_vars.yaml file +with sat bootprep and the default bootprep input files.

+
    +
  1. +

    Find the session_vars.yaml file from the most recent IUF activity on the +system.

    +

    This process is documented in the upgrade prerequisites procedure of the +Cray System Management Documentation. For more information, see steps 1-6 of +Stage 0.3 - Option 2.

    +
  2. +
  3. +

    Use the session_vars.yaml file to substitute variables into the default +bootprep input files.

    +
    ncn-m001# sat bootprep run --vars-file session_vars.yaml
    +
  4. +
+

Limit SAT Bootprep Run into Stages

+

The sat bootprep run command uses information from the bootprep input files +to create CFS configurations, IMS images, and BOS session templates. To restrict +this creation into separate stages, use the --limit option and list whether +you want to create configurations, images, session_templates, or some +combination of these. IUF uses the --limit option in this way to install, +upgrade, and deploy products on a system in stages. For example, to create only +CFS configurations, run the following command used by the IUF update-cfs-config +stage:

+
ncn-m001# sat bootprep run --limit configurations example-bootprep-input-file.yaml
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Creating 3 CFS configurations
+...
+INFO: Skipping creation of IMS images based on value of --limit option.
+INFO: Skipping creation of BOS session templates based on value of --limit option.
+

To create only IMS images and BOS session templates, run the following command +used by the IUF prepare-images stage:

+
ncn-m001# sat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Skipping creation of CFS configurations based on value of --limit option.
+
+ +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-25/usage/sat_bootprep/index.html b/en-25/usage/sat_bootprep/index.html new file mode 100644 index 0000000000..7dc9b8689d --- /dev/null +++ b/en-25/usage/sat_bootprep/index.html @@ -0,0 +1,1684 @@ + + + + + + + + + + + + SAT Bootprep :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Bootprep

+

SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates. The +solution is based on a given input file that defines how those configurations, +images, and session templates should be created. This automated process centers +around the sat bootprep command. Man page documentation for sat bootprep +can be viewed in the same way as for other SAT commands.

+
ncn-m001# sat-man sat-bootprep
+

The sat bootprep command helps the Install and Upgrade Framework (IUF) +install, upgrade, and deploy products on systems managed by CSM. Outside of IUF, +it is uncommon to use sat bootprep. For more information on this relationship, +see SAT and IUF. For more information on IUF, see the +IUF section of +the Cray System Management Documentation.

+

SAT Bootprep vs SAT Bootsys

+

sat bootprep is used to create CFS configurations, build and +rename IMS images, and create BOS session templates which tie the +configurations and images together during a BOS session.

+

sat bootsys automates several portions of the boot and shutdown processes, +including (but not limited to) performing BOS operations (such as creating BOS +sessions), powering on and off cabinets, and checking the state of the system +prior to shutdown.

+

Edit a Bootprep Input File

+

The input file provided to sat bootprep is a YAML-formatted file containing +information which CFS, IMS, and BOS use to create configurations, images, and +BOS session templates respectively. Writing and modifying these input files is +the main task associated with using sat bootprep. An input file is composed of +three main sections, one each for configurations, images, and session templates. +These sections may be specified in any order, and any of the sections may be +omitted if desired.

+

Provide a Schema Version

+

The sat bootprep input file is validated against a versioned schema +definition. The input file should specify the version of the schema with which +it is compatible under a schema_version key. For example:

+
---
+schema_version: 1.0.2
+

The current sat bootprep input file schema version can be viewed with the +following command:

+
ncn-m001# sat bootprep view-schema | grep '^version:'
+version: '1.0.2'
+

The sat bootprep run command validates the schema version specified +in the input file. The command also makes sure that the schema version +of the input file is compatible with the schema version understood by the +current version of sat bootprep. For more information on schema version +validation, refer to the schema_version property description in the bootprep +input file schema. For more information on viewing the bootprep input file +schema in either raw form or user-friendly HTML form, see View SAT Bootprep +Schema.

+

The default HPC CSM Software Recipe bootprep input files provided by the +hpc-csm-software-recipe release distribution already contain the correct +schema version.

+

Define CFS Configurations

+

The CFS configurations are defined under a configurations key. Under this +key, you can list one or more configurations to create. For each +configuration, give a name in addition to the list of layers that +comprise the configuration.

+

Each layer can be defined by a product name and optionally a version number, +commit hash, or branch in the product’s configuration repository. If this +method is used, the layer is created in CFS by looking up relevant configuration +information (including the configuration repository and commit information) from +the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied. However, if it is absent, the version is assumed to be the latest +version found in the cray-product-catalog.

+

Alternatively, a configuration layer can be defined by explicitly referencing +the desired configuration repository. You must then specify the intended version +of the Ansible playbooks by providing a branch name or commit hash with branch +or commit.

+

The following example shows a CFS configuration with two layers. The first +layer is defined in terms of a product name and version, and the second layer +is defined in terms of a Git clone URL and branch:

+
---
+configurations:
+- name: example-configuration
+  layers:
+  - name: example-product
+    playbook: example.yml
+    product:
+      name: example
+      version: 1.2.3
+  - name: another-example-product
+    playbook: another-example.yml
+    git:
+      url: "https://vcs.local/vcs/another-example-config-management.git"
+      branch: main
+

When sat bootprep is run against an input file, a CFS configuration is created +corresponding to each configuration in the configurations section. For +example, the configuration created from an input file with the layers listed +above might look something like the following:

+
{
+    "lastUpdated": "2022-02-07T21:47:49Z",
+    "layers": [
+        {
+            "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "example product",
+            "playbook": "example.yml"
+        },
+        {
+            "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "another example product",
+            "playbook": "another-example.yml"
+        }
+    ],
+    "name": "example-configuration"
+}
+

Define IMS Images

+

The IMS images are defined under an images key. Under the images key, the +user may define one or more images to be created in a list. Each element of the +list defines a separate IMS image to be built and/or configured. Images must +contain a name key and a base key.

+

The name key defines the name of the resulting IMS image. The base key +defines the base image to be configured or the base recipe to be built and +optionally configured. One of the following keys must be present under the +base key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use a product key to specify an image or recipe provided by a particular +version of a product. If a product provides more than one image or recipe, +a filter string prefix must be specified to select one.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

Images may also contain the following keys:

+
    +
  • Use a configuration key to specify a CFS configuration with which to +customize the built image. If a configuration is specified, then configuration +groups must also be specified using the configuration_group_names key.
  • +
  • Use a ref_name key to specify a unique name that can refer to this image +within the input file in other images or in session templates. The ref_name +key allows references to images from the input file that have dynamically +generated names as described in +Dynamic Variable Substitutions.
  • +
  • Use a description key to describe the image in the bootprep input file. +Note that this key is not currently used.
  • +
+

Here is an example of an image using an existing IMS recipe as its base. This +example builds an IMS image from that recipe. It then configures it with +a CFS configuration named example-compute-config. The example-compute-config +CFS configuration can be defined under the configurations key in the same +input file, or it can be an existing CFS configuration. Running sat bootprep +against this input file results in an image named example-compute-image.

+
images:
+- name: example-compute-image
+  description: >
+    An example compute node image built from an existing IMS recipe.    
+  base:
+    ims:
+      name: example-compute-image-recipe
+      type: recipe
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

Here is an example showing the definition of two images. The first image is +built from a recipe provided by the cos product. The second image uses the +first image as a base and configures it with a configuration named +example-compute-config. The value of the first image’s ref_name key is used +in the second image’s base.image_ref key to specify it as a dependency. +Running sat bootprep against this input file results in two images, the +first named example-cos-image and the second named example-compute-image.

+
images:
+- name: example-cos-image
+  ref_name: example-cos-image
+  description: >
+    An example image built from a recipe provided by the COS product.    
+  base:
+    product:
+      name: cos
+      version: 2.3.101
+      type: recipe
+- name: example-compute-image
+  description: >
+    An example image built from a recipe provided by the COS product.    
+  base:
+    image_ref: example-cos-image
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

Here is an example of three IMS images built from the Kubernetes image and the +Ceph storage image provided by the csm product. This example uses a filter +string prefix to select from the multiple images provided by the CSM product. +The first two IMS images in the example find any image from the specified csm +product version whose name starts with secure-kubernetes. The third image in +the example finds any csm image whose name starts with secure-storage-ceph. +All three images are then configured with a configuration named +example-management-config. Running sat bootprep against this input file +results in three IMS images named worker-example-csm-image, +master-example-csm-image, and storage-example-csm-image.

+
images:
+- name: worker-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-kubernetes
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Worker
+
+- name: master-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-kubernetes
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Master
+
+- name: storage-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-storage-ceph
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Storage
+

Define BOS Session Templates

+

The BOS session templates are defined under the session_templates key. Each +session template must provide values for the name, image, configuration, +and bos_parameters keys. The name key defines the name of the resulting BOS +session template. The image key defines the image to use in the BOS session +template. One of the following keys must be present under the image key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

The configuration key defines the CFS configuration specified +in the BOS session template.

+

The bos_parameters key defines parameters that are passed through directly to +the BOS session template. The bos_parameters key should contain a boot_sets +key, and each boot set in the session template should be specified under +boot_sets. Each boot set can contain the following keys, all of +which are optional:

+
    +
  • Use a kernel_parameters key to specify the parameters passed to the kernel on the command line.
  • +
  • Use a network key to specify the network over which the nodes boot.
  • +
  • Use a node_list key to specify the nodes to add to the boot set.
  • +
  • Use a node_roles_groups key to specify the HSM roles to add to the boot set.
  • +
  • Use a node_groups key to specify the HSM groups to add to the boot set.
  • +
  • Use a rootfs_provider key to specify the root file system provider.
  • +
  • Use a rootfs_provider_passthrough key to specify the parameters to add to the rootfs= +kernel parameter.
  • +
+

As mentioned above, the parameters under bos_parameters are passed through +directly to BOS. For more information on the properties of a BOS boot set, +refer to BOS Session Templates in the Cray +System Management Documentation.

+

Here is an example of a BOS session template that refers to an existing IMS +image by name:

+
session_templates:
+- name: example-session-template
+  image:
+    ims:
+      name: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

Here is an example of a BOS session template that refers to an image from the +input file by its ref_name. This requires that an image defined in the input +file specifies example-image as the value of its ref_name key.

+
session_templates:
+- name: example-session-template
+  image:
+    image_ref: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

HPC CSM Software Recipe Variable Substitutions

+

The sat bootprep command takes any variables you provide and substitutes them +into the input file. Variables are sourced from the command line, any variable +files directly provided, and the HPC CSM Software Recipe files used, in that +order. When you provide values through a variable file, sat bootprep +substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe +provides default variables in a product_vars.yaml variable file. This file +defines information about each HPC software product included in the recipe.

+

You will primarily substitute variables into the default HPC CSM Software Recipe +bootprep input files through IUF. However, variable files can also be given to +sat bootprep directly from IUF’s use of the recipe. If you do use variables +directly with sat bootprep, you might encounter some limitations. For more +information on SAT variable limitations, see SAT and IUF. +For more information on IUF and variable substitutions, see the +IUF section of +the Cray System Management Documentation.

+

Select an HPC CSM Software Recipe Version

+

You can view a listing of the default HPC CSM Software Recipe variables and +their values by running sat bootprep list-vars. For more information on +options that can be used with the list-vars subcommand, refer to the man page +for the sat bootprep subcommand.

+

By default, the sat bootprep command uses the variables from the latest +installed version of the HPC CSM Software Recipe. However, you can override +this with the --recipe-version command line argument to sat bootprep run.

+

For example, to explicitly select the 22.11.0 version of the HPC CSM Software +Recipe default variables, specify --recipe-version 22.11.0:

+
ncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+

Values Supporting Jinja2 Template Rendering

+

The entire sat bootprep input file is not rendered by the Jinja2 template +engine. Jinja2 template rendering of the input file is performed individually +for each supported value. The values of the following keys in the bootprep +input file support rendering as a Jinja2 template and thus support variables:

+
    +
  • The name key of each configuration under the configurations key.
  • +
  • The following keys of each layer under the layers key in a +configuration: +
      +
    • name
    • +
    • playbook
    • +
    • git.branch
    • +
    • product.version
    • +
    • product.branch
    • +
    +
  • +
  • The following keys of each image under the images key: +
      +
    • name
    • +
    • base.product.version
    • +
    • configuration
    • +
    +
  • +
  • The following keys of each session template under the +session_templates key: +
      +
    • name
    • +
    • configuration
    • +
    +
  • +
+

You can use Jinja2 built-in filters in values of any of the keys listed above. +In addition, Python string methods can be called on the string variables.

+

Hyphens in HPC CSM Software Recipe Variables

+

Variable names with hyphens are not allowed in Jinja2 expressions because they +are parsed as an arithmetic expression instead of a single variable. To support +product names with hyphens, sat bootprep converts hyphens to underscores in +all top-level keys of the default HPC CSM Software Recipe variables. It also +applies the same conversion to any variables sourced from the command line or from any variable files +you provide directly. Keep this in mind when referring to a variable with hyphens in the bootprep +input file. For example, to refer to the product version +variable for slingshot-host-software in the bootprep input file, write +"{{slingshot_host_software.version}}".

+

HPC CSM Software Recipe Variable Substitution Example

+

The following example bootprep input file shows how the COS version variable +can be used in an input file that creates a CFS configuration for computes. +Only one layer is shown for brevity.

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: cos-compute-{{cos.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: "{{cos.working_branch}}"
+

Note: When the value of a key in the bootprep input file is a Jinja2 +expression, it must be quoted to pass YAML syntax checking.

+

Jinja2 expressions can also use filters and Python’s built-in string methods to +manipulate the variable values. For example, suppose only the major and minor +components of a COS version are to be used in the branch name for the COS +layer of the CFS configuration. You can use the split string method to +achieve this as follows:

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: cos-compute-{{cos.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: integration-{{cos.version.split('.')[0]}}-{{cos.version.split('.')[1]}}
+

Dynamic Variable Substitutions

+

Additional variables are available besides the default variables provided by +the HPC CSM Software Recipe. (For more information, see HPC CSM Software +Recipe Variable Substitutions.) +These additional variables are dynamic because their values are determined +at run-time based on the context in which they appear. Available dynamic +variables include the following:

+
    +
  • +

    The variable base.name can be used in the name of an image under the +images key. The value of this variable is the name of the IMS image or +recipe used as the base of this image.

    +
  • +
  • +

    The variable image.name can be used in the name of a session template +under the session_templates key. The value of this variable is the name of +the IMS image used in this session template.

    +

    Note: The name of a session template is restricted to 45 characters. Keep +this in mind when using image.name in the name of a session template.

    +
  • +
+

These variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:

+
    +
  • You want to build an image from a recipe provided by a product and use the +name of the recipe in the name of the resulting image.
  • +
  • You want to use the name of the image in the name of a session template, and +the image is generated as described in the previous use case.
  • +
+

Example Bootprep Input Files

+

This section provides an example bootprep input file. It also gives +instructions for obtaining the default bootprep input files delivered +with a release of the HPC CSM Software Recipe.

+

Example Bootprep Input File

+

The following bootprep input file provides an example of using most of the +features described in previous sections. It is not intended to be a complete +bootprep file for the entire CSM product.

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: cos-compute-{{cos.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: cos
+      version: "{{cos.version}}"
+      branch: "{{cos.working_branch}}"
+  - name: cpe-pe_deploy-{{cpe.working_branch}}
+    playbook: pe_deploy.yml
+    product:
+      name: cpe
+      version: "{{cpe.version}}"
+      branch: "{{cpe.working_branch}}"
+
+images:
+- name: "{{default.note}}{{base.name}}{{default.suffix}}"
+  ref_name: base_cos_image
+  base:
+    product:
+      name: cos
+      type: recipe
+      version: "{{cos.version}}"
+
+- name: "compute-{{base.name}}"
+  ref_name: compute_image
+  base:
+    image_ref: base_cos_image
+  configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  configuration_group_names:
+  - Compute
+
+session_templates:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  image:
+    image_ref: compute_image
+  configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  bos_parameters:
+    boot_sets:
+      compute:
+        kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+        node_roles_groups:
+        - Compute
+        rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+

Access Default Bootprep Input Files

+

Default bootprep input files are delivered by the HPC CSM Software Recipe +product. You can access these files by cloning the hpc-csm-software-recipe +repository, as described in the Accessing sat bootprep files process of +the Cray System Management +Documentation. Find the +default input files in the bootprep directory of the cloned repository:

+
ncn-m001# ls bootprep/
+

Generate an Example Bootprep Input File

+

The sat bootprep generate-example command was not updated for +recent bootprep schema changes. It is recommended that you instead use the +default bootprep input files described in Access Default Bootprep Input +Files. The sat bootprep generate-example command will be updated in a future release of SAT.

+

Summary of SAT Bootprep Results

+

The sat bootprep run command uses information from the bootprep input file to +create CFS configurations, IMS images, and BOS session templates. For easy +reference, the command also includes output summarizing the final creation +results. The following example shows a sample table output.

+
ncn-m001# sat bootprep run
+...
+################################################################################
+CFS configurations
+################################################################################
++------------------+
+| name             |
++------------------+
+| example-config-1 |
+| example-config-2 |
++------------------+
+################################################################################
+IMS images
+################################################################################
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| name          | preconfigured_image_id               | final_image_id                       | configuration  | configuration_group_names  |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute                    |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+################################################################################
+BOS session templates
+################################################################################
++------------------+----------------+
+| name             | configuration  |
++------------------+----------------+
+| example-template | example-config |
++------------------+----------------+
+

View SAT Bootprep Schema

+

The contents of the YAML input files used by sat bootprep must conform to a +schema which defines the structure of the data. The schema definition is written +using the JSON Schema format. (Although the format is named “JSON Schema”, the +schema itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.

+

View the Exact Schema Specification

+

To view the exact schema specification, run sat bootprep view-schema.

+
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+...
+title: Bootprep Input File
+description: >
+  A description of the set of CFS configurations to create, the set of IMS
+  images to create and optionally customize with the defined CFS configurations,
+  and the set of BOS session templates to create that reference the defined
+  images and configurations.
+type: object
+additionalProperties: false
+properties:
+  ...
+

Generate User-Friendly Documentation

+

The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature is included +with sat bootprep that generates user-friendly HTML documentation for the input +file schema. This HTML documentation can be browsed with your preferred web +browser.

+
    +
  1. +

    Create a documentation tarball using sat bootprep.

    +
    ncn-m001# sat bootprep generate-docs
    +INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
    +

    An alternate output directory can be specified with the --output-dir +option. The generated tarball is always named bootprep-schema-docs.tar.gz.

    +
    ncn-m001# sat bootprep generate-docs --output-dir /tmp
    +INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
    +
  2. +
  3. +

    From another machine, copy the tarball to a local directory.

    +
    another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
    +
  4. +
  5. +

    Extract the contents of the tarball and open the contained index.html.

    +
    another-machine$ tar xzvf bootprep-schema-docs.tar.gz
    +x bootprep-schema-docs/
    +x bootprep-schema-docs/index.html
    +x bootprep-schema-docs/schema_doc.css
    +x bootprep-schema-docs/schema_doc.min.js
    +another-machine$ open bootprep-schema-docs/index.html
    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/404.html b/en-26/404.html new file mode 100644 index 0000000000..bbffb11fa9 --- /dev/null +++ b/en-26/404.html @@ -0,0 +1,59 @@ + + + + + + + + + 404 Page not found + + + + + + + + + + + + + + + + + + +
+
+
+
+

+

+

+

+

+

+

Page not found!

+
+
+ +
+ + + diff --git a/en-26/about_sat/command_authentication/index.html b/en-26/about_sat/command_authentication/index.html new file mode 100644 index 0000000000..9967a220b3 --- /dev/null +++ b/en-26/about_sat/command_authentication/index.html @@ -0,0 +1,1381 @@ + + + + + + + + + + + + SAT Command Authentication :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Command Authentication

+

Some SAT subcommands make requests to the HPE Cray EX services through the API +gateway and thus require authentication to the API gateway in order to function. +Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to +be configured. In order to use the SAT S3 bucket, the System Administrator must +generate the S3 access key and secret key and write them to a local file. This +must be done on every Kubernetes control plane node where SAT commands are run.

+

For more information on authentication requests, see System Security and +Authentication in the Cray System Management +Documentation. The following is a table +describing SAT commands and the types of authentication they require.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SAT SubcommandAuthentication/Credentials RequiredMan PageDescription
sat authResponsible for authenticating to the API gateway and storing a token.sat-authAuthenticate to the API gateway and save the token.
sat bmccredsRequires authentication to the API gateway.sat-bmccredsSet BMC passwords.
sat bootprepRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install.sat-bootprepPrepare to boot nodes with images and configurations.
sat bootsysRequires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages.sat-bootsysBoot or shut down the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software.
sat diagRequires authentication to the API gateway.sat-diagLaunch diagnostics on the HSN switches and generate a report.
sat firmwareRequires authentication to the API gateway.sat-firmwareReport firmware version.
sat hwhistRequires authentication to the API gateway.sat-hwhistReport hardware component history.
sat hwinvRequires authentication to the API gateway.sat-hwinvGive a listing of the hardware of the HPE Cray EX system.
sat hwmatchRequires authentication to the API gateway.sat-hwmatchReport hardware mismatches.
sat initNonesat-initCreate a default SAT configuration file.
sat jobstatRequires authentication to the API gateway.sat-jobstatCheck the status of jobs and applications.
sat k8sRequires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install.sat-k8sReport on Kubernetes replica sets that have co-located (on the same node) replicas.
sat linkhealthThis command has been deprecated.
sat nid2xnameRequires authentication to the API gateway.sat-nid2xnameTranslate node IDs to node XNames.
sat sensorsRequires authentication to the API gateway.sat-sensorsReport current sensor data.
sat setrevRequires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-setrevSet HPE Cray EX system revision information.
sat showrevRequires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name.sat-showrevPrint revision information for the HPE Cray EX system.
sat slscheckRequires authentication to the API gateway.sat-slscheckPerform a cross-check between SLS and HSM.
sat statusRequires authentication to the API gateway.sat-statusReport node status across the HPE Cray EX system.
sat swapRequires authentication to the API gateway.sat-swapPrepare HSN switch or cable for replacement and bring HSN switch or cable into service.
sat xname2nidRequires authentication to the API gateway.sat-xname2nidTranslate node and node BMC XNames to node IDs.
sat switchThis command has been deprecated. It has been replaced by sat swap.
+

In order to authenticate to the API gateway, run the sat auth +command. This command will prompt for a password on the command line. The +username value is obtained from the following locations, in order from highest +to lowest precedence:

+
    +
  • The --username global command-line option.
  • +
  • The username option in the api_gateway section of the configuration file +at ~/.config/sat/sat.toml.
  • +
  • The name of the currently logged-in user running the sat command.
  • +
+

If credentials are entered correctly when prompted by sat auth, a token file +will be obtained and saved to ~/.config/sat/tokens. Subsequent sat commands +will determine the username the same way as sat auth described above and will +use the token for that username if it has been obtained and saved by sat auth.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/about_sat/dependencies/index.html b/en-26/about_sat/dependencies/index.html new file mode 100644 index 0000000000..6bc224bb30 --- /dev/null +++ b/en-26/about_sat/dependencies/index.html @@ -0,0 +1,1453 @@ + + + + + + + + + + + + SAT Dependencies :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Dependencies

+

Most sat subcommands depend on services or components from other products in the +HPE Cray EX software stack. The following list shows these dependencies for each +subcommand. Each service or component is listed under the product it belongs to.

+

sat auth

+

CSM

+
    +
  • Keycloak
  • +
+

sat bmccreds

+

CSM

+
    +
  • System Configuration Service (SCSD)
  • +
+

sat bootprep

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Image Management Service (IMS)
  • +
  • Version Control Service (VCS)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat bootsys

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Cray Advanced Platform Monitoring and Control (CAPMC)
  • +
  • Ceph
  • +
  • Etcd
  • +
  • Firmware Action Service (FAS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

HPE Cray Supercomputing User Services Software (USS)

+
    +
  • Node Memory Dump (NMD)
  • +
+

sat diag

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

CSM-Diags

+
    +
  • Fox
  • +
+

sat firmware

+

CSM

+
    +
  • Firmware Action Service (FAS)
  • +
+

sat hwhist

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwinv

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat hwmatch

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat init

+

None

+

sat jobstat

+

PBS

+
    +
  • HPE State Checker
  • +
+

sat k8s

+

CSM

+
    +
  • Kubernetes
  • +
+

sat nid2xname

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+

sat sensors

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • HM Collector
  • +
+

SMA

+
    +
  • Telemetry API
  • +
+

sat setrev

+

CSM

+
    +
  • S3
  • +
+

sat showrev

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • Kubernetes
  • +
  • S3
  • +
+

sat slscheck

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
  • System Layout Service (SLS)
  • +
+

sat status

+

CSM

+
    +
  • Boot Orchestration Service (BOS)
  • +
  • Configuration Framework Service (CFS)
  • +
  • Hardware State Manager (HSM)
  • +
  • Image Management Service (IMS)
  • +
  • System Layout Service (SLS)
  • +
+

sat swap

+

Slingshot

+
    +
  • Fabric Manager
  • +
+

sat switch

+

Deprecated: See sat swap

+

sat xname2nid

+

CSM

+
    +
  • Hardware State Manager (HSM)
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/about_sat/index.html b/en-26/about_sat/index.html new file mode 100644 index 0000000000..5782aaa08d --- /dev/null +++ b/en-26/about_sat/index.html @@ -0,0 +1,1216 @@ + + + + + + + + + + + + About SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

About SAT

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-26/about_sat/index.xml b/en-26/about_sat/index.xml new file mode 100644 index 0000000000..1303404e03 --- /dev/null +++ b/en-26/about_sat/index.xml @@ -0,0 +1,47 @@ + + + + About SAT on System Admin Toolkit (SAT) + /docs-sat/en-26/about_sat/ + Recent content in About SAT on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + Wed, 11 Dec 2024 03:40:00 +0000 + + + SAT Command Authentication + /docs-sat/en-26/about_sat/command_authentication/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/command_authentication/ + SAT Command Authentication Some SAT subcommands make requests to the HPE Cray EX services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. + + + SAT Dependencies + /docs-sat/en-26/about_sat/dependencies/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/dependencies/ + SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX software stack. The following list shows these dependencies for each subcommand. Each service or component is listed under the product it belongs to. 
sat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 HPE Cray Supercomputing User Services Software (USS) Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diags Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None + + + Introduction to SAT + /docs-sat/en-26/about_sat/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview. + + + SAT in CSM + /docs-sat/en-26/about_sat/sat_in_csm/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/sat_in_csm/ + SAT in CSM In CSM 1.3 and newer, the sat command is automatically available on the Kubernetes control plane, but it is still possible to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the sat command available in CSM. 
Installing the SAT product stream allows additional supporting components to be added: An entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product stream. + + + View SAT Documentation + /docs-sat/en-26/about_sat/view_sat_docs/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/view_sat_docs/ + View SAT Documentation View the System Admin Toolkit (SAT) documentation both online and offline by using the information in this section. Online Documentation The SAT documentation can be found online in HTML form at the following link: SAT Documentation. The navigation pane on the left of the HTML page orders topics alphabetically. Navigate an individual topic&rsquo;s headings by using the Headings icon at the top of the page, as shown in the following images. + + + diff --git a/en-26/about_sat/introduction/index.html b/en-26/about_sat/introduction/index.html new file mode 100644 index 0000000000..cf9c34cbd8 --- /dev/null +++ b/en-26/about_sat/introduction/index.html @@ -0,0 +1,1335 @@ + + + + + + + + + + + + Introduction to SAT :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Introduction to SAT

+

About System Admin Toolkit (SAT)

+

The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.

+

SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview.

+

In CSM 1.3 and newer, the sat command is automatically available on all the +Kubernetes control plane nodes. For more information, see SAT in CSM. Older +versions of CSM do not have the sat command automatically available, and SAT +must be installed as a separate product.

+

SAT Command Overview

+

Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.

+

SAT Command Line Utility

+

The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes control plane nodes +(ncn-m nodes).

+

It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.

+

SAT Commands

+

The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.

+

SAT Container Environment

+

The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on +Kubernetes control plane nodes. A few important points about the SAT container environment include the following:

+
    +
  • Using either sat or sat bash always launches a container.
  • +
  • The SAT container does not have access to the NCN file system.
  • +
+

There are two ways to run sat.

+
    +
  • Interactive: Launching a container using sat bash, followed by a sat command.
  • +
  • Non-interactive: Running a sat command directly on a Kubernetes control plane node.
  • +
+

In both of these cases, a container is launched in the background to execute the command. The first option, running +sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the +container is launched, executes the command, and upon the command’s completion the container exits. The following two +examples show the same action, checking the system status, using both modes.

+

(ncn-m001#) Here is an example using interactive mode:

+
sat bash
+

((CONTAINER_ID) sat-container#) Example sat command after a container is launched:

+
sat status
+

(ncn-m001#) Here is an example using non-interactive mode:

+
sat status
+

Interactive Advantages

+

Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral +container storage. If multiple sat commands are being run in succession, use sat bash to launch the +container beforehand. This will save time because the container does not need to be launched for each sat command.

+

Non-interactive Advantages

+

The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of +several steps that need to be executed from a management NCN.

+

Man Pages - Interactive and Non-interactive Modes

+

To view a sat man page from a Kubernetes control plane node, use sat-man on the manager node.

+

(ncn-m001#) Here is an example:

+
sat-man status
+

A man page describing the SAT container environment is available on the Kubernetes control plane nodes, which can be viewed +either with man sat or man sat-podman from the manager node.

+

(ncn-m001#) Here are examples:

+
man sat
+
man sat-podman
+

Command Prompt Conventions in SAT

+

The host name in a command prompt indicates where the command must be run. The +user account that must run the command is also indicated in the prompt.

+
    +
  • The root or super-user account always has the host name in the prompt and the +# character at the end of the prompt.
  • +
  • Any non-root account is indicated with account@hostname>. A non-privileged +account is referred to as user.
  • +
  • The command prompt inside the SAT container environment is indicated with the +string shown in the following table. It also has the # character at the end of the prompt.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Command PromptMeaning
ncn-m001#Run the command as root on the specific Kubernetes control plane server which has this hostname (ncn-m001 in this example). (Non-interactive)
user@hostname>Run the command as any non-root user on the specified hostname. (Non-interactive)
(venv) user@hostname>Run the command as any non-root user within a Python virtual environment on the specified hostname. (Non-interactive)
(CONTAINER_ID) sat-container#Run the command inside the SAT container environment by first running sat bash. (Interactive)
+

These command prompts should be inserted into text before the fenced code block +instead of inside of it. This is a change from the documentation of SAT 2.5 and +earlier. Here is an example of the new use of the command prompt:

+
    +
  1. +

    (ncn-m001#) Example first step.

    +
    yes >/dev/null
    +
  2. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/about_sat/sat_in_csm/index.html b/en-26/about_sat/sat_in_csm/index.html new file mode 100644 index 0000000000..ed6fb4f0d0 --- /dev/null +++ b/en-26/about_sat/sat_in_csm/index.html @@ -0,0 +1,1255 @@ + + + + + + + + + + + + SAT in CSM :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT in CSM

+

In CSM 1.3 and newer, the sat command is automatically available on the Kubernetes control plane, but it is still possible +to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the +sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:

+
    +
  • +

    An entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product +stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.

    +
  • +
  • +

    The sat-install-utility container image is only available with the full SAT product stream. This container image +provides uninstall and downgrade functionality when used with the prodmgr command. (In SAT 2.3 and older, SAT was +only available to install as a separate product stream. Because these versions were packaged with +sat-install-utility, it is still possible to uninstall these versions of SAT.)

    +
  • +
  • +

    The docs-sat RPM package is only available with the full SAT product stream.

    +
  • +
  • +

    The sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is +only available with the full SAT product stream.

    +
  • +
+

If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS +configurations that apply to management NCNs (for example, management-23.5.0) should not include a SAT layer.

+

The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the +Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, +it does the following:

+
    +
  • +

    Modifies the sat.toml configuration file which contains the username so that it is only readable by root.

    +
  • +
  • +

    Modifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed +because the names of the files within the tokens directory contain the username.

    +
  • +
+

Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other +users. These permission changes only apply to files created by previous installations of SAT. In the current version of +SAT all files and directories are created with the appropriate permissions.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/about_sat/view_sat_docs/index.html b/en-26/about_sat/view_sat_docs/index.html new file mode 100644 index 0000000000..258c5910c0 --- /dev/null +++ b/en-26/about_sat/view_sat_docs/index.html @@ -0,0 +1,1245 @@ + + + + + + + + + + + + View SAT Documentation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

View SAT Documentation

+

View the System Admin Toolkit (SAT) documentation both online and +offline by using the information in this section.

+

Online Documentation

+

The SAT documentation can be found online in HTML form at the following link: +SAT Documentation. The navigation pane +on the left of the HTML page orders topics alphabetically. Navigate an +individual topic’s headings by using the Headings icon at the top of the +page, as shown in the following images.

+

HTML Heading Icon

+

HTML Heading Navigation

+

The documentation can also be viewed online in GitHub by navigating to the +docs/ subdirectory of the +docs-sat repository. +Navigate an individual topic’s headings with a similar +Headings icon at the top of the page, as shown in the following images.

+

GitHub Heading Icon

+

GitHub Heading Navigation

+

Offline Documentation

+

The SAT documentation is available offline as markdown, which can be +viewed with a markdown viewer or with a text editor. The offline +documentation is available in the docs/ directory of the SAT release +distribution as well as in RPM package format. The RPM package is +installed as a part of the Ansible plays launched by the Configuration +Framework Service (CFS). Its files are installed to /usr/share/doc/sat.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/categories/index.html b/en-26/categories/index.html new file mode 100644 index 0000000000..a135ff44cb --- /dev/null +++ b/en-26/categories/index.html @@ -0,0 +1,1308 @@ + + + + + + + + + + + + Categories :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + category :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-26/categories/index.xml b/en-26/categories/index.xml new file mode 100644 index 0000000000..50e66fc9b7 --- /dev/null +++ b/en-26/categories/index.xml @@ -0,0 +1,11 @@ + + + + Categories on System Admin Toolkit (SAT) + /docs-sat/en-26/categories/ + Recent content in Categories on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + + + diff --git a/en-26/external_system/index.html b/en-26/external_system/index.html new file mode 100644 index 0000000000..96722f18ba --- /dev/null +++ b/en-26/external_system/index.html @@ -0,0 +1,1492 @@ + + + + + + + + + + + + SAT on an External System :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT on an External System

+

SAT can optionally be installed and configured on an external system to interact +with CSM over the CAN.

+

Limitations

+

Most SAT subcommands work by accessing APIs which are reachable via the CAN. +However, certain SAT commands depend on host-based functionality on the +management NCNs and will not work from an external system. This includes the +following:

+
    +
  • The platform-services and ncn-power stages of sat bootsys
  • +
  • The local host information displayed by the --local option of sat showrev
  • +
+

Installing SAT on an external system is not an officially supported configuration. +These instructions are provided “as-is” with the hope that they can be useful for +users who desire additional flexibility.

+

Certain additional steps may need to be taken to install and configure SAT +depending on the configuration of the external system in use. These additional +steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this +documentation. This section covers only the steps needed to configure SAT to +use externally-accessible API endpoints exposed by CSM.

+

Install and Configure SAT

+

Prerequisites

+
    +
  • The external system must be on the Customer Access Network (CAN).
  • +
  • Python 3.7 or newer is installed on the system.
  • +
  • kubectl, openssh, git, and curl are installed on the external system.
  • +
  • The root CA certificates used when installing CSM have been added to the +external system’s trust store such that authenticated TLS connections can be +made to the CSM REST API gateway. For more information, refer to Certificate +Authority in the Cray System Management Documentation.
  • +
+

Procedure

+
    +
  1. +

    (user@hostname>) Create a Python virtual environment.

    +
    SAT_VENV_PATH="$(pwd)/venv"
    +python3 -m venv ${SAT_VENV_PATH}
    +. ${SAT_VENV_PATH}/bin/activate
    +
  2. +
  3. +

    ((venv) user@hostname>) Clone the SAT source code.

    +

    To use SAT version 3.21, this example clones the release/3.21 branch of +Cray-HPE/sat.

    +
    git clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git
    +
  4. +
  5. +

    Set up the SAT CSM Python dependencies to be installed from their source code.

    +

    SAT CSM Python dependency packages are not currently distributed publicly as +source packages or binary distributions. They must be installed from +their source code hosted on GitHub. Also, to install the cray-product-catalog +Python package, first clone it locally. Use the following steps to +modify the SAT CSM Python dependencies so they can be installed from their source +code.

    +
      +
    1. +

      ((venv) user@hostname>) Clone the source code for cray-product-catalog.

      +
      git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
      +
    2. +
    3. +

      ((venv) user@hostname>) In the cray-product-catalog directory, create a file named .version +that contains the version of cray-product-catalog.

      +
      echo 1.6.0 > cray-product-catalog/.version
      +
    4. +
    5. +

      ((venv) user@hostname>) Open the “locked” requirements file in a text editor.

      +
      vim sat/requirements.lock.txt
      +
    6. +
    7. +

      Update the line containing cray-product-catalog so that it reflects the +local path to cray-product-catalog.

      +

      It should read as follows:

      +
      ./cray-product-catalog
      +
    8. +
    9. +

      For versions of SAT newer than 3.19, change the line containing csm-api-client +to read as follows.

      +
      csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +
    10. +
    11. +

      (Optional) ((venv) user@hostname>) Confirm that requirements.lock.txt is modified as expected.

      +
      grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
      +

      Example output:

      +
      ./cray-product-catalog
      +csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
      +

      Note: For versions newer than 3.19, the output will show both +cray-product-catalog and csm-api-client. For version 3.19 and older, +the output will only show cray-product-catalog.

      +
    12. +
    +
  6. +
  7. +

    ((venv) user@hostname>) Install the modified SAT dependencies.

    +
    pip install -r sat/requirements.lock.txt
    +
  8. +
  9. +

    ((venv) user@hostname>) Install the SAT Python package.

    +
    pip install ./sat
    +
  10. +
  11. +

    (Optional) ((venv) user@hostname>) Add the sat virtual environment to the user’s PATH environment +variable.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the +appropriate profile path.

    +

    If the virtual environment is not added to the user’s PATH environment +variable, then source ${SAT_VENV_PATH}/bin/activate will need to be run before +running any SAT commands.

    +
    deactivate
    +echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
    +source ~/.bash_profile
    +
  12. +
  13. +

    (user@hostname>) Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config +on the external system.

    +

    Note that this file contains credentials to authenticate against the Kubernetes +API as the administrative user, so it should be treated as sensitive.

    +
    mkdir -p ~/.kube
    +scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
    +

    Example output:

    +
    admin.conf                                       100% 5566   3.0MB/s   00:00
    +
  14. +
  15. +

    (user@hostname>) Find the CAN IP address on ncn-m001 to determine the +corresponding kubernetes hostname.

    +
      +
    • +

      On CSM 1.2 and newer, query the IP address of the bond0.cmn0 +interface.

      +
      ssh ncn-m001 ip addr show bond0.cmn0
      +

      Example output:

      +
      13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
      +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
      +inet 10.102.1.11/24 brd 10.102.1.255 scope global bond0.cmn0
      +   valid_lft forever preferred_lft forever
      +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
      +   valid_lft forever preferred_lft forever
      +
    • +
    • +

      On CSM versions prior to 1.2, query the IP address of the vlan007 interface.

      +
      ssh ncn-m001 ip addr show vlan007
      +

      Example output:

      +
      13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
      +link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
      +inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
      +   valid_lft forever preferred_lft forever
      +inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
      +   valid_lft forever preferred_lft forever
      +
    • +
    +
  16. +
  17. +

    (user@hostname>) Set the IP_ADDRESS variable to the value found in the +previous step.

    +
    IP_ADDRESS=10.102.1.11
    +
  18. +
  19. +

    (user@hostname>) Add an entry to /etc/hosts mapping the IP address to +the hostname kubernetes.

    +
    echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
    +10.102.1.11 kubernetes
    +
  20. +
  21. +

    (user@hostname>) Modify ~/.kube/config to set the cluster server address.

    +

    The value of the server key for the kubernetes cluster under the clusters +section should be set to https://kubernetes:6443.

    +
    ---
    +clusters:
    +- cluster:
    +    certificate-authority-data: REDACTED
    +    server: https://kubernetes:6443
    +  name: kubernetes
    +...
    +
  22. +
  23. +

    (user@hostname>) Confirm that kubectl can access the CSM Kubernetes cluster.

    +
    kubectl get nodes
    +

    Example output:

    +
    NAME       STATUS   ROLES    AGE    VERSION
    +ncn-m001   Ready    master   135d   v1.19.9
    +ncn-m002   Ready    master   136d   v1.19.9
    +ncn-m003   Ready    master   136d   v1.19.9
    +ncn-w001   Ready    <none>   136d   v1.19.9
    +ncn-w002   Ready    <none>   136d   v1.19.9
    +ncn-w003   Ready    <none>   136d   v1.19.9
    +
  24. +
  25. +

    (user@hostname>) Use sat init to create a configuration file for SAT.

    +
    sat init
    +

    Example output:

    +
    INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
    +
  26. +
  27. +

    (user@hostname>) Copy the platform CA certificates from the management NCN +and configure the certificates for use with SAT.

    +

    If a shell other than bash is in use, replace ~/.bash_profile with the +appropriate profile path.

    +
    scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
    +echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
    +source ~/.bash_profile
    +
  28. +
  29. +

    Edit the SAT configuration file to set the API and S3 hostnames.

    +

    Externally available API endpoints are given domain names in PowerDNS, so the +endpoints in the configuration file should each be set to the format +subdomain.system-name.site-domain. Here system-name and site-domain are +replaced with the values specified during csi config init, and subdomain +is the DNS name for the externally available service. For more information, +refer to Externally Exposed Services in the Cray System Management +Documentation.

    +

    The API gateway has the subdomain api, and S3 has the subdomain s3. The +S3 endpoint runs on port 8080. The following options should be set in the +SAT configuration file.

    +
    [api_gateway]
    +host = "api.system-name.site-domain"
    +
    +[s3]
    +endpoint = "http://s3.system-name.site-domain:8080"
    +
  30. +
  31. +

    Edit the SAT configuration file to specify the Keycloak user who will be +accessing the REST API.

    +
    [api_gateway]
    +username = "user"
    +
  32. +
  33. +

    (user@hostname>) Run sat auth, and enter the password when prompted.

    +

    The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin.

    +
    sat auth
    +

    Example output:

    +
    Password for user:
    +Succeeded!
    +

    For more information on authentication types and authentication credentials, +see SAT Command Authentication. +For more information on Keycloak accounts and changing Role Mappings, +refer to both Configure Keycloak Account and Create Internal User +Accounts in the Keycloak Shasta Realm in the Cray System Management +Documentation.

    +
  34. +
  35. +

    (user@hostname>) Ensure the S3 access key and secret key files exist and are readable only by the current user.

    +
    touch ~/.config/sat/s3_access_key \
    +    ~/.config/sat/s3_secret_key
    +
    chmod 600 ~/.config/sat/s3_access_key \
    +    ~/.config/sat/s3_secret_key
    +
  36. +
  37. +

    (user@hostname>) Write the credentials to local files using kubectl.

    +

    Generate S3 credentials and write them to local files so the SAT user can +access S3 storage. In order to use the SAT S3 bucket, the user must generate +the S3 access key and secret key and write them to local files. SAT uses +S3 storage for several purposes, most importantly to store the site-specific +information set with sat setrev.

    +
    kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    ~/.config/sat/s3_access_key
    +
    kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    ~/.config/sat/s3_secret_key
    +
  38. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/img/GitHub_Heading_Icon.png b/en-26/img/GitHub_Heading_Icon.png new file mode 100644 index 0000000000..7fcbbb3a31 Binary files /dev/null and b/en-26/img/GitHub_Heading_Icon.png differ diff --git a/en-26/img/GitHub_Heading_Navigation.png b/en-26/img/GitHub_Heading_Navigation.png new file mode 100644 index 0000000000..d5c6f490af Binary files /dev/null and b/en-26/img/GitHub_Heading_Navigation.png differ diff --git a/en-26/img/HTML_Heading_Icon.png b/en-26/img/HTML_Heading_Icon.png new file mode 100644 index 0000000000..29f55993ec Binary files /dev/null and b/en-26/img/HTML_Heading_Icon.png differ diff --git a/en-26/img/HTML_Heading_Navigation.png b/en-26/img/HTML_Heading_Navigation.png new file mode 100644 index 0000000000..034749f975 Binary files /dev/null and b/en-26/img/HTML_Heading_Navigation.png differ diff --git a/en-26/index.html b/en-26/index.html new file mode 100644 index 0000000000..aeb3ac8345 --- /dev/null +++ b/en-26/index.html @@ -0,0 +1,1307 @@ + + + + + + + + + + + + HPE Cray EX System Admin Toolkit (SAT) Guide :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ + + + + + navigation + + + +

HPE Cray EX System Admin Toolkit (SAT) Guide

+

IMPORTANT: Starting in CSM 1.6.0, SAT is fully included in CSM. There is no longer a separate SAT +product stream to install. SAT 2.6 releases, which accompanied CSM 1.5, are the last releases of +SAT as a separate product.

+

Similarly, the SAT documentation moved to be fully included within the CSM documentation. Starting in +CSM 1.6.0, find information on SAT in the +System Admin Toolkit (SAT) section +of the Cray System Management Documentation.

+

About SAT

+ +

SAT Installation

+ +

SAT Upgrade

+ +

SAT Uninstall and Downgrade

+ +

SAT on an External System

+ +

SAT Usage

+ +

SAT Release Notes

+ + + + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/index.json b/en-26/index.json new file mode 100644 index 0000000000..49878f21e8 --- /dev/null +++ b/en-26/index.json @@ -0,0 +1,204 @@ +[ +{ + "uri": "/docs-sat/en-26/usage/", + "title": "SAT Usage", + "tags": [], + "description": "", + "content": "SAT Usage SAT Bootprep SAT and IUF Change the BOS Version Configure Multi-tenancy " +}, +{ + "uri": "/docs-sat/en-26/about_sat/", + "title": "About SAT", + "tags": [], + "description": "", + "content": "About SAT View SAT Documentation Introduction to SAT SAT Command Authentication SAT in CSM SAT Dependencies " +}, +{ + "uri": "/docs-sat/en-26/", + "title": "HPE Cray EX System Admin Toolkit (SAT) Guide", + "tags": [], + "description": "", + "content": "HPE Cray EX System Admin Toolkit (SAT) Guide IMPORTANT: Starting in CSM 1.6.0, SAT is fully included in CSM. There is no longer a separate SAT product stream to install. SAT 2.6 releases, which accompanied CSM 1.5, are the last releases of SAT as a separate product.\nSimilarly, the SAT documentation moved to be fully included within the CSM documentation. 
Starting in CSM 1.6.0, find information on SAT in the System Admin Toolkit (SAT) section of the Cray System Management Documentation.\nAbout SAT View SAT Documentation Introduction to SAT SAT Command Authentication SAT in CSM SAT Dependencies SAT Installation Install and Upgrade Framework IUF Stage Details for SAT Post-Installation Procedures SAT Upgrade Install and Upgrade Framework IUF Stage Details for SAT Post-Upgrade Procedures SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT Downgrade: Switch Between SAT Versions SAT on an External System Limitations Install and Configure SAT Authenticate SAT Commands Generate SAT S3 Credentials SAT Usage SAT Bootprep SAT and IUF Change the BOS Version Configure Multi-tenancy SAT Release Notes Changes in SAT Version 2.x SAT Changes in Shasta Version 1.x " +}, +{ + "uri": "/docs-sat/en-26/install/", + "title": "SAT Installation", + "tags": [], + "description": "", + "content": "SAT Installation Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nThis document does not replicate install, upgrade, or deployment procedures detailed in the Cray System Management Documentation. This document provides details regarding software and configuration content specific to SAT which is needed when installing, upgrading, or deploying a SAT release. 
The Cray System Management Documentation will indicate when sections of this document should be referred to for detailed information.\nIUF will perform the following tasks for a release of SAT.\nIUF deliver-product stage: Uploads SAT configuration content to VCS Uploads SAT information to the CSM product catalog Uploads SAT content to Nexus repositories IUF update-vcs-config stage: Updates the VCS integration branch with new SAT configuration content if a working branch is specified IUF update-cfs-config stage: Creates a new CFS configuration for management nodes with new SAT configuration content IUF prepare-images stage: Creates updated management NCN and managed node images with new SAT content IUF management-nodes-rollout stage: Boots management NCNs with an image containing new SAT content IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF section of the Cray System Management Documentation describes how to use these tools directly if it is desirable to use them instead of IUF.\nIUF Stage Details for SAT This section describes SAT details that an administrator must be aware of before running IUF stages. Entries are prefixed with Information if no administrative action is required or Action if an administrator needs to perform tasks outside of IUF.\nupdate-vcs-config Information: This stage is only run if a VCS working branch is specified for SAT. By default, SAT does not create or specify a VCS working branch.\nupdate-cfs-config Information: This stage only applies to the management configuration and not to the managed configuration.\nprepare-images Information: This stage only applies to management images and not to managed images.\nPost-Installation Procedures After installing SAT with IUF, complete the following SAT configuration procedures before using SAT:\nAuthenticate SAT Commands Generate SAT S3 Credentials (Optional) Configure Multi-tenancy Set System Revision Information Notes on the Procedures Ellipses (...) 
in shell output indicate omitted lines. In the examples below, replace x.y.z with the version of the SAT product stream being installed. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. Authenticate SAT Commands To run SAT commands on the manager NCNs, first set up authentication to the API gateway. For more information on authentication types and authentication credentials, see SAT Command Authentication.\nThe admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin. For more information on Keycloak accounts and changing Role Mappings, refer to both Configure Keycloak Account and Create Internal User Accounts in the Keycloak Shasta Realm in the Cray System Management Documentation.\nPrerequisites The sat CLI has been installed following the IUF section of the Cray System Management Documentation. Procedure The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway.\n(ncn-m001#) Generate a default SAT configuration file if one does not exist.\nsat init Example output:\nConfiguration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; generated. Note: If the configuration file already exists, it will print out the following error.\nERROR: Configuration file \u0026#34;/root/.config/sat/sat.toml\u0026#34; already exists. Not generating configuration file. Edit ~/.config/sat/sat.toml and set the username option in the api_gateway section of the configuration file.\nusername = \u0026#34;crayadmin\u0026#34; (ncn-m001#) Run sat auth. Enter the password when prompted.\nsat auth Example output:\nPassword for crayadmin: Succeeded! (ncn-m001#) Other sat commands are now authenticated to make requests to the API gateway.\nsat status Generate SAT S3 Credentials Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. 
In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes control plane node where SAT commands are run.\nSAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev (see Set System Revision Information).\nPrerequisites The SAT CLI has been installed following the IUF section of the Cray System Management Documentation. The SAT configuration file has been created (See Authenticate SAT Commands). CSM has been installed and verified. Procedure (ncn-m001#) Ensure the files are readable only by root.\ntouch /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key chmod 600 /root/.config/sat/s3_access_key \\ /root/.config/sat/s3_secret_key (ncn-m001#) Write the credentials to local files using kubectl.\nkubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_access_key kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ /root/.config/sat/s3_secret_key Verify the S3 endpoint specified in the SAT configuration file is correct.\n(ncn-m001#) Get the SAT configuration file\u0026rsquo;s endpoint value.\nNote: If the command\u0026rsquo;s output is commented out, indicated by an initial # character, the SAT configuration will take the default value – \u0026quot;https://rgw-vip.nmn\u0026quot;.\ngrep endpoint ~/.config/sat/sat.toml Example output:\n# endpoint = \u0026#34;https://rgw-vip.nmn\u0026#34; (ncn-m001#) Get the sat-s3-credentials secret\u0026rsquo;s endpoint value.\nkubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.s3_endpoint}\u0026#39; | base64 -d | xargs Example output:\nhttps://rgw-vip.nmn Compare the two endpoint values.\nIf the values differ, change the SAT configuration file\u0026rsquo;s 
endpoint value to match the secret\u0026rsquo;s.\n(ncn-m001#) Copy SAT configurations to each manager node on the system.\nfor i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \\ mkdir -p /root/.config/sat; \\ scp -pr /root/.config/sat ${i}:/root/.config; done Note: Depending on how many manager nodes are on the system, the list of manager nodes may be different. This example assumes three manager nodes, where the configuration files must be copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 and ncn-m003.\n(Optional) Configure Multi-tenancy If installing SAT on a multi-tenant system, the tenant name can be configured at this point. For more information, see Configure multi-tenancy.\nSet System Revision Information HPE service representatives use system revision information data to identify systems in support cases.\nPrerequisites SAT authentication has been set up. See Authenticate SAT Commands. S3 credentials have been generated. See Generate SAT S3 Credentials. Procedure (ncn-m001#) Set System Revision Information.\nRun sat setrev and follow the prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date Tip: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, \u0026ldquo;System type\u0026rdquo; is EX-1C.\nsat setrev Example output:\n-------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. 
Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Verify System Revision Information.\n(ncn-m001#) Run sat showrev and verify the output shown in the \u0026ldquo;System Revision Information table.\u0026rdquo;\nsat showrev Example table output:\n################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... 
| | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ " +}, +{ + "uri": "/docs-sat/en-26/release_notes/", + "title": "SAT Release Notes", + "tags": [], + "description": "", + "content": "SAT Release Notes Changes in SAT Version 2.x Changes in SAT 2.6 Changes in SAT 2.5 Changes in SAT 2.4 Changes in SAT 2.3 Changes in SAT 2.2 SAT Changes in Shasta Version 1.x SAT Changes in Shasta v1.5 SAT Changes in Shasta v1.4.1 SAT Changes in Shasta v1.4 SAT Changes in Shasta v1.3.2 SAT Changes in Shasta v1.3 " +}, +{ + "uri": "/docs-sat/en-26/usage/change_bos_version/", + "title": "Change the BOS Version", + "tags": [], + "description": "", + "content": "Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). Select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command.\nAnother way to change the BOS version is by configuring it under the api_version setting in the bos section of the SAT configuration file. If the system is using an existing SAT configuration file from an older version of SAT, the bos section might not exist. 
In that case, add the bos section with the BOS version desired in the api_version setting.\nFind the SAT configuration file at ~/.config/sat/sat.toml, and look for a section like this:\n[bos] api_version = \u0026#34;v2\u0026#34; In this example, SAT is using BOS version \u0026quot;v2\u0026quot;.\nChange the line specifying the api_version to the BOS version desired (for example, \u0026quot;v1\u0026quot;).\n[bos] api_version = \u0026#34;v1\u0026#34; If applicable, uncomment the api_version line.\nIf the system is using an existing SAT configuration file from a recent version of SAT, the api_version line might be commented out like this:\n[bos] # api_version = \u0026#34;v2\u0026#34; If the line is commented out, SAT will still use the default BOS version. To ensure a different BOS version is used, uncomment the api_version line by removing # at the beginning of the line.\n" +}, +{ + "uri": "/docs-sat/en-26/release_notes/sat_2.2_release_notes/", + "title": "Changes in SAT 2.2", + "tags": [], + "description": "", + "content": "Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022.\nThis version of the SAT product included:\nVersion 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components:\nVersion 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release.\nKnown Issues in SAT 2.2 sat Command Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, the sat command will not be found.\n((CONTAINER_ID) sat-container#) Here is an example output after running sat status:\nbash: sat: command not found ((CONTAINER_ID) sat-container#) This can be resolved temporarily in one of two ways. 
/sat/venv/bin/ may be prepended to the $PATH environment variable:\nexport PATH=/sat/venv/bin:$PATH sat status ((CONTAINER_ID) sat-container#) Another option is to source the file /sat/venv/bin/activate:\nsource /sat/venv/bin/activate sat status Tab Completion Unavailable in sat bash Shell After launching a shell within the SAT container with sat bash, tab completion for sat commands does not work.\n((CONTAINER_ID) sat-container#) This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash:\nsource /etc/bash_completion.d/sat-completion.bash OCI Runtime Permission Error when Running sat in Root Directory sat commands will not work if the current directory is /.\n(ncn-m001#) Here is an example output after running sat --help:\nError: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error To resolve, run sat in another directory.\nDuplicate Mount Error when Running sat in Configuration Directory sat commands will not work if the current directory is ~/.config/sat.\n(ncn-m001#) Here is an example output after running sat --help:\nError: /root/.config/sat: duplicate mount destination To resolve, run sat in another directory.\nNew sat Commands sat bootprep automates the creation of CFS configurations, the build and customization of IMS images, and the creation of BOS session templates. For more information, see SAT Bootprep. sat slscheck performs a check for consistency between the System Layout Service (SLS) and the Hardware State Manager (HSM). sat bmccreds provides a simple interface for interacting with the System Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist displays hardware component history by XName (location) or by its Field-Replaceable Unit ID (FRUID). This command queries the Hardware State Manager (HSM) API to obtain this information. 
Since the sat hwhist command supports querying for the history of a component by its FRUID, the FRUID of components has been added to the output of sat hwinv. Additional Install Automation The following automation has been added to the install script, install.sh:\nWait for the completion of the sat-config-import Kubernetes job, which is started when the sat-cfs-install Helm chart is deployed. Automate the modification of the CFS configuration, which applies to master management NCNs (for example, ncn-personalization). Changes to Product Catalog Data Schema The SAT product uploads additional information to the cray-product-catalog Kubernetes ConfigMap detailing the components it provides, including container (Docker) images, Helm charts, RPMs, and package repositories.\nThis information is used to support uninstall and downgrade of SAT product versions moving forward.\nSupport for Uninstall and Downgrade of SAT Versions Beginning with the 2.2 release, SAT now provides partial support for the uninstall and downgrade of the SAT product stream.\nFor more information, see Uninstall: Remove a Version of SAT and Downgrade: Switch Between SAT Versions.\nImprovements to sat status A Subrole column has been added to the output of sat status. This allows easy differentiation between master, worker, and storage nodes in the management role, for example.\nHostname information from SLS has been added to sat status output.\nAdded Support for JSON Output Support for JSON-formatted output has been added to commands which currently support the --format option, such as hwinv, status, and showrev.\nUsability Improvements Many usability improvements have been made to multiple sat commands, mostly related to filtering command output. The following are some highlights:\nAdded --fields option to display only specific fields for subcommands which display tabular reports. Added ability to filter on exact matches of a field name. 
Improved handling of multiple matches of a field name in --filter queries so that the first match is used, similar to --sort-by. Added support for --filter, --fields, and --reverse for summaries displayed by sat hwinv. Added borders to summary tables generated by sat hwinv. Improved documentation in the man pages. Default Log Level Changed The default log level for stderr has been changed from \u0026ldquo;WARNING\u0026rdquo; to \u0026ldquo;INFO\u0026rdquo;. For more information, see Update SAT Logging.\nMore Granular Log Level Configuration Options With the command-line options --loglevel-stderr and --loglevel-file, the log level can now be configured separately for stderr and the log file.\nThe existing --loglevel option is now an alias for the --loglevel-stderr option.\nPodman Wrapper Script Improvements The Podman wrapper script is the script installed at /usr/bin/sat on the master management NCNs by the cray-sat-podman RPM that runs the cray-sat container in podman. The following subsections detail improvements that were made to the wrapper script in this release.\nMounting of $HOME and Current Directories in cray-sat Container The Podman wrapper script that launches the cray-sat container with podman has been modified to mount the user\u0026rsquo;s current directory and home directory into the cray-sat container to provide access to local files in the container.\nPodman Wrapper Script Documentation Improvements The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:\nEnvironment variables that affect execution of the wrapper script Host files and directories mounted in the container Fixes to Podman Wrapper Script Output Redirection Fixed issues with redirecting stdout and stderr, and piping output to commands, such as awk, less, and more.\nConfigurable HTTP Timeout A new sat option has been added to configure the HTTP timeout length for requests to the API gateway. 
For more information, refer to sat-man sat.\nsat bootsys Improvements Many improvements and fixes have been made to sat bootsys. The following are some highlights:\nAdded the --excluded-ncns option, which can be used to omit NCNs from the platform-services and ncn-power stages in case they are inaccessible. Disruptive shutdown stages in sat bootsys shutdown now prompt the user to continue before proceeding. A new option, --disruptive, will bypass this. Improvements to Ceph service health checks and restart during the platform-services stage of sat bootsys boot. sat xname2nid Improvements sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to a list of NIDs in those locations.\nA new --format option has been added to sat xname2nid. It sets the output format to either \u0026ldquo;range\u0026rdquo; (the default) or \u0026ldquo;NID\u0026rdquo;. The \u0026ldquo;range\u0026rdquo; format displays NIDs in a compressed range format suitable for use with a workload manager like Slurm.\nUsage of v2 HSM API The commands which interact with HSM (for example, sat status and sat hwinv) now use the v2 HSM API.\nsat diag Limited to HSN Switches sat diag will now only operate against HSN switches by default. These are the only controllers that support running diagnostics with HMJTD.\nsat showrev Enhancements A column has been added to the output of sat showrev that indicates whether a product version is \u0026ldquo;active\u0026rdquo;. The definition of \u0026ldquo;active\u0026rdquo; varies across products, and not all products may set an \u0026ldquo;active\u0026rdquo; version.\nFor SAT, the active version is the one with its hosted-type package repository in Nexus set as the member of the group-type package repository in Nexus, meaning that it will be used when installing the cray-sat-podman RPM.\ncray-sat Container Image Size Reduction The size of the cray-sat container image has been approximately cut in half by leveraging multi-stage builds. 
This also improved the repeatability of the unit tests by running them in the container.\nBug Fixes Minor bug fixes were made in cray-sat and in cray-sat-podman. For full change lists, refer to each repository\u0026rsquo;s CHANGELOG.md file.\n" +}, +{ + "uri": "/docs-sat/en-26/about_sat/command_authentication/", + "title": "SAT Command Authentication", + "tags": [], + "description": "", + "content": "SAT Command Authentication Some SAT subcommands make requests to the HPE Cray EX services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes control plane node where SAT commands are run.\nFor more information on authentication requests, see System Security and Authentication in the Cray System Management Documentation. The following is a table describing SAT commands and the types of authentication they require.\nSAT Subcommand Authentication/Credentials Required Man Page Description sat auth Responsible for authenticating to the API gateway and storing a token. sat-auth Authenticate to the API gateway and save the token. sat bmccreds Requires authentication to the API gateway. sat-bmccreds Set BMC passwords. sat bootprep Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. sat-bootprep Prepare to boot nodes with images and configurations. sat bootsys Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. 
sat-bootsys Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. sat diag Requires authentication to the API gateway. sat-diag Launch diagnostics on the HSN switches and generate a report. sat firmware Requires authentication to the API gateway. sat-firmware Report firmware version. sat hwhist Requires authentication to the API gateway. sat-hwhist Report hardware component history. sat hwinv Requires authentication to the API gateway. sat-hwinv Give a listing of the hardware of the HPE Cray EX system. sat hwmatch Requires authentication to the API gateway. sat-hwmatch Report hardware mismatches. sat init None sat-init Create a default SAT configuration file. sat jobstat Requires authentication to the API gateway. sat-jobstat Check the status of jobs and applications. sat k8s Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. sat-k8s Report on Kubernetes replica sets that have co-located (on the same node) replicas. sat linkhealth This command has been deprecated. sat nid2xname Requires authentication to the API gateway. sat-nid2xname Translate node IDs to node XNames. sat sensors Requires authentication to the API gateway. sat-sensors Report current sensor data. sat setrev Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-setrev Set HPE Cray EX system revision information. sat showrev Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. sat-showrev Print revision information for the HPE Cray EX system. sat slscheck Requires authentication to the API gateway. sat-slscheck Perform a cross-check between SLS and HSM. sat status Requires authentication to the API gateway. 
sat-status Report node status across the HPE Cray EX system. sat swap Requires authentication to the API gateway. sat-swap Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. sat xname2nid Requires authentication to the API gateway. sat-xname2nid Translate node and node BMC XNames to node IDs. sat switch This command has been deprecated. It has been replaced by sat swap. In order to authenticate to the API gateway, run the sat auth command. This command will prompt for a password on the command line. The username value is obtained from the following locations, in order of higher precedence to lower precedence:\nThe --username global command-line option. The username option in the api_gateway section of the configuration file at ~/.config/sat/sat.toml. The name of the currently logged-in user running the sat command. If credentials are entered correctly when prompted by sat auth, a token file will be obtained and saved to ~/.config/sat/tokens. Subsequent sat commands will determine the username the same way as sat auth described above and will use the token for that username if it has been obtained and saved by sat auth.\n" +}, +{ + "uri": "/docs-sat/en-26/usage/multi-tenancy/", + "title": "Configure Multi-tenancy", + "tags": [], + "description": "", + "content": "Configure Multi-tenancy SAT supports supplying tenant information to CSM services in order to allow tenant admins to use SAT within their tenant. By default, the tenant name is not set, and SAT will not send any tenant information with its requests to CSM services. 
Configure the tenant name either in the SAT configuration file or on the command line.\nConfigure the Tenant Name in the SAT Configuration File Set the tenant name in the SAT configuration file using the api_gateway.tenant_name option.\nHere is an example:\n[api_gateway] tenant_name = \u0026#34;my_tenant\u0026#34; Configure the Tenant Name on the Command Line Set the tenant name for each sat invocation using the --tenant-name option. The --tenant-name option must be specified before the subcommand name.\n(ncn-m001#) Here is an example:\nsat --tenant-name=my_tenant status " +}, +{ + "uri": "/docs-sat/en-26/release_notes/sat_2.3_release_notes/", + "title": "Changes in SAT 2.3", + "tags": [], + "description": "", + "content": "Changes in SAT 2.3 The 2.3.4 version of the SAT product includes:\nVersion 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None.\nCurrent Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share.\nFiles in the current working directory must be specified using relative paths to that directory, because the current working directory is always mounted on /sat/share. Absolute paths should be avoided, and paths that are outside of $HOME or $PWD are never accessible to the container environment.\nThe home directory is still mounted on the same path inside the container as it is on the host.\nChanges to sat bootsys The following options were added to sat bootsys.\n--bos-limit --recursive The --bos-limit option passes a given limit string to a BOS session. 
The --recursive option specifies a slot or other higher-level component in the limit string.\nChanges to sat bootprep The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS jobs after sat bootprep is run. Jobs are no longer deleted by default.\nChanges to sat status sat status now includes information about nodes\u0026rsquo; CFS configuration statuses, such as desired configuration, configuration status, and error count.\nThe output of sat status now splits different component types into different report tables.\nThe following options were added to sat status.\n--hsm-fields, --sls-fields, --cfs-fields --bos-template The --hsm-fields, --sls-fields, --cfs-fields options limit the output columns according to specified CSM services.\nThe --bos-template option filters the status report according to the specified session template\u0026rsquo;s boot sets.\nCompatibility with CSM 1.2 The following components were modified to be compatible with CSM 1.2.\nsat-cfs-install container image and Helm chart sat-install-utility container image SAT product installer GPG Checking The sat-ncn Ansible role provided by sat-cfs-install was modified to enable GPG checks on packages while leaving GPG checks disabled on repository metadata.\nSecurity Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed Python dependency versions.\nBug Fixes Minor bug fixes were made in each of the repositories. For full change lists, refer to each repository’s CHANGELOG.md file.\nThe known issues listed under the SAT 2.2 release were fixed.\n" +}, +{ + "uri": "/docs-sat/en-26/about_sat/dependencies/", + "title": "SAT Dependencies", + "tags": [], + "description": "", + "content": "SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX software stack. The following list shows these dependencies for each subcommand. 
Each service or component is listed under the product it belongs to.\nsat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 HPE Cray Supercomputing User Services Software (USS) Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diags Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None\nsat jobstat PBS HPE State Checker sat k8s CSM Kubernetes sat nid2xname CSM Hardware State Manager (HSM) sat sensors CSM Hardware State Manager (HSM) HM Collector SMA Telemetry API sat setrev CSM S3 sat showrev CSM Hardware State Manager (HSM) Kubernetes S3 sat slscheck CSM Hardware State Manager (HSM) System Layout Service (SLS) sat status CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Hardware State Manager (HSM) Image Management Service (IMS) System Layout Service (SLS) sat swap Slingshot Fabric Manager sat switch Deprecated: See sat swap\nsat xname2nid CSM Hardware State Manager (HSM) " +}, +{ + "uri": "/docs-sat/en-26/release_notes/sat_2.4_release_notes/", + "title": "Changes in SAT 2.4", + "tags": [], + "description": "", + "content": "Changes in SAT 2.4 The 2.4.13 version of the SAT product includes:\nVersion 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. 
Because of installation refactoring efforts, the following two components are no longer delivered with SAT:\nsat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. For more information, see SAT in CSM.\nSAT Installation Improvements The SAT install.sh script no longer uses a sat-cfs-install Helm chart and container image to upload its Ansible content to the sat-config-management repository in VCS. Instead, it uses Podman to run the cf-gitea-import container directly. Some of the benefits of this change include the following:\nFewer container images that need to be managed by the SAT product Simplified SAT installation without Helm charts or Loftsman manifests Reduced SAT installation time Decoupling of cray-sat container image and cray-sat-podman package Decoupling of cray-sat Container Image and cray-sat-podman Package In older SAT releases, the sat wrapper script that was provided by the cray-sat-podman package installed on Kubernetes control plane nodes included a hard-coded version of the cray-sat container image. As a result, every new version of the cray-sat image required a corresponding new version of the cray-sat-podman package.\nIn this release, this tight coupling of the cray-sat-podman package and the cray-sat container image was removed. The sat wrapper script provided by the cray-sat-podman package now looks for the version of the cray-sat container image in the /opt/cray/etc/sat/version file. This file is populated with the correct version of the cray-sat container image by the SAT layer of the CFS configuration that is applied to management NCNs. 
If the version file does not exist, the wrapper script defaults to the version of the cray-sat container image delivered with the latest version of CSM installed on the system.\nImproved NCN Personalization Automation The steps for performing NCN personalization as part of the SAT installation were moved out of the install.sh script and into a new update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release distribution. The new script provides additional flexibility in how it modifies the NCN personalization CFS configuration for SAT. It can modify an existing CFS configuration by name, a CFS configuration being built in a JSON file, or an existing CFS configuration that applies to certain components.\nNew sat bootprep Features The following new features were added to the sat bootprep command:\nVariable substitutions using Jinja2 templates in certain fields of the sat bootprep input file\nFor more information, see HPC CSM Software Recipe Variable Substitutions and Dynamic Variable Substitutions.\nSchema version validation in the sat bootprep input files\nFor more information, see Provide a Schema Version.\nAbility to look up images and recipes provided by products\nFor more information, see Define IMS Images.\nThe schema of the sat bootprep input files was also changed to support these new features:\nThe base recipe or image used by an image in the input file should now be specified under a base key instead of under an ims key. The old ims key is deprecated. To specify an image that depends on another image in the input file, the dependent image should specify the dependency under base.image_ref. Going forward, do not use the IMS name of the image on which it depends. The image used by a session template should now be specified under image.ims.name, image.ims.id, or image.image_ref. Specifying a string value directly under the image key is deprecated. 
For more information on defining IMS images and BOS session templates in the sat bootprep input file, see Define IMS Images and Define BOS Session Templates.\nAdded Blade Swap Support to sat swap The sat swap command was updated to support swapping compute and UAN blades with sat swap blade. This functionality is described in the following processes of the Cray System Management Documentation:\nAdding a Liquid-cooled blade to a System Using SAT Removing a Liquid-cooled blade from a System Using SAT Replace a Compute Blade Using SAT Swap a Compute Blade with a Different System Using SAT Support for BOS v2 A new v2 version of the Boot Orchestration Service (BOS) is available in CSM 1.3.0. SAT has added support for BOS v2. This impacts the following commands that interact with BOS:\nsat bootprep sat bootsys sat status By default, SAT uses BOS v1. To change the default to a different BOS version, see Change the BOS Version.\nAdded BOS Fields to sat status When using BOS v2, sat status outputs additional fields. These fields show the most recent BOS session, session template, booted image, and boot status for each node. An additional --bos-fields option was added to limit the output of sat status to these fields. The fields are not displayed when using BOS v1.\nOpen Source Repositories This is the first release of SAT built from open source code repositories. As a result, build infrastructure was changed to use an external Jenkins instance, and artifacts are now published to an external Artifactory instance. These changes should not impact the functionality of the SAT product in any way.\nSecurity CVE Mitigation The paramiko Python package version was updated from 2.9.2 to 2.10.1 to mitigate CVE-2022-24302. The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to mitigate CVE-2022-36087. Restricted Permissions on SAT Configuration Files and Directories SAT stores information used to authenticate to the API gateway with Keycloak. 
Token files are stored in the ~/.config/sat/tokens/ directory. Those files have always had permissions appropriately set to restrict them to be readable only by the user.\nKeycloak usernames used to authenticate to the API gateway are stored in the SAT configuration file at ~/.config/sat/sat.toml. Keycloak usernames are also used in the file names of tokens stored in ~/.config/sat/tokens. As an additional security measure, SAT now restricts the permissions of the SAT configuration file to be readable and writable only by the user. It also restricts the tokens directory and the entire SAT configuration directory ~/.config/sat to be accessible only by the user. This prevents other users on the system from viewing Keycloak usernames used to authenticate to the API gateway.\nBug Fixes Fixed an issue where sat init did not print a message confirming a new configuration file was created. Fixed an issue where sat showrev exited with a traceback if the file /opt/cray/etc/site_info.yaml existed but was empty. This could occur if the user exited sat setrev with Ctrl-C. Fixed outdated information in the sat bootsys man page, and added a description of the command stages. " +}, +{ + "uri": "/docs-sat/en-26/usage/sat_and_iuf/", + "title": "SAT and IUF", + "tags": [], + "description": "", + "content": "SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep.\nVariable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. The default variables of the HPC CSM Software Recipe are available in a product_vars.yaml file. 
To override the default variables, specify any site variables in a site_vars.yaml file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order.\nIUF Session Variables IUF also has special session variables internal to the iuf command that override any matching entries. Session variables are the set of product and version combinations being installed by the current IUF activity, and they are found inside IUF\u0026rsquo;s internal session_vars.yaml file. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.\nSAT Variable Limitations When using sat bootprep outside of IUF, substituting variables into the default bootprep input files might cause problems. Complex variables like \u0026quot;{{ working_branch }}\u0026quot; cannot be completely resolved outside of IUF and its internal session variables. Thus, the default product_vars.yaml file is unusable with only the sat bootprep command when variables like \u0026quot;{{ working_branch }}\u0026quot; are used. To work around this limitation when substituting complex variables, use the internal IUF session_vars.yaml file with sat bootprep and the default bootprep input files.\nFind the session_vars.yaml file from the most recent IUF activity on the system.\nThis process is documented in the upgrade prerequisites procedure of the Cray System Management Documentation. For more information, see steps 1-6 of Stage 0.3 - Option 2.\n(ncn-m001#) Use the session_vars.yaml file to substitute variables into the default bootprep input files.\nsat bootprep run --vars-file session_vars.yaml Limit SAT Bootprep Run into Stages The sat bootprep run command uses information from the bootprep input files to create CFS configurations, IMS images, and BOS session templates. 
To restrict this creation into separate stages, use the --limit option and list whether to create configurations, images, session_templates, or some combination of these. IUF uses the --limit option in this way to install, upgrade, and deploy products on a system in stages.\n(ncn-m001#) For example, to create only CFS configurations, run the following command used by the IUF update-cfs-config stage:\nsat bootprep run --limit configurations example-bootprep-input-file.yaml Example output:\nINFO: Validating given input file example-bootprep-input-file.yaml INFO: Input file successfully validated against schema INFO: Creating 3 CFS configurations ... INFO: Skipping creation of IMS images based on value of --limit option. INFO: Skipping creation of BOS session templates based on value of --limit option. (ncn-m001#) To create only IMS images and BOS session templates, run the following command used by the IUF prepare-images stage:\nsat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml Example output:\nINFO: Validating given input file example-bootprep-input-file.yaml INFO: Input file successfully validated against schema INFO: Skipping creation of CFS configurations based on value of --limit option. " +}, +{ + "uri": "/docs-sat/en-26/about_sat/introduction/", + "title": "Introduction to SAT", + "tags": [], + "description": "", + "content": "Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.\nSAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. 
For more information on SAT commands, see SAT Command Overview.\nIn CSM 1.3 and newer, the sat command is automatically available on all the Kubernetes control plane nodes. For more information, see SAT in CSM. Older versions of CSM do not have the sat command automatically available, and SAT must be installed as a separate product.\nSAT Command Overview Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.\nSAT Command Line Utility The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes control plane nodes (ncn-m nodes).\nIt is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are similarities between SAT commands and xt commands used on the Cray XC platform.\nSAT Commands The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.\nSAT Container Environment The sat command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on Kubernetes control plane nodes. A few important points about the SAT container environment include the following:\nUsing either sat or sat bash always launches a container. The SAT container does not have access to the NCN file system. There are two ways to run sat.\nInteractive: Launching a container using sat bash, followed by a sat command. Non-interactive: Running a sat command directly on a Kubernetes control plane node. In both of these cases, a container is launched in the background to execute the command. 
The first option, running sat bash first, gives an interactive shell, at which point sat commands can be run. In the second option, the container is launched, executes the command, and upon the command\u0026rsquo;s completion the container exits. The following two examples show the same action, checking the system status, using both modes.\n(ncn-m001#) Here is an example using interactive mode:\nsat bash ((CONTAINER_ID) sat-container#) Example sat command after a container is launched:\nsat status (ncn-m001#) Here is an example using non-interactive mode:\nsat status Interactive Advantages Running sat using the interactive command prompt gives the ability to read and write local files on ephemeral container storage. If multiple sat commands are being run in succession, use sat bash to launch the container beforehand. This will save time because the container does not need to be launched for each sat command.\nNon-interactive Advantages The non-interactive mode is useful if calling sat with a script, or when running a single sat command as a part of several steps that need to be executed from a management NCN.\nMan Pages - Interactive and Non-interactive Modes To view a sat man page from a Kubernetes control plane node, use sat-man on the manager node.\n(ncn-m001#) Here is an example:\nsat-man status A man page describing the SAT container environment is available on the Kubernetes control plane nodes, which can be viewed either with man sat or man sat-podman from the manager node.\n(ncn-m001#) Here are examples:\nman sat man sat-podman Command Prompt Conventions in SAT The host name in a command prompt indicates where the command must be run. The user account that must run the command is also indicated in the prompt.\nThe root or super-user account always has host name in the prompt and the # character at the end of the prompt. Any non-root account is indicated with account@hostname\u0026gt;. A non-privileged account is referred to as user. 
The command prompt inside the SAT container environment is indicated with the string as follows. It also has the # character at the end of the prompt. Command Prompt Meaning ncn-m001# Run the command as root on the specific Kubernetes control plane server which has this hostname (ncn-m001 in this example). (Non-interactive) user@hostname\u0026gt; Run the command as any non-root user on the specified hostname. (Non-interactive) (venv) user@hostname\u0026gt; Run the command as any non-root user within a Python virtual environment on the specified hostname. (Non-interactive) (CONTAINER_ID) sat-container# Run the command inside the SAT container environment by first running sat bash. (Interactive) These command prompts should be inserted into text before the fenced code block instead of inside of it. This is a change from the documentation of SAT 2.5 and earlier. Here is an example of the new use of the command prompt:\n(ncn-m001#) Example first step.\nyes \u0026gt;/dev/null " +}, +{ + "uri": "/docs-sat/en-26/release_notes/sat_2.5_release_notes/", + "title": "Changes in SAT 2.5", + "tags": [], + "description": "", + "content": "Changes in SAT 2.5 The 2.5.17 version of the SAT product includes:\nVersion 3.21.4 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.0 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows access to application and job data through the command line. It provides a table summarizing information for all jobs on the system.\nChanges to sat bootprep A list-vars subcommand was added to sat bootprep.\nIt lists the variables available for use in bootprep input files at runtime.\nA --limit option was added to sat bootprep run.\nIt restricts the creation of CFS configurations, IMS images, and BOS session templates into separate stages. 
For more information, see Limit SAT Bootprep Run into Stages.\nsat bootprep now prompts individually for each CFS configuration that already exists.\nsat bootprep can now filter images provided by a product by using a prefix.\nThis is useful when specifying the base of an image in a bootprep input file. For more information, see Define IMS Images.\nTo support product names with hyphens, sat bootprep now converts hyphens to underscores within variables.\nFor more information, see Hyphens in HPC CSM Software Recipe Variables.\nIn sat bootprep input files, the value of the playbook property of CFS configuration layers can now be rendered with Jinja2 templates.\nFor more information, see Values Supporting Jinja2 Template Rendering.\nOutput was added to sat bootprep run that summarizes the CFS configurations, IMS images, and BOS session templates created.\nFor more information, see Summary of SAT Bootprep Results.\nImprovements were made to the sat bootprep output when CFS configuration and BOS session templates are created.\nChanges to sat bootsys A reboot subcommand was added to sat bootsys. It uses BOS to reboot nodes in the bos-operations stage. The --staged-session option was added to sat bootsys. It can be used to create staged BOS sessions. For more information, refer to Staging Changes with BOS in the Cray System Management Documentation. Changes to Other sat Commands When switching SAT versions with prodmgr, a version is no longer set as \u0026ldquo;active\u0026rdquo; in the product catalog. The \u0026ldquo;active\u0026rdquo; field was also removed from the output of sat showrev. Improvements were made to the performance of sat status when using BOS version two. New Install and Upgrade Framework The new Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products with the help of sat bootprep on HPE Cray EX systems managed by Cray System Management (CSM). 
IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nBecause IUF now handles NCN personalization, information about this process was removed from the SAT documentation. Other sections in the documentation were also revised to support the new Install and Upgrade Framework. For example, the SAT Installation and SAT Upgrade sections of this guide now provide details on software and configuration content specific to SAT. The Cray System Management Documentation will indicate when these sections should be referred to for detailed information.\nFor more information on the relationship between sat bootprep and IUF, see SAT and IUF.\nNew Default BOS Version By default, SAT now uses version two of the Boot Orchestration Service (BOS). This change to BOS v2 impacts the following commands that interact with BOS:\nsat bootprep sat bootsys sat status To change the default to a different BOS version, see Change the BOS Version.\nSecurity Updated the version of certifi in the sat python package and CLI from 2021.10.8 to 2022.12.7 to resolve CVE-2022-23491. Updated the version of certifi in the sat-install-utility container image from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491. Updated the version of oauthlib from 3.2.1 to 3.2.2 to resolve CVE-2022-36087. Updated the version of cryptography from 36.0.1 to 39.0.1 to resolve CVE-2023-23931. Bug Fixes Fixed a bug that prevented sat init from creating a configuration file in the current directory when not prefixed with ./. Fixed a bug in which sat status failed with a traceback when using BOS version two and reported components whose most recent image did not exist. 
Fixed a build issue where the sat container could contain a different version of kubectl than the version found in CSM. Fixed error handling and improved command messages for sat bootprep and sat swap blade. " +}, +{ + "uri": "/docs-sat/en-26/usage/sat_bootprep/", + "title": "SAT Bootprep", + "tags": [], + "description": "", + "content": "SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands.\n(ncn-m001#) Here is an example:\nsat-man sat-bootprep The sat bootprep command helps the Install and Upgrade Framework (IUF) install, upgrade, and deploy products on systems managed by CSM. Outside of IUF, it is uncommon to use sat bootprep. For more information on this relationship, see SAT and IUF. For more information on IUF, see the IUF section of the Cray System Management Documentation.\nSAT Bootprep vs SAT Bootsys sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.\nsat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.\nEdit a Bootprep Input File The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. 
An input file is composed of three main sections, one each for configurations, images, and session templates. These sections may be specified in any order, and any of the sections may be omitted if desired.\nProvide a Schema Version The sat bootprep input file is validated against a versioned schema definition. The input file should specify the version of the schema with which it is compatible under a schema_version key. For example:\n--- schema_version: 1.0.2 (ncn-m001#) The current sat bootprep input file schema version can be viewed with the following command:\nsat bootprep view-schema | grep \u0026#39;^version:\u0026#39; Example output:\nversion: \u0026#39;1.0.2\u0026#39; The sat bootprep run command validates the schema version specified in the input file. The command also makes sure that the schema version of the input file is compatible with the schema version understood by the current version of sat bootprep. For more information on schema version validation, refer to the schema_version property description in the bootprep input file schema. For more information on viewing the bootprep input file schema in either raw form or user-friendly HTML form, see View SAT Bootprep Schema.\nThe default HPC CSM Software Recipe bootprep input files provided by the hpc-csm-software-recipe release distribution already contain the correct schema version.\nDefine CFS Configurations The CFS configurations are defined under a configurations key. Under this key, list one or more configurations to create. For each configuration, give a name in addition to the list of layers that comprise the configuration.\nEach layer can be defined by a product name and optionally a version number, commit hash, or branch in the product\u0026rsquo;s configuration repository. 
If this method is used, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be supplied. However, if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.\nAlternatively, a configuration layer can be defined by explicitly referencing the desired configuration repository. Specify the intended version of the Ansible playbooks by providing a branch name or commit hash with branch or commit.\nThe following example shows a CFS configuration with two layers. The first layer is defined in terms of a product name and version, and the second layer is defined in terms of a Git clone URL and branch:\n--- configurations: - name: example-configuration layers: - name: example-product playbook: example.yml product: name: example version: 1.2.3 - name: another-example-product playbook: another-example.yml git: url: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34; branch: main When sat bootprep is run against an input file, a CFS configuration is created corresponding to each configuration in the configurations section. 
For example, the configuration created from an input file with the layers listed above might look something like the following:\n{ \u0026#34;lastUpdated\u0026#34;: \u0026#34;2022-02-07T21:47:49Z\u0026#34;, \u0026#34;layers\u0026#34;: [ { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;example.yml\u0026#34; }, { \u0026#34;cloneUrl\u0026#34;: \u0026#34;https://vcs.local/vcs/another-example-config-management.git\u0026#34;, \u0026#34;commit\u0026#34;: \u0026#34;\u0026lt;commit hash\u0026gt;\u0026#34;, \u0026#34;name\u0026#34;: \u0026#34;another example product\u0026#34;, \u0026#34;playbook\u0026#34;: \u0026#34;another-example.yml\u0026#34; } ], \u0026#34;name\u0026#34;: \u0026#34;example-configuration\u0026#34; } Define IMS Images The IMS images are defined under an images key. Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name key and a base key.\nThe name key defines the name of the resulting IMS image. The base key defines the base image to be configured or the base recipe to be built and optionally configured. One of the following keys must be present under the base key:\nUse an ims key to specify an existing image or recipe in IMS. Use a product key to specify an image or recipe provided by a particular version of a product. If a product provides more than one image or recipe, specify a filter to select one. For more information, see Filter Base Images or Recipes from a Product. Use an image_ref key to specify another image from the input file using its ref_name. 
Images may also contain the following keys:\nUse a configuration key to specify a CFS configuration with which to customize the built image. If a configuration is specified, then configuration groups must also be specified using the configuration_group_names key. Use a ref_name key to specify a unique name that can refer to this image within the input file in other images or in session templates. The ref_name key allows references to images from the input file that have dynamically generated names as described in Dynamic Variable Substitutions. Use a description key to describe the image in the bootprep input file. Note that this key is not currently used. Use Base Images or Recipes from IMS Here is an example of an image using an existing IMS recipe as its base. This example builds an IMS image from that recipe. It then configures it with a CFS configuration named example-compute-config. The example-compute-config CFS configuration can be defined under the configurations key in the same input file, or it can be an existing CFS configuration. Running sat bootprep against this input file results in an image named example-compute-image.\nimages: - name: example-compute-image description: \u0026gt; An example compute node image built from an existing IMS recipe. base: ims: name: example-compute-image-recipe type: recipe configuration: example-compute-config configuration_group_names: - Compute Use Base Images or Recipes from a Product Here is an example showing the definition of two images. The first image is built from a recipe provided by the uss product. The second image uses the first image as a base and configures it with a configuration named example-compute-config. The value of the first image\u0026rsquo;s ref_name key is used in the second image\u0026rsquo;s base.image_ref key to specify it as a dependency. 
Running sat bootprep against this input file results in two images, the first named example-uss-image and the second named example-compute-image.\nimages: - name: example-uss-image ref_name: example-uss-image description: \u0026gt; An example image built from the recipe provided by the USS product. base: product: name: uss version: 1.0.0 type: recipe - name: example-compute-image description: \u0026gt; An example image that is configured from an image built from the recipe provided by the USS product. base: image_ref: example-uss-image configuration: example-compute-config configuration_group_names: - Compute This example assumes that the given version of the uss product provides only a single IMS recipe. If more than one recipe is provided by the given version of the uss product, use a filter as described in Filter Base Images or Recipes from a Product.\nFilter Base Images or Recipes from a Product A product may provide more than one image or recipe. If this happens, filter the product\u0026rsquo;s images or recipes whenever a base image or recipe from that product is used. Beneath the base.product value within an image, specify a filter key to create a filter using the following criteria:\nUse the prefix key to filter based on a prefix matching the name of the image or recipe. Use the wildcard key to filter based on a shell-style wildcard matching the name of the image or recipe. Use the arch key to filter based on the target architecture of the image or recipe in IMS. When specifying more than one filter key, all filters must match only the desired image or recipe. An error occurs if either no images or recipes match the given filters or if more than one image or recipe matches the given filters.\nHere is an example of three IMS images built from the Kubernetes image and the Ceph storage image provided by the csm product. This example uses a prefix filter to select from the multiple images provided by the CSM product. 
The first two IMS images in the example find any image from the specified csm product version whose name starts with secure-kubernetes. The third image in the example finds any csm image whose name starts with secure-storage-ceph. All three images are then configured with a configuration named example-management-config. Running sat bootprep against this input file results in three IMS images named worker-example-csm-image, master-example-csm-image, and storage-example-csm-image.\nimages: - name: worker-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-kubernetes configuration: example-management-config configuration_group_names: - Management_Worker - name: master-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-kubernetes configuration: example-management-config configuration_group_names: - Management_Master - name: storage-example-csm-image base: product: name: csm version: 1.4.1 type: image filter: prefix: secure-storage-ceph configuration: example-management-config configuration_group_names: - Management_Storage Here is an example of two IMS images built from recipes provided by the uss product. This example uses an architecture filter to select from the multiple recipes provided by the USS product. The first image will be built from the x86_64 version of the IMS recipe provided by the specified version of the uss product. The second image will be built from the aarch64 version of the IMS recipe provided by the specified version of the uss product.\nimages: - name: example-uss-image-x86_64 ref_name: example-uss-image-x86_64 description: \u0026gt; An example image built from the x86_64 recipe provided by the USS product. base: product: name: uss version: 1.0.0 type: recipe filter: arch: x86_64 - name: example-uss-image-aarch64 ref_name: example-uss-image-aarch64 description: \u0026gt; An example image built from the aarch64 recipe provided by the USS product. 
base: product: name: uss version: 1.0.0 type: recipe filter: arch: aarch64 Define BOS Session Templates The BOS session templates are defined under the session_templates key. Each session template must provide values for the name, image, configuration, and bos_parameters keys. The name key defines the name of the resulting BOS session template. The image key defines the image to use in the BOS session template. One of the following keys must be present under the image key:\nUse an ims key to specify an existing image or recipe in IMS. Use an image_ref key to specify another image from the input file using its ref_name. The configuration key defines the CFS configuration specified in the BOS session template.\nThe bos_parameters key defines parameters that are passed through directly to the BOS session template. The bos_parameters key should contain a boot_sets key, and each boot set in the session template should be specified under boot_sets. Each boot set can contain the following keys, all of which are optional:\nUse an arch key to specify the architecture of the nodes that should be targeted by the boot set. Valid values are the same as those used by Hardware State Manager (HSM). Use a kernel_parameters key to specify the parameters passed to the kernel on the command line. Use a network key to specify the network over which the nodes boot. Use a node_list key to specify the nodes to add to the boot set. Use a node_roles_groups key to specify the HSM roles to add to the boot set. Use a node_groups key to specify the HSM groups to add to the boot set. Use a rootfs_provider key to specify the root file system provider. Use a rootfs_provider_passthrough key to specify the parameters to add to the rootfs= kernel parameter. As mentioned above, the parameters under bos_parameters are passed through directly to BOS. 
For more information on the properties of a BOS boot set, refer to BOS Session Templates in the Cray System Management Documentation.\nHere is an example of a BOS session template that refers to an existing IMS image by name and targets nodes with the role Compute and the architecture X86 in HSM:\nsession_templates: - name: example-session-template image: ims: name: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: arch: X86 kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 Here is an example of a BOS session template that refers to an image from the input file by its ref_name and targets nodes with the role Compute and the architecture ARM in HSM. Note that using the image_ref key requires that an image defined in the input file specifies example-image as the value of its ref_name key.\nsession_templates: - name: example-session-template image: image_ref: example-image configuration: example-configuration bos_parameters: boot_sets: example_boot_set: arch: ARM kernel_parameters: ip=dhcp quiet node_roles_groups: - Compute rootfs_provider: cpss3 rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0 HPC CSM Software Recipe Variable Substitutions The sat bootprep command takes any variables provided and substitutes them into the input file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order. When providing values through a variable file, sat bootprep substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe provides default variables in a product_vars.yaml variable file. This file defines information about each HPC software product included in the recipe.\nVariables are primarily substituted into the default HPC CSM Software Recipe bootprep input files through IUF. 
However, variable files can also be given to sat bootprep directly from IUF\u0026rsquo;s use of the recipe. When using variables directly with sat bootprep, there are some limitations. For more information on SAT variable limitations, see SAT and IUF. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.\nSelect an HPC CSM Software Recipe Version View a listing of the default HPC CSM Software Recipe variables and their values by running sat bootprep list-vars. For more information on options that can be used with the list-vars subcommand, refer to the man page for the sat bootprep subcommand.\nBy default, the sat bootprep command uses the variables from the latest installed version of the HPC CSM Software Recipe. Override this with the --recipe-version command line argument to sat bootprep run.\n(ncn-m001#) For example, to explicitly select the 22.11.0 version of the HPC CSM Software Recipe default variables, specify --recipe-version 22.11.0:\nsat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml Values Supporting Jinja2 Template Rendering The entire sat bootprep input file is not rendered by the Jinja2 template engine. Jinja2 template rendering of the input file is performed individually for each supported value. The values of the following keys in the bootprep input file support rendering as a Jinja2 template and thus support variables:\nThe name key of each configuration under the configurations key. The following keys of each layer under the layers key in a configuration: name playbook git.branch product.version product.branch The following keys of each image under the images key: name base.product.version base.product.filter.arch base.product.filter.prefix base.product.filter.wildcard configuration The following keys of each session template under the session_templates key: name configuration You can use Jinja2 built-in filters in values of any of the keys listed above. 
In addition, Python string methods can be called on the string variables.\nHyphens in HPC CSM Software Recipe Variables Variable names with hyphens are not allowed in Jinja2 expressions because they are parsed as an arithmetic expression instead of a single variable. To support product names with hyphens, sat bootprep converts hyphens to underscores in all top-level keys of the default HPC CSM Software Recipe variables. It also converts any variables sourced from the command line or any variable files provided directly. When referring to a variable with hyphens in the bootprep input file, keep this in mind. For example, to refer to the product version variable for slingshot-host-software in the bootprep input file, write \u0026quot;{{slingshot_host_software.version}}\u0026quot;.\nHPC CSM Software Recipe Variable Substitution Example The following example bootprep input file shows how a variable of a USS version can be used in an input file that creates a CFS configuration for computes. Only one layer is shown for brevity.\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: uss-compute-{{uss.working_branch}} playbook: cos-compute.yml product: name: uss version: \u0026#34;{{uss.version}}\u0026#34; branch: \u0026#34;{{uss.working_branch}}\u0026#34; Note: When the value of a key in the bootprep input file is a Jinja2 expression, it must be quoted to pass YAML syntax checking.\nJinja2 expressions can also use filters and Python\u0026rsquo;s built-in string methods to manipulate the variable values. For example, suppose only the major and minor components of a USS version are to be used in the branch name for the USS layer of the CFS configuration. 
Use the split string method to achieve this as follows:\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: uss-compute-{{uss.working_branch}} playbook: cos-compute.yml product: name: uss version: \u0026#34;{{uss.version}}\u0026#34; branch: integration-{{uss.version.split(\u0026#39;.\u0026#39;)[0]}}-{{uss.version.split(\u0026#39;.\u0026#39;)[1]}} Dynamic Variable Substitutions Additional variables are available besides the default variables provided by the HPC CSM Software Recipe. (For more information, see HPC CSM Software Recipe Variable Substitutions.) These additional variables are dynamic because their values are determined at run-time based on the context in which they appear. Available dynamic variables include the following:\nThe variable base.name can be used in the name of an image under the images key. The value of this variable is the name of the IMS image or recipe used as the base of this image.\nThe variable image.name can be used in the name of a session template under the session_templates key. The value of this variable is the name of the IMS image used in this session template.\nNote: The name of a session template is restricted to 45 characters. Keep this in mind when using image.name in the name of a session template.\nThese variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:\nBuilding an image from a recipe provided by a product and using the name of the recipe in the name of the resulting image Using the name of the image in the name of a session template when the image is generated as described in the previous use case Example Bootprep Input Files This section provides an example bootprep input file. 
It also gives instructions for obtaining the default bootprep input files delivered with a release of the HPC CSM Software Recipe.\nExample Bootprep Input File The following bootprep input file provides an example of using most of the features described in previous sections. It is not intended to be a complete bootprep file for the entire CSM product.\n--- configurations: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; layers: - name: uss-compute-{{uss.working_branch}} playbook: cos-compute.yml product: name: uss version: \u0026#34;{{uss.version}}\u0026#34; branch: \u0026#34;{{uss.working_branch}}\u0026#34; - name: cpe-pe_deploy-{{cpe.working_branch}} playbook: pe_deploy.yml product: name: cpe version: \u0026#34;{{cpe.version}}\u0026#34; branch: \u0026#34;{{cpe.working_branch}}\u0026#34; images: - name: \u0026#34;{{default.note}}{{base.name}}{{default.suffix}}\u0026#34; ref_name: base_uss_image base: product: name: uss type: recipe version: \u0026#34;{{uss.version}}\u0026#34; - name: \u0026#34;compute-{{base.name}}\u0026#34; ref_name: compute_image base: image_ref: base_uss_image configuration: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; configuration_group_names: - Compute session_templates: - name: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; image: image_ref: compute_image configuration: \u0026#34;{{default.note}}compute-{{recipe.version}}{{default.suffix}}\u0026#34; bos_parameters: boot_sets: compute: kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN} node_roles_groups: - Compute rootfs_provider_passthrough: \u0026#34;dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0\u0026#34; Access Default Bootprep Input Files Default bootprep input files are delivered by the HPC CSM Software Recipe product. 
Access these files by cloning the hpc-csm-software-recipe repository, as described in the Accessing sat bootprep files process of the Cray System Management Documentation.\n(ncn-m001#) Find the default input files in the bootprep directory of the cloned repository:\nls bootprep/ Generate an Example Bootprep Input File The sat bootprep generate-example command was not updated for recent bootprep schema changes. It is recommended to instead use the default bootprep input files described in Access Default Bootprep Input Files. The sat bootprep generate-example command will be updated in a future release of SAT.\nSummary of SAT Bootprep Results The sat bootprep run command uses information from the bootprep input file to create CFS configurations, IMS images, and BOS session templates. For easy reference, the command also includes output summarizing the final creation results.\n(ncn-m001#) Here is a sample table output after running sat bootprep run:\n################################################################################ CFS configurations ################################################################################ +------------------+ | name | +------------------+ | example-config-1 | | example-config-2 | +------------------+ ################################################################################ IMS images ################################################################################ +---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ | name | preconfigured_image_id | final_image_id | configuration | configuration_group_names | +---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ | example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute | 
+---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+ ################################################################################ BOS session templates ################################################################################ +------------------+----------------+ | name | configuration | +------------------+----------------+ | example-template | example-config | +------------------+----------------+ View SAT Bootprep Schema The contents of the YAML input files used by sat bootprep must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named \u0026ldquo;JSON Schema\u0026rdquo;, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.\nView the Exact Schema Specification (ncn-m001#) To view the exact schema specification, run sat bootprep view-schema.\nsat bootprep view-schema Example output:\n--- $schema: \u0026#34;https://json-schema.org/draft/2020-12/schema\u0026#34; title: Bootprep Input File description: \u0026gt; A description of the set of CFS configurations to create, the set of IMS images to create and optionally customize with the defined CFS configurations, and the set of BOS session templates to create that reference the defined images and configurations. type: object additionalProperties: false properties: ... Generate User-Friendly Documentation The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature is included with sat bootprep that generates user-friendly HTML documentation for the input file schema. 
This HTML documentation can be browsed with a web browser.\n(ncn-m001#) Create a documentation tarball using sat bootprep.\nsat bootprep generate-docs Example output:\nINFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz An alternate output directory can be specified with the --output-dir option. The generated tarball is always named bootprep-schema-docs.tar.gz.\nsat bootprep generate-docs --output-dir /tmp Example output:\nINFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz (user@hostname\u0026gt;) From another machine, copy the tarball to a local directory.\nscp root@ncn-m001:bootprep-schema-docs.tar.gz . (user@hostname\u0026gt;) Extract the contents of the tarball and open the contained index.html.\ntar xzvf bootprep-schema-docs.tar.gz Example output:\nx bootprep-schema-docs/ x bootprep-schema-docs/index.html x bootprep-schema-docs/schema_doc.css x bootprep-schema-docs/schema_doc.min.js another-machine$ open bootprep-schema-docs/index.html " +}, +{ + "uri": "/docs-sat/en-26/about_sat/sat_in_csm/", + "title": "SAT in CSM", + "tags": [], + "description": "", + "content": "SAT in CSM In CSM 1.3 and newer, the sat command is automatically available on the Kubernetes control plane, but it is still possible to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added:\nAn entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev.\nThe sat-install-utility container image is only available with the full SAT product stream. This container image provides uninstall and downgrade functionality when used with the prodmgr command. (In SAT 2.3 and older, SAT was only available to install as a separate product stream. 
Because these versions were packaged with sat-install-utility, it is still possible to uninstall these versions of SAT.)\nThe docs-sat RPM package is only available with the full SAT product stream.\nThe sat-config-management git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is only available with the full SAT product stream.\nIf the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS configurations that apply to management NCNs (for example, management-23.5.0) should not include a SAT layer.\nThe SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the Keycloak username that authenticates to the API gateway cannot be read by users other than root. Specifically, it does the following:\nModifies the sat.toml configuration file which contains the username so that it is only readable by root.\nModifies the /root/.config/sat/tokens directory so that the directory is only readable by root. This is needed because the names of the files within the tokens directory contain the username.\nRegardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other users. These permission changes only apply to files created by previous installations of SAT. In the current version of SAT all files and directories are created with the appropriate permissions.\n" +}, +{ + "uri": "/docs-sat/en-26/release_notes/sat_2.6_release_notes/", + "title": "Changes in SAT 2.6", + "tags": [], + "description": "", + "content": "Changes in SAT 2.6 The 2.6.14 version of the SAT product includes:\nVersion 3.25.10 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. 
New sat Commands No new sat commands were added in SAT 2.6.\nChanges to sat bootsys Functionality was added to the platform-services and cabinet-power stages of sat bootsys boot. This allows SAT to automatically recreate Kubernetes CronJobs that may have become stuck during shutdown, boot, or reboot.\nsat bootsys boot more reliably determines if the hms-discovery CronJob was scheduled during the cabinet-power stage.\nSAT now uses the BatchV1 Kubernetes API to manipulate CronJobs instead of the BatchV1Beta1 API.\nsat bootsys now logs the ID of all BOS sessions when performing BOS operations. A warning is logged for any BOS sessions with failed components.\nSupport for the Compute Rolling Upgrade Service (CRUS) has been removed, and the sat bootsys command will no longer interact with CRUS.\nThe bos-operations stage of sat bootsys no longer checks whether BOS session templates need any operations to be performed before creating a BOS session. BOS instead determines whether the session will need to boot or shut down any nodes to reach the desired state.\nChanges to sat bootprep Wildcard matching was added for images in sat bootprep input files. Use wildcards similar to how prefix filters were used in older versions of SAT. For more information, see Define IMS Images.\nSupport for multiple architectures was added to sat bootprep. It is now possible to filter base IMS images and recipes from products based on their target architecture. This support also allows specifying target architectures in boot sets of BOS session templates. For more information, see Filter Base Images or Recipes from a Product and Define BOS Session Templates.\nWhen specifying a base image or recipe from a product, sat bootprep can combine multiple image or recipe filters. When specifying multiple filters, the unique base image or recipe that satisfies all of the given filters is selected. 
An error occurs if either no images or recipes match the given filters or if more than one image or recipe matches the given filters.\nIn CFS configuration layers, support was added for the new imsRequireDkms field under the specialParameters section. CFS configurations in bootprep input files can specify an ims_require_dkms field in a new, optional special_parameters section for each layer.\nOther SAT Changes The SAT Kibana and Grafana dashboards were moved to the System Monitoring Application (SMA) beside other dashboards. For more information on how to view these dashboards going forward, see the HPE Cray EX System Monitoring Application Administration Guide (S-8029).\nAdd the new s3.cert_verify option to the SAT configuration file to control whether certificate verification is performed when accessing S3.\nLog messages spanning multiple lines now print the log level on each line instead of only at the beginning of the message.\nWhen certificate verification is disabled for CSM API requests, only a single warning now prints at the beginning of SAT\u0026rsquo;s invocation instead of for each request.\nsat swap blade more reliably determines if the hms-discovery CronJob was scheduled when enabling a blade following a hardware swap.\nsat swap blade will use the BatchV1 Kubernetes API to manipulate CronJobs, instead of the BatchV1Beta1 API as previously.\nCommand prompts in this guide are now inserted into text before the fenced code block instead of inside of it. This is a change from the documentation of SAT 2.5 and earlier. In addition, two new command prompts were added for better clarity. For more information, see Command Prompt Conventions in SAT.\nMulti-tenancy Support SAT 2.6 supports supplying tenant information to CSM services in order to allow tenant admins to use SAT within their tenant. 
For more information, see Configure multi-tenancy.\nSecurity Updated the version of cryptography from 36.0.1 to 41.0.0 to resolve CVE-2023-2650.\nUpdated the version of requests from 2.27.1 to 2.31.0 to resolve CVE-2023-32681.\nUpdated the version of curl/libcurl from 7.80.0-r6 to 8.1.2-r0 to address CVE-2023-27536.\nBug Fixes Improved extreme slowness in the platform-services stage of sat bootsys shutdown in cases where a large known_hosts file is used on the host where SAT is running.\nFixed a bug that caused the wrong container name to be logged when CFS configuration sessions failed on newer CSM systems.\n" +}, +{ + "uri": "/docs-sat/en-26/about_sat/view_sat_docs/", + "title": "View SAT Documentation", + "tags": [], + "description": "", + "content": "View SAT Documentation View the System Admin Toolkit (SAT) documentation both online and offline by using the information in this section.\nOnline Documentation The SAT documentation can be found online in HTML form at the following link: SAT Documentation. The navigation pane on the left of the HTML page orders topics alphabetically. Navigate an individual topic\u0026rsquo;s headings by using the Headings icon at the top of the page, as shown in the following images.\nThe documentation can also be viewed online in GitHub by navigating to the docs/ subdirectory of the docs-sat repository. Navigate an individual topic\u0026rsquo;s headings with a similar Headings icon at the top of the page, as shown in the following images.\nOffline Documentation The SAT documentation is available offline as markdown, which can be viewed with a markdown viewer or with a text editor. The offline documentation is available in the docs/ directory of the SAT release distribution as well as in RPM package format. The RPM package is installed as a part of the Ansible plays launched by the Configuration Framework Service (CFS). 
Its files are installed to /usr/share/doc/sat.\n" +}, +{ + "uri": "/docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/", + "title": "SAT Changes in Shasta v1.3.2", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.\nThe following sections detail the changes in this release.\nsat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable.\nThe sat swap switch command is equivalent to sat switch. The sat switch command will be removed in a future release.\nAddition of Stages to sat bootsys Command The sat bootsys command now has multiple stages for both the boot and shutdown actions. Please refer to the \u0026ldquo;System Power On Procedures\u0026rdquo; and \u0026ldquo;System Power Off Procedures\u0026rdquo; sections of the Cray Shasta Administration Guide (S-8001) for more details on using this command in the context of a full system power off and power on.\n" +}, +{ + "uri": "/docs-sat/en-26/external_system/", + "title": "SAT on an External System", + "tags": [], + "description": "", + "content": "SAT on an External System SAT can optionally be installed and configured on an external system to interact with CSM over the CAN.\nLimitations Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on host-based functionality on the management NCNs and will not work from an external system. This includes the following:\nThe platform-services and ncn-power stages of sat bootsys The local host information displayed by the --local option of sat showrev Installing SAT on an external system is not an officially supported configuration. 
These instructions are provided \u0026ldquo;as-is\u0026rdquo; with the hope that they can be useful for users who desire additional flexibility.\nCertain additional steps may need to be taken to install and configure SAT depending on the configuration of the external system in use. These additional steps may include provisioning virtual machines, installing packages, or configuring TLS certificates, and these steps are outside the scope of this documentation. This section covers only the steps needed to configure SAT to use externally-accessible API endpoints exposed by CSM.\nInstall and Configure SAT Prerequisites The external system must be on the Customer Access Network (CAN). Python 3.7 or newer is installed on the system. kubectl, openssh, git, and curl are installed on the external system. The root CA certificates used when installing CSM have been added to the external system\u0026rsquo;s trust store such that authenticated TLS connections can be made to the CSM REST API gateway. For more information, refer to Certificate Authority in the Cray System Management Documentation. Procedure (user@hostname\u0026gt;) Create a Python virtual environment.\nSAT_VENV_PATH=\u0026#34;$(pwd)/venv\u0026#34; python3 -m venv ${SAT_VENV_PATH} . ${SAT_VENV_PATH}/bin/activate ((venv) user@hostname\u0026gt;) Clone the SAT source code.\nTo use SAT version 3.21, this example clones the release/3.21 branch of Cray-HPE/sat.\ngit clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git Set up the SAT CSM Python dependencies to be installed from their source code.\nSAT CSM Python dependency packages are not currently distributed publicly as source packages or binary distributions. They must be installed from their source code hosted on GitHub. Also, to install the cray-product-catalog Python package, first clone it locally. 
Use the following steps to modify the SAT CSM Python dependencies so they can be installed from their source code.\n((venv) user@hostname\u0026gt;) Clone the source code for cray-product-catalog.\ngit clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog ((venv) user@hostname\u0026gt;) In the cray-product-catalog directory, create a file named .version that contains the version of cray-product-catalog.\necho 1.6.0 \u0026gt; cray-product-catalog/.version ((venv) user@hostname\u0026gt;) Open the \u0026ldquo;locked\u0026rdquo; requirements file in a text editor.\nvim sat/requirements.lock.txt Update the line containing cray-product-catalog so that it reflects the local path to cray-product-catalog.\nIt should read as follows:\n./cray-product-catalog For versions of SAT newer than 3.19, change the line containing csm-api-client to read as follows.\ncsm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 (Optional) ((venv) user@hostname\u0026gt;) Confirm that requirements.lock.txt is modified as expected.\ngrep -E \u0026#39;cray-product-catalog|csm-api-client\u0026#39; sat/requirements.lock.txt Example output:\n./cray-product-catalog csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1 Note: For versions newer than 3.19, the output will show both cray-product-catalog and csm-api-client. 
For version 3.19 and older, the output will only show cray-product-catalog.\n((venv) user@hostname\u0026gt;) Install the modified SAT dependencies.\npip install -r sat/requirements.lock.txt ((venv) user@hostname\u0026gt;) Install the SAT Python package.\npip install ./sat (Optional) ((venv) user@hostname\u0026gt;) Add the sat virtual environment to the user\u0026rsquo;s PATH environment variable.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\nIf the virtual environment is not added to the user\u0026rsquo;s PATH environment variable, then source ${SAT_VENV_PATH}/bin/activate will need to be run before running any SAT commands.\ndeactivate echo export PATH=\\\u0026#34;${SAT_VENV_PATH}/bin:${PATH}\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile source ~/.bash_profile (user@hostname\u0026gt;) Copy the file /etc/kubernetes/admin.conf from ncn-m001 to ~/.kube/config on the external system.\nNote that this file contains credentials to authenticate against the Kubernetes API as the administrative user, so it should be treated as sensitive.\nmkdir -p ~/.kube scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config Example output:\nadmin.conf 100% 5566 3.0MB/s 00:00 (user@hostname\u0026gt;) Find the CAN IP address on ncn-m001 to determine the corresponding kubernetes hostname.\nOn CSM 1.2 and newer, query the IP address of the bond0.cmn0 interface.\nssh ncn-m001 ip addr show bond0.cmn0 Example output:\n13: bond0.cmn0@bond0: \u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever On CSM versions prior to 1.2, query the IP address of the vlan007 interface.\nssh ncn-m001 ip addr show vlan007 Example output:\n13: vlan007@bond0: 
\u0026lt;BROADCAST,MULTICAST,UP,LOWER_UP\u0026gt; mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007 valid_lft forever preferred_lft forever inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link valid_lft forever preferred_lft forever (user@hostname\u0026gt;) Set the IP_ADDRESS variable to the value found in the previous step.\nIP_ADDRESS=10.102.1.11 (user@hostname\u0026gt;) Add an entry to /etc/hosts mapping the IP address to the hostname kubernetes.\necho \u0026#34;${IP_ADDRESS} kubernetes\u0026#34; | sudo tee -a /etc/hosts 10.102.1.11 kubernetes (user@hostname\u0026gt;) Modify ~/.kube/config to set the cluster server address.\nThe value of the server key for the kubernetes cluster under the clusters section should be set to https://kubernetes:6443.\n--- clusters: - cluster: certificate-authority-data: REDACTED server: https://kubernetes:6443 name: kubernetes ... (user@hostname\u0026gt;) Confirm that kubectl can access the CSM Kubernetes cluster.\nkubectl get nodes Example output:\nNAME STATUS ROLES AGE VERSION ncn-m001 Ready master 135d v1.19.9 ncn-m002 Ready master 136d v1.19.9 ncn-m003 Ready master 136d v1.19.9 ncn-w001 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w002 Ready \u0026lt;none\u0026gt; 136d v1.19.9 ncn-w003 Ready \u0026lt;none\u0026gt; 136d v1.19.9 (user@hostname\u0026gt;) Use sat init to create a configuration file for SAT.\nsat init Example output:\nINFO: Configuration file \u0026#34;/home/user/.config/sat/sat.toml\u0026#34; generated. (user@hostname\u0026gt;) Copy the platform CA certificates from the management NCN and configure the certificates for use with SAT.\nIf a shell other than bash is in use, replace ~/.bash_profile with the appropriate profile path.\nscp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt . 
echo export REQUESTS_CA_BUNDLE=\\\u0026#34;$(realpath platform-ca-certs.crt)\\\u0026#34; \u0026gt;\u0026gt; ~/.bash_profile source ~/.bash_profile Edit the SAT configuration file to set the API and S3 hostnames.\nExternally available API endpoints are given domain names in PowerDNS, so the endpoints in the configuration file should each be set to the format subdomain.system-name.site-domain. Here system-name and site-domain are replaced with the values specified during csi config init, and subdomain is the DNS name for the externally available service. For more information, refer to Externally Exposed Services in the Cray System Management Documentation.\nThe API gateway has the subdomain api, and S3 has the subdomain s3. The S3 endpoint runs on port 8080. The following options should be set in the SAT configuration file.\n[api_gateway] host = \u0026#34;api.system-name.site-domain\u0026#34; [s3] endpoint = \u0026#34;http://s3.system-name.site-domain:8080\u0026#34; Edit the SAT configuration file to specify the Keycloak user who will be accessing the REST API.\n[api_gateway] username = \u0026#34;user\u0026#34; (user@hostname\u0026gt;) Run sat auth, and enter the password when prompted.\nThe admin account used to authenticate with sat auth must be enabled in Keycloak and must have its assigned role set to admin.\nsat auth Example output:\nPassword for user: Succeeded! For more information on authentication types and authentication credentials, see SAT Command Authentication. 
For more information on Keycloak accounts and changing Role Mappings, refer to both Configure Keycloak Account and Create Internal User Accounts in the Keycloak Shasta Realm in the Cray System Management Documentation.\n(user@hostname\u0026gt;) Ensure the files are readable only by the current user.\ntouch ~/.config/sat/s3_access_key \\ ~/.config/sat/s3_secret_key chmod 600 ~/.config/sat/s3_access_key \\ ~/.config/sat/s3_secret_key (user@hostname\u0026gt;) Write the credentials to local files using kubectl.\nGenerate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the user must generate the S3 access key and secret keys and write them to a local file. SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev.\nkubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.access_key}\u0026#39; | base64 -d \u0026gt; \\ ~/.config/sat/s3_access_key kubectl get secret sat-s3-credentials -o json -o \\ jsonpath=\u0026#39;{.data.secret_key}\u0026#39; | base64 -d \u0026gt; \\ ~/.config/sat/s3_secret_key " +}, +{ + "uri": "/docs-sat/en-26/release_notes/shasta_1.3_release_notes/", + "title": "SAT Changes in Shasta v1.3", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI.\nThis version of the sat CLI contained the following commands:\nauth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the SAT Command Authentication section of this document.\n" +}, +{ + "uri": "/docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/", + "title": "SAT Changes in Shasta v1.4.1", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT 
product in Shasta v1.4.1.\nThis version of the SAT product included:\nVersion 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release.\nNew Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames:\nsat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API.\nBug Fixes Fixed a problem in sat swap where creating the offline port policy failed. Changed sat bootsys shutdown --stage bos-operations to no longer forcefully power off all compute nodes and application nodes using CAPMC when BOS sessions complete or time out. Fixed an issue with the command sat bootsys boot --stage cabinet-power. " +}, +{ + "uri": "/docs-sat/en-26/release_notes/shasta_1.4_release_notes/", + "title": "SAT Changes in Shasta v1.4", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.\nThis version of the SAT product included the following components:\nVersion 3.4.0 of the sat python package and CLI It also added the following new component:\nVersion 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release.\nSAT as an Independent Product SAT is now packaged and released as an independent product. The product deliverable is called a \u0026ldquo;release distribution\u0026rdquo;. The release distribution is a gzipped tar file containing an install script. This install script loads the cray/cray-sat container image into the Docker registry in Nexus and loads the cray-sat-podman RPM into a package repository in Nexus.\nIn this release, the cray-sat-podman package is still installed in the master and worker NCN images provided by CSM. 
This is changed in SAT 2.1.16 released in Shasta v1.5.\nSAT Running in a Container Under Podman The sat command now runs in a container under Podman. The sat executable is now installed on all nodes in the Kubernetes cluster (workers and control plane nodes). This executable is a wrapper script that starts a SAT container in Podman and invokes the sat Python CLI within that container. The admin can run individual sat commands directly on the master or worker NCNs as before, or they can run sat commands inside the SAT container after using sat bash to enter an interactive shell inside the SAT container.\nTo view man pages for sat commands, the user can run sat-man SAT_COMMAND, replacing SAT_COMMAND with the name of the sat command. Alternatively, the user can enter the sat container with sat bash and use the man command.\nNew sat init Command and Configuration File Location Change The default location of the SAT configuration file has been changed from /etc/sat.toml to ~/.config/sat/sat.toml. A new command, sat init, has been added that initializes a configuration file in the new default directory. This better supports individual users on the system who want their own configuration files.\n~/.config/sat is mounted into the container that runs under Podman, so changes are persistent across invocations of the sat container. If desired, an alternate configuration directory can be specified with the SAT_CONFIG_DIR environment variable.\nAdditionally, if a configuration file does not yet exist when a user runs a sat command, one is generated automatically.\nAdditional Types Added to sat hwinv Additional functionality has been added to sat hwinv including:\nList node enclosure power supplies with the --list-node-enclosure-power-supplies option. List node accelerators (for example, GPUs) with the --list-node-accels option. The count of node accelerators is also included for each node. 
List node accelerator risers (for example, Redstone modules) with the --list-node-accel-risers option. The count of node accelerator risers is also included for each node. List High-Speed Node Network Interface Cards (HSN NICs) with the --list-node-hsn-nics option. The count of HSN NICs is also included for each node. Documentation for these new options has been added to the man page for sat hwinv.\nSite Information Stored by sat setrev in S3 The sat setrev and sat showrev commands now use S3 to store and obtain site information, including system name, site name, serial number, install date, and system type. Since the information is stored in S3, it will now be consistent regardless of the node on which sat is executed.\nAs a result of this change, S3 credentials must be configured for SAT. For more information, see Generate SAT S3 Credentials.\nProduct Version Information Shown by sat showrev sat showrev now shows product information from the cray-product-catalog ConfigMap in Kubernetes.\nAdditional Changes to sat showrev The output from sat showrev has also been changed in the following ways:\nThe --docker and --packages options were considered misleading and have been removed. Information pertaining only to the local host, where the command is run, has been moved to the output of the --local option. Removal of sat cablecheck The sat cablecheck command has been removed. To verify that the system\u0026rsquo;s Slingshot network is cabled correctly, admins should now use the show cables command in the Slingshot Topology Tool (STT).\nsat swap Command Compatibility with Next-gen Fabric Controller The sat swap command was added in Shasta v1.3.2. This command used the Fabric Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the Fabric Controller API, so this command has been rewritten to use the new backwards-incompatible API. 
Usage of the command did not change.\nsat bootsys Functionality Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken by changes introduced in Shasta v1.4, which removed the Ansible inventory and playbooks.\nThe functionality in the platform-services stage of sat bootsys has been re-implemented to use python directly instead of Ansible. This resulted in a more robust procedure with better logging to the sat log file. Failures to stop containers on Kubernetes nodes are handled more gracefully, and more information about the containers that failed to stop, including how to debug the problem, is included.\nImprovements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.\nThe following improvements were made to the bos-operations stage of sat bootsys:\nMore information about the BOS sessions, BOA jobs, and BOA pods is printed. A command-line option, --bos-templates, and a corresponding configuration file option, bos_templates, were added, and the --cle-bos-template and --uan-bos-template options and their corresponding configuration file options were deprecated. The following functionality has been removed from sat bootsys:\nThe hsn-bringup stage of sat bootsys boot has been removed due to removal of the underlying Ansible playbook. The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is now a manual procedure. Log File Location Change The location of the sat log file has changed from /var/log/cray/sat.log to /var/log/cray/sat/sat.log. 
This change simplifies mounting this file into the sat container running under Podman.\n" +}, +{ + "uri": "/docs-sat/en-26/release_notes/shasta_1.5_release_notes/", + "title": "SAT Changes in Shasta v1.5", + "tags": [], + "description": "", + "content": "SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5.\nThis version of the SAT product included:\nVersion 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component:\nVersion 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release.\nInstall Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. The cray-sat-podman RPM is no longer installed in the management non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on all master management NCNs via an Ansible playbook which is referenced by a layer of the CFS configuration that applies to management NCNs. This CFS configuration is typically named ncn-personalization.\nThe SAT product now includes a Docker image and a Helm chart named sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart with Loftsman. This helm chart deploys a Kubernetes job that imports the SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. This repository is referenced by the layer added to the NCN personalization CFS configuration.\nRemoval of Direct Redfish Access All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:\nsat sensors sat diag sat linkhealth The sat sensors command has been rewritten to use the SMA telemetry API to obtain the latest sensor values. The command\u0026rsquo;s usage has changed slightly, but legacy options work as before, so it is backwards compatible. 
Additionally, new commands have been added.\nThe sat diag command has been rewritten to use a new service called Fox, which is delivered with the CSM-Diags product. The sat diag command now launches diagnostics using the Fox service, which launches the corresponding diagnostic programs on controllers using the Hardware Management Job and Task Daemon (HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start diagnostics over Redfish.\nThe sat linkhealth command has been removed. Its functionality has been replaced by functionality from the Slingshot Topology Tool (STT) in the fabric manager pod.\nThe Redfish username and password command line options and configuration file options have been removed. For more information, see Remove Obsolete Configuration File Sections.\nAdditional Fields in sat setrev and sat showrev sat setrev now collects the following information from the admin, which is then displayed by sat showrev:\nSystem description Product number Company name Country code Additional guidance and validation has been added to each field collected by sat setrev. This sets the stage for sdu setup to stop collecting this information and instead collect it from sat showrev or its S3 bucket.\nImprovements to sat bootsys The platform-services stage of the sat bootsys boot command has been improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph health in the correct order. The ceph-check stage has been removed as it is no longer needed.\nThe platform-services stage of sat bootsys boot now prompts for confirmation of the storage NCN hostnames in addition to the Kubernetes control plane and worker nodes.\nBug Fixes and Security Fixes Improved error handling in sat firmware. Incremented version of Alpine Linux to 3.13.2 to address a security vulnerability. Other Notable Changes Ansible has been removed from the cray-sat container image. Support for the Firmware Update Service (FUS) has been removed from the sat firmware command. 
" +}, +{ + "uri": "/docs-sat/en-26/uninstall_and_downgrade/", + "title": "SAT Uninstall and Downgrade", + "tags": [], + "description": "", + "content": "SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT This procedure can be used to uninstall a version of SAT.\nPrerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure (ncn-m001#) Use sat showrev to list versions of SAT.\nsat showrev --products --filter product_name=sat Example output:\n############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+-------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+-------------------+-----------------------+ | sat | 2.3.3 | - | - | | sat | 2.2.10 | - | - | +--------------+-----------------+-------------------+-----------------------+ (ncn-m001#) Use prodmgr to uninstall a version of SAT.\nThis command will do three things:\nRemove all hosted-type package repositories associated with the given version of SAT. Group-type repositories are not removed. Remove all container images associated with the given version of SAT. Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up in the output of sat showrev. prodmgr uninstall sat 2.2.10 Example output:\nRepository sat-2.2.10-sle-15sp2 has been removed. Removed Docker image cray/cray-sat:3.9.0 Removed Docker image cray/sat-cfs-install:1.0.2 Removed Docker image cray/sat-install-utility:1.4.0 Deleted sat-2.2.10 from product catalog. 
Downgrade: Switch Between SAT Versions This procedure can be used to downgrade the active version of SAT.\nNote: The prodmgr activate command is deprecated in SAT 2.6, and the ability to switch between SAT versions will be removed in a future release.\nPrerequisites Only versions 2.2 or newer of SAT can be switched. Older versions must be switched manually. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure (ncn-m001#) Use sat showrev to list versions of SAT.\nsat showrev --products --filter product_name=sat Example output:\n############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+--------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+--------------------+-----------------------+ | sat | 2.3.3 | - | - | | sat | 2.2.10 | - | - | +--------------+-----------------+--------------------+-----------------------+ (ncn-m001#) Use prodmgr to switch to a different version of SAT.\nThis command will do two things:\nFor all hosted-type package repositories associated with this version of SAT, set them as the sole member of their corresponding group-type repository. For example, switching to SAT version 2.2.10 sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group. Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are associated with NCNs with the role \u0026ldquo;Management\u0026rdquo; and subrole \u0026ldquo;Master\u0026rdquo; (for example, the CFS configuration management-23.5.0). Specifically, it will ensure that the layer refers to the version of SAT CFS configuration content associated with the version of SAT to which the system is switching. 
prodmgr activate sat 2.5.15 Example output:\nRepository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4. Updated CFS configurations: [management-23.5.0] Apply the modified CFS configuration to the management NCNs.\nAt this point, Nexus package repositories have been modified to set a particular package repository as active, but the SAT package may not have been updated on management NCNs.\nTo ensure that management NCNs have been updated to use the active SAT version, follow the Procedure to Apply CFS Configuration.\nProcedure to Apply CFS Configuration (ncn-m001#) Set an environment variable that refers to the name of the CFS configuration to be applied to the management NCNs.\nexport CFS_CONFIG_NAME=\u0026#34;management-23.5.0\u0026#34; Note: Refer to the output from the prodmgr activate command to find the name of the modified CFS configuration. If more than one CFS configuration was modified, use the first one.\nINFO: Successfully saved CFS configuration \u0026#34;management-23.5.0\u0026#34; (ncn-m001#) Obtain the name of the CFS configuration layer for SAT and save it in an environment variable:\nexport SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \\ | jq -r \u0026#39;.layers | map(select(.cloneUrl | contains(\u0026#34;sat-config-management.git\u0026#34;)))[0].name\u0026#39;) (ncn-m001#) Create a CFS session that executes only the SAT layer of the given CFS configuration.\nThe --configuration-limit option limits the configuration session to run only the SAT layer of the configuration.\ncray cfs sessions create --name \u0026#34;sat-session-${CFS_CONFIG_NAME}\u0026#34; --configuration-name \\ \u0026#34;${CFS_CONFIG_NAME}\u0026#34; --configuration-limit \u0026#34;${SAT_LAYER_NAME}\u0026#34; Monitor the progress of the CFS session.\n(ncn-m001#) Set an environment variable to name of the Ansible container within the pod for the CFS session:\nexport ANSIBLE_CONTAINER=$(kubectl get pod -n services \\ 
--selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \\ -o json | jq -r \u0026#39;.items[0].spec.containers | map(select(.name | contains(\u0026#34;ansible\u0026#34;))) | .[0].name\u0026#39;) (ncn-m001#) Next, get the logs for the Ansible container.\nkubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \\ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} Ansible plays, which are run by the CFS session, will install SAT on all the master management NCNs on the system. A summary of results can be found at the end of the log output.\n(ncn-m001#) The following example shows a successful session:\n... PLAY RECAP ********************************************************************* x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 Note: Ensure that the PLAY RECAPs for each session show successes for all manager NCNs before proceeding.\n(ncn-m001#) Verify that SAT was successfully configured.\nIf sat is configured, the --version command will indicate which version is installed. If sat is not properly configured, the command will fail.\nNote: This version number will differ from the version number of the SAT release distribution. This is the semantic version of the sat Python package, which is different from the version number of the overall SAT release distribution.\nsat --version Example output:\nsat 3.7.0 Note: Upon first running sat, there might be additional output while the sat container image is downloaded. This occurs the first time sat is run on each manager NCN. For example, when running sat for the first time on ncn-m001 and then for the first time on ncn-m002, this additional output is seen both times.\nTrying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037... 
Getting image source signatures Copying blob da64e8df3afc done Copying blob 0f36fd81d583 done Copying blob 12527cf455ba done ... sat 3.7.0 (ncn-m001#) Stop the typescript.\nexit SAT version x.y.z is now installed and configured:\nThe SAT RPM package is installed on the associated NCNs. Note on Procedure to Apply CFS Configuration The previous procedure is not always necessary because the CFS Batcher service automatically detects configuration changes and will automatically create new sessions to apply configuration changes according to certain rules. For more information on these rules, refer to Configuration Management with the CFS Batcher in the Cray System Management Documentation.\nThe main scenario in which the CFS batcher will not automatically re-apply the SAT layer is when the commit hash of the sat-config-management git repository has not changed between SAT versions. The previous procedure ensures the configuration is re-applied in all cases, and it is harmless if the batcher has already applied an updated configuration.\n" +}, +{ + "uri": "/docs-sat/en-26/upgrade/", + "title": "SAT Upgrade", + "tags": [], + "description": "", + "content": "SAT Upgrade Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product.\nThis document does not replicate install, upgrade, or deployment procedures detailed in the Cray System Management Documentation. This document provides details regarding software and configuration content specific to SAT which is needed when installing, upgrading, or deploying a SAT release. 
The Cray System Management Documentation will indicate when sections of this document should be referred to for detailed information.\nIUF will perform the following tasks for a release of SAT.\nIUF deliver-product stage: Uploads SAT configuration content to VCS Uploads SAT information to the CSM product catalog Uploads SAT content to Nexus repositories IUF update-vcs-config stage: Updates the VCS integration branch with new SAT configuration content if a working branch is specified IUF update-cfs-config stage: Creates a new CFS configuration for management nodes with new SAT configuration content IUF prepare-images stage: Creates updated management NCN and managed node images with new SAT content IUF management-nodes-rollout stage: Boots management NCNs with an image containing new SAT content IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF section of the Cray System Management Documentation describes how to use these tools directly if it is desirable to use them instead of IUF.\nIUF Stage Details for SAT This section describes SAT details that an administrator must be aware of before running IUF stages. Entries are prefixed with Information if no administrative action is required or Action if an administrator needs to perform tasks outside of IUF.\nupdate-vcs-config Information: This stage is only run if a VCS working branch is specified for SAT. By default, SAT does not create or specify a VCS working branch.\nupdate-cfs-config Information: This stage only applies to the management configuration and not to the managed configuration.\nprepare-images Information: This stage only applies to management images and not to managed images.\nPost-Upgrade Procedures After upgrading SAT with IUF, it is recommended to complete the following procedures before using SAT:\nRemove Obsolete Configuration File Sections Update SAT Logging Set System Revision Information Notes on the Procedures Ellipses (...) in shell output indicate omitted lines. 
In the examples below, replace x.y.z with the version of the SAT product stream being upgraded. \u0026lsquo;manager\u0026rsquo; and \u0026lsquo;master\u0026rsquo; are used interchangeably in the steps below. Remove Obsolete Configuration File Sections After upgrading SAT, if using the configuration file from a previous version, there may be configuration file sections no longer used in the new version. For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used.\n(ncn-m001#) In that case, the following warning may appear upon running sat commands.\nWARNING: Ignoring unknown section \u0026#39;redfish\u0026#39; in config file. Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.\n[redfish] username = \u0026#34;admin\u0026#34; password = \u0026#34;adminpass\u0026#34; Repeat this process for any configuration file sections for which there are \u0026ldquo;unknown section\u0026rdquo; warnings.\nUpdate SAT Logging As of SAT version 2.2, some command output that was previously printed to stdout is now logged to stderr. These messages are logged at the INFO level. The default logging threshold was changed from WARNING to INFO to accommodate this logging change. Additionally, some messages previously logged at the INFO are now logged at the DEBUG level.\nThese changes take effect automatically. However, if the default output threshold has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure that important output is shown in the terminal.\nUpdate Configuration (ncn-m001#) In the following example, the stderr log level, logging.stderr_level, is set to WARNING, which will exclude INFO-level logging from terminal output.\ngrep -A 3 logging ~/.config/sat/sat.toml Example output:\n[logging] ... 
stderr_level = \u0026#34;WARNING\u0026#34; To enable the new default behavior, comment this line out, delete it, or set the value to \u0026ldquo;INFO\u0026rdquo;.\nIf logging.stderr_level is commented out, its value will not affect logging behavior. However, it may be helpful to set its value to INFO as a reminder of the new default behavior.\nAffected Commands The following commands trigger messages that have been changed from stdout print calls to INFO-level (or WARNING- or ERROR-level) log messages:\nsat bootsys --stage shutdown --stage session-checks sat sensors The following commands trigger messages that have been changed from INFO-level log messages to DEBUG-level log messages:\nsat nid2xname sat xname2nid sat swap Set System Revision Information HPE service representatives use system revision information data to identify systems in support cases.\nPrerequisites SAT authentication has been set up during installation. See Authenticate SAT Commands. S3 credentials have been generated during installation. See Generate SAT S3 Credentials. Notes on the Procedure This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or earlier.\nProcedure Set System Revision Information.\n(ncn-m001#) Run sat setrev and follow the prompts to set the following site-specific values:\nSerial number System name System type System description Product number Company name Site name Country code System install date Tip: For \u0026ldquo;System type\u0026rdquo;, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, \u0026ldquo;System type\u0026rdquo; is EX-1C.\nsat setrev Example output:\n-------------------------------------------------------------------------------- Setting: Serial number Purpose: System identification. This will affect how snapshots are identified in the HPE backend services. 
Description: This is the top-level serial number which uniquely identifies the system. It can be requested from an HPE representative. Valid values: Alpha-numeric string, 4 - 20 characters. Type: \u0026lt;class \u0026#39;str\u0026#39;\u0026gt; Default: None Current value: None -------------------------------------------------------------------------------- Please do one of the following to set the value of the above setting: - Input a new value - Press CTRL-C to exit ... Verify System Revision Information.\n(ncn-m001#) Run sat showrev and verify the output shown in the \u0026ldquo;System Revision Information table.\u0026rdquo;\nThe following example shows sample table output.\nsat showrev Example output:\n################################################################################ System Revision Information ################################################################################ +---------------------+---------------+ | component | data | +---------------------+---------------+ | Company name | HPE | | Country code | US | | Interconnect | Sling | | Product number | R4K98A | | Serial number | 12345 | | Site name | HPE | | Slurm version | slurm 20.02.5 | | System description | Test System | | System install date | 2021-01-29 | | System name | eniac | | System type | EX-1C | +---------------------+---------------+ ################################################################################ Product Revision Information ################################################################################ +--------------+-----------------+------------------------------+------------------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+------------------------------+------------------------------+ | csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... 
| | sat | 2.0.1 | - | - | | sdu | 1.0.8 | - | - | | slingshot | 0.8.0 | - | - | | sma | 1.4.12 | - | - | +--------------+-----------------+------------------------------+------------------------------+ ################################################################################ Local Host Operating System ################################################################################ +-----------+----------------------+ | component | version | +-----------+----------------------+ | Kernel | 5.3.18-24.15-default | | SLES | SLES 15-SP2 | +-----------+----------------------+ " +}, +{ + "uri": "/docs-sat/en-26/categories/", + "title": "Categories", + "tags": [], + "description": "", + "content": "" +}, +{ + "uri": "/docs-sat/en-26/tags/", + "title": "Tags", + "tags": [], + "description": "", + "content": "" +}] \ No newline at end of file diff --git a/en-26/index.xml b/en-26/index.xml new file mode 100644 index 0000000000..d4a70ba1c3 --- /dev/null +++ b/en-26/index.xml @@ -0,0 +1,173 @@ + + + + HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + /docs-sat/en-26/ + Recent content in HPE Cray EX System Admin Toolkit (SAT) Guide on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + Wed, 11 Dec 2024 03:40:01 +0000 + + + SAT Installation + /docs-sat/en-26/install/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/install/ + SAT Installation Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product. 
+ + + Change the BOS Version + /docs-sat/en-26/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). Select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. Another way to change the BOS version is by configuring it under the api_version setting in the bos section of the SAT configuration file. If the system is using an existing SAT configuration file from an older version of SAT, the bos section might not exist. + + + Changes in SAT 2.2 + /docs-sat/en-26/release_notes/sat_2.2_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/release_notes/sat_2.2_release_notes/ + Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022. This version of the SAT product included: Version 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components: Version 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release. + + + SAT Command Authentication + /docs-sat/en-26/about_sat/command_authentication/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/command_authentication/ + SAT Command Authentication Some SAT subcommands make requests to the HPE Cray EX services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. 
+ + + Configure Multi-tenancy + /docs-sat/en-26/usage/multi-tenancy/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/multi-tenancy/ + Configure Multi-tenancy SAT supports supplying tenant information to CSM services in order to allow tenant admins to use SAT within their tenant. By default, the tenant name is not set, and SAT will not send any tenant information with its requests to CSM services. Configure the tenant name either in the SAT configuration file or on the command line. Configure the Tenant Name in the SAT Configuration File Set the tenant name in the SAT configuration file using the api_gateway. + + + Changes in SAT 2.3 + /docs-sat/en-26/release_notes/sat_2.3_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/release_notes/sat_2.3_release_notes/ + Changes in SAT 2.3 The 2.3.4 version of the SAT product includes: Version 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None. Current Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share. + + + SAT Dependencies + /docs-sat/en-26/about_sat/dependencies/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/dependencies/ + SAT Dependencies Most sat subcommands depend on services or components from other products in the HPE Cray EX software stack. The following list shows these dependencies for each subcommand. Each service or component is listed under the product it belongs to. 
sat auth CSM Keycloak sat bmccreds CSM System Configuration Service (SCSD) sat bootprep CSM Boot Orchestration Service (BOS) Configuration Framework Service (CFS) Image Management Service (IMS) Version Control Service (VCS) Kubernetes S3 sat bootsys CSM Boot Orchestration Service (BOS) Cray Advanced Platform Monitoring and Control (CAPMC) Ceph Etcd Firmware Action Service (FAS) Hardware State Manager (HSM) Kubernetes S3 HPE Cray Supercomputing User Services Software (USS) Node Memory Dump (NMD) sat diag CSM Hardware State Manager (HSM) CSM-Diags Fox sat firmware CSM Firmware Action Service (FAS) sat hwhist CSM Hardware State Manager (HSM) sat hwinv CSM Hardware State Manager (HSM) sat hwmatch CSM Hardware State Manager (HSM) sat init None + + + Changes in SAT 2.4 + /docs-sat/en-26/release_notes/sat_2.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.4_release_notes/ + Changes in SAT 2.4 The 2.4.13 version of the SAT product includes: Version 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT: sat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. + + + SAT and IUF + /docs-sat/en-26/usage/sat_and_iuf/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/sat_and_iuf/ + SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep. 
Variable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. + + + Introduction to SAT + /docs-sat/en-26/about_sat/introduction/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/introduction/ + Introduction to SAT About System Admin Toolkit (SAT) The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview. + + + Changes in SAT 2.5 + /docs-sat/en-26/release_notes/sat_2.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.5_release_notes/ + Changes in SAT 2.5 The 2.5.17 version of the SAT product includes: Version 3.21.4 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.0 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows access to application and job data through the command line. It provides a table summarizing information for all jobs on the system. + + + SAT Bootprep + /docs-sat/en-26/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands. 
(ncn-m001#) Here is an example: + + + SAT in CSM + /docs-sat/en-26/about_sat/sat_in_csm/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/sat_in_csm/ + SAT in CSM In CSM 1.3 and newer, the sat command is automatically available on the Kubernetes control plane, but it is still possible to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the sat command available in CSM. Installing the SAT product stream allows additional supporting components to be added: An entry for SAT in the cray-product-catalog Kubernetes ConfigMap is only created by installing the SAT product stream. + + + Changes in SAT 2.6 + /docs-sat/en-26/release_notes/sat_2.6_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.6_release_notes/ + Changes in SAT 2.6 The 2.6.14 version of the SAT product includes: Version 3.25.10 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. New sat Commands No new sat commands were added in SAT 2.6. Changes to sat bootsys Functionality was added to the platform-services and cabinet-power stages of sat bootsys boot. This allows SAT to automatically recreate Kubernetes CronJobs that may have become stuck during shutdown, boot, or reboot. + + + View SAT Documentation + /docs-sat/en-26/about_sat/view_sat_docs/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/about_sat/view_sat_docs/ + View SAT Documentation View the System Admin Toolkit (SAT) documentation both online and offline by using the information in this section. Online Documentation The SAT documentation can be found online in HTML form at the following link: SAT Documentation. The navigation pane on the left of the HTML page orders topics alphabetically. Navigate an individual topic&rsquo;s headings by using the Headings icon at the top of the page, as shown in the following images. 
+ + + SAT Changes in Shasta v1.3.2 + /docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/ + SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI. The following sections detail the changes in this release. sat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable. The sat swap switch command is equivalent to sat switch. + + + SAT on an External System + /docs-sat/en-26/external_system/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/external_system/ + SAT on an External System SAT can optionally be installed and configured on an external system to interact with CSM over the CAN. Limitations Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on host-based functionality on the management NCNs and will not work from an external system. This includes the following: The platform-services and ncn-power stages of sat bootsys The local host information displayed by the --local option of sat showrev Installing SAT on an external system is not an officially supported configuration. + + + SAT Changes in Shasta v1.3 + /docs-sat/en-26/release_notes/shasta_1.3_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.3_release_notes/ + SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI. This version of the sat CLI contained the following commands: auth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the SAT Command Authentication section of this document. 
+ + + SAT Changes in Shasta v1.4.1 + /docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/ + SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1. This version of the SAT product included: Version 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release. New Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames: sat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API. + + + SAT Changes in Shasta v1.4 + /docs-sat/en-26/release_notes/shasta_1.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.4_release_notes/ + SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4. This version of the SAT product included the following components: Version 3.4.0 of the sat python package and CLI It also added the following new component: Version 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release. + + + SAT Changes in Shasta v1.5 + /docs-sat/en-26/release_notes/shasta_1.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.5_release_notes/ + SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5. This version of the SAT product included: Version 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component: Version 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release. 
Install Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. + + + SAT Uninstall and Downgrade + /docs-sat/en-26/uninstall_and_downgrade/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/uninstall_and_downgrade/ + SAT Uninstall and Downgrade Uninstall: Remove a Version of SAT This procedure can be used to uninstall a version of SAT. Prerequisites Only versions 2.2 or newer of SAT can be uninstalled with prodmgr. CSM version 1.2 or newer must be installed, so that the prodmgr command is available. Procedure (ncn-m001#) Use sat showrev to list versions of SAT. sat showrev --products --filter product_name=sat Example output: ############################################################################### Product Revision Information ############################################################################### +--------------+-----------------+-------------------+-----------------------+ | product_name | product_version | images | image_recipes | +--------------+-----------------+-------------------+-----------------------+ | sat | 2. + + + SAT Upgrade + /docs-sat/en-26/upgrade/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/upgrade/ + SAT Upgrade Install and Upgrade Framework The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM. IUF capabilities are described in detail in the IUF section of the Cray System Management Documentation. The initial install and upgrade workflows described in the HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM (S-8052) detail when and how to use IUF with a new release of SAT or any other HPE Cray EX product. 
+ + + diff --git a/en-26/install/index.html b/en-26/install/index.html new file mode 100644 index 0000000000..6ae1b23543 --- /dev/null +++ b/en-26/install/index.html @@ -0,0 +1,1617 @@ + + + + + + + + + + + + SAT Installation :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Installation

+

Install and Upgrade Framework

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.

+

IUF will perform the following tasks for a release of SAT.

+
    +
  • IUF deliver-product stage: +
      +
    • Uploads SAT configuration content to VCS
    • +
    • Uploads SAT information to the CSM product catalog
    • +
    • Uploads SAT content to Nexus repositories
    • +
    +
  • +
  • IUF update-vcs-config stage: +
      +
    • Updates the VCS integration branch with new SAT configuration content if a +working branch is specified
    • +
    +
  • +
  • IUF update-cfs-config stage: +
      +
    • Creates a new CFS configuration for management nodes with new SAT configuration content
    • +
    +
  • +
  • IUF prepare-images stage: +
      +
    • Creates updated management NCN and managed node images with new SAT content
    • +
    +
  • +
  • IUF management-nodes-rollout stage: +
      +
    • Boots management NCNs with an image containing new SAT content
    • +
    +
  • +
+

IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.

+

IUF Stage Details for SAT

+

This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.

+

update-vcs-config

+

Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.

+

update-cfs-config

+

Information: This stage only applies to the management configuration and +not to the managed configuration.

+

prepare-images

+

Information: This stage only applies to management images and not to +managed images.

+

Post-Installation Procedures

+

After installing SAT with IUF, complete the following SAT configuration +procedures before using SAT:

+ +

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being installed.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
+

Authenticate SAT Commands

+

To run SAT commands on the manager NCNs, first set up authentication +to the API gateway. For more information on authentication types and +authentication credentials, see SAT Command +Authentication.

+

The admin account used to authenticate with sat auth must be enabled in +Keycloak and must have its assigned role set to admin. For more information +on Keycloak accounts and changing Role Mappings, refer to both Configure Keycloak +Account and Create Internal User Accounts in the Keycloak Shasta Realm in +the Cray System Management Documentation.

+

Prerequisites

+ +

Procedure

+

The following is the procedure to globally configure the username used by SAT and +authenticate to the API gateway.

+
    +
  1. +

    (ncn-m001#) Generate a default SAT configuration file if one does not exist.

    +
    sat init
    +

    Example output:

    +
    Configuration file "/root/.config/sat/sat.toml" generated.
    +

    Note: If the configuration file already exists, sat init prints the +following error.

    +
    ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
    +Not generating configuration file.
    +
  2. +
  3. +

    Edit ~/.config/sat/sat.toml and set the username option in the api_gateway +section of the configuration file.

    +
    username = "crayadmin"
    +
  4. +
  5. +

    (ncn-m001#) Run sat auth. Enter the password when prompted.

    +
    sat auth
    +

    Example output:

    +
    Password for crayadmin:
    +Succeeded!
    +
  6. +
  7. +

    (ncn-m001#) Other sat commands are now authenticated to make requests to the API gateway.

    +
    sat status
    +
  8. +
+

Generate SAT S3 Credentials

+

Generate S3 credentials and write them to a local file so the SAT user can access +S3 storage. In order to use the SAT S3 bucket, the System Administrator must +generate the S3 access and secret keys and write them to a local file. This +must be done on every Kubernetes control plane node where SAT commands are run.

+

SAT uses S3 storage for several purposes, most importantly to store the +site-specific information set with sat setrev (see Set System Revision +Information).

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    (ncn-m001#) Ensure the files are readable only by root.

    +
    touch /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
    chmod 600 /root/.config/sat/s3_access_key \
    +    /root/.config/sat/s3_secret_key
    +
  2. +
  3. +

    (ncn-m001#) Write the credentials to local files using kubectl.

    +
    kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.access_key}' | base64 -d > \
    +    /root/.config/sat/s3_access_key
    +
    kubectl get secret sat-s3-credentials -o json -o \
    +    jsonpath='{.data.secret_key}' | base64 -d > \
    +    /root/.config/sat/s3_secret_key
    +
  4. +
  5. +

    Verify the S3 endpoint specified in the SAT configuration file is correct.

    +
      +
    1. +

      (ncn-m001#) Get the SAT configuration file’s endpoint value.

      +

      Note: If the command’s output is commented out, indicated by an initial # +character, the SAT configuration will take the default value – "https://rgw-vip.nmn".

      +
      grep endpoint ~/.config/sat/sat.toml
      +

      Example output:

      +
      # endpoint = "https://rgw-vip.nmn"
      +
    2. +
    3. +

      (ncn-m001#) Get the sat-s3-credentials secret’s endpoint value.

      +
      kubectl get secret sat-s3-credentials -o json -o \
      +    jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
      +

      Example output:

      +
      https://rgw-vip.nmn
      +
    4. +
    5. +

      Compare the two endpoint values.

      +

      If the values differ, change the SAT configuration file’s endpoint value to +match the secret’s.

      +
    6. +
    +
  6. +
  7. +

    (ncn-m001#) Copy SAT configurations to each manager node on the system.

    +
    for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
    +    mkdir -p /root/.config/sat; \
    +    scp -pr /root/.config/sat ${i}:/root/.config; done
    +

    Note: Depending on how many manager nodes are on the system, the list of +manager nodes may be different. This example assumes three manager nodes, where +the configuration files must be copied from ncn-m001 to ncn-m002 and +ncn-m003. Therefore, the list of hosts above is ncn-m002 and ncn-m003.

    +
  8. +
+

(Optional) Configure Multi-tenancy

+

If installing SAT on a multi-tenant system, the tenant name can be configured +at this point. For more information, see Configure multi-tenancy.

+

Set System Revision Information

+

HPE service representatives use system revision information data to identify +systems in support cases.

+

Prerequisites

+ +

Procedure

+
    +
  1. +

    (ncn-m001#) Set System Revision Information.

    +

    Run sat setrev and follow the prompts to set the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, enter EX-1C for “System type” on such a system.

    +
    sat setrev
    +

    Example output:

    +
    --------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Verify System Revision Information.

    +

    (ncn-m001#) Run sat showrev and verify the output shown in the “System Revision Information” table.

    +
    sat showrev
    +

    Example table output:

    +
    ################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/index.html b/en-26/release_notes/index.html new file mode 100644 index 0000000000..ca5c34524e --- /dev/null +++ b/en-26/release_notes/index.html @@ -0,0 +1,1330 @@ + + + + + + + + + + + + SAT Release Notes :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Release Notes

+

Changes in SAT Version 2.x

+ +

SAT Changes in Shasta Version 1.x

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/index.xml b/en-26/release_notes/index.xml new file mode 100644 index 0000000000..ec48c0ba18 --- /dev/null +++ b/en-26/release_notes/index.xml @@ -0,0 +1,82 @@ + + + + SAT Release Notes on System Admin Toolkit (SAT) + /docs-sat/en-26/release_notes/ + Recent content in SAT Release Notes on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + Wed, 11 Dec 2024 03:40:01 +0000 + + + Changes in SAT 2.2 + /docs-sat/en-26/release_notes/sat_2.2_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/release_notes/sat_2.2_release_notes/ + Changes in SAT 2.2 SAT 2.2.16 was released on February 25th, 2022. This version of the SAT product included: Version 3.14.0 of the sat python package and CLI Version 1.6.4 of the sat-podman wrapper script Version 1.0.4 of the sat-cfs-install container image and Helm chart It also added the following new components: Version 1.4.3 of the sat-install-utility container image Version 2.0.2 of the cfs-config-util container image The following sections detail the changes in this release. + + + Changes in SAT 2.3 + /docs-sat/en-26/release_notes/sat_2.3_release_notes/ + Wed, 11 Dec 2024 03:40:00 +0000 + /docs-sat/en-26/release_notes/sat_2.3_release_notes/ + Changes in SAT 2.3 The 2.3.4 version of the SAT product includes: Version 3.15.4 of the sat python package and CLI Version 1.6.11 of the sat-podman wrapper script Version 1.2.0 of the sat-cfs-install container image Version 2.0.0 of the sat-cfs-install Helm chart Version 1.5.0 of the sat-install-utility container image Version 2.0.3 of the cfs-config-util container image New sat Commands None. Current Working Directory in SAT Container When running sat commands, the current working directory is now mounted in the container as /sat/share, and the current working directory within the container is also /sat/share. 
+ + + Changes in SAT 2.4 + /docs-sat/en-26/release_notes/sat_2.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.4_release_notes/ + Changes in SAT 2.4 The 2.4.13 version of the SAT product includes: Version 3.19.3 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.5.5 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. Because of installation refactoring efforts, the following two components are no longer delivered with SAT: sat-cfs-install container image sat-cfs-install Helm chart Inclusion of SAT in CSM A version of the cray-sat container image is now included in CSM. + + + Changes in SAT 2.5 + /docs-sat/en-26/release_notes/sat_2.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.5_release_notes/ + Changes in SAT 2.5 The 2.5.17 version of the SAT product includes: Version 3.21.4 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.0 of the sat-install-utility container image. Version 3.3.1 of the cfs-config-util container image. New sat Commands sat jobstat allows access to application and job data through the command line. It provides a table summarizing information for all jobs on the system. + + + Changes in SAT 2.6 + /docs-sat/en-26/release_notes/sat_2.6_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/sat_2.6_release_notes/ + Changes in SAT 2.6 The 2.6.14 version of the SAT product includes: Version 3.25.10 of the sat python package and CLI. Version 2.0.0-1 of the sat-podman wrapper script. Version 1.6.2 of the sat-install-utility container image. New sat Commands No new sat commands were added in SAT 2.6. Changes to sat bootsys Functionality was added to the platform-services and cabinet-power stages of sat bootsys boot. 
This allows SAT to automatically recreate Kubernetes CronJobs that may have become stuck during shutdown, boot, or reboot. + + + SAT Changes in Shasta v1.3.2 + /docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/ + SAT Changes in Shasta v1.3.2 Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI. The following sections detail the changes in this release. sat swap Command for Switch and Cable Replacement The sat switch command which supported operations for replacing a switch has been deprecated and replaced with the sat swap command, which now supports replacing a switch OR cable. The sat swap switch command is equivalent to sat switch. + + + SAT Changes in Shasta v1.3 + /docs-sat/en-26/release_notes/shasta_1.3_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.3_release_notes/ + SAT Changes in Shasta v1.3 Shasta v1.3 included version 2.2.3 of the sat python package and CLI. This version of the sat CLI contained the following commands: auth bootsys cablecheck diag firmware hwinv hwmatch k8s linkhealth sensors setrev showrev status swap switch For more information on each of these commands, see the SAT Command Overview and the table of commands in the SAT Command Authentication section of this document. + + + SAT Changes in Shasta v1.4.1 + /docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/ + SAT Changes in Shasta v1.4.1 We released version 2.0.4 of the SAT product in Shasta v1.4.1. This version of the SAT product included: Version 3.5.0 of the sat python package and CLI. Version 1.4.3 of the sat-podman wrapper script. The following sections detail the changes in this release. 
New Commands to Translate Between NIDs and XNames Two new commands were added to translate between NIDs and XNames: sat nid2xname sat xname2nid These commands perform this translation by making requests to the Hardware State Manager (HSM) API. + + + SAT Changes in Shasta v1.4 + /docs-sat/en-26/release_notes/shasta_1.4_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.4_release_notes/ + SAT Changes in Shasta v1.4 In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4. This version of the SAT product included the following components: Version 3.4.0 of the sat python package and CLI It also added the following new component: Version 1.4.2 of the sat-podman wrapper script The following sections detail the changes in this release. + + + SAT Changes in Shasta v1.5 + /docs-sat/en-26/release_notes/shasta_1.5_release_notes/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/release_notes/shasta_1.5_release_notes/ + SAT Changes in Shasta v1.5 We released version 2.1.16 of the SAT product in Shasta v1.5. This version of the SAT product included: Version 3.7.4 of the sat python package and CLI Version 1.4.10 of the sat-podman wrapper script It also added the following new component: Version 1.0.3 of the sat-cfs-install docker image and helm chart The following sections detail the changes in this release. Install Changes to Separate Product from CSM This release further decouples the installation of the SAT product from the CSM product. 
+ + + diff --git a/en-26/release_notes/sat_2.2_release_notes/index.html b/en-26/release_notes/sat_2.2_release_notes/index.html new file mode 100644 index 0000000000..0e994e8486 --- /dev/null +++ b/en-26/release_notes/sat_2.2_release_notes/index.html @@ -0,0 +1,1434 @@ + + + + + + + + + + + + Changes in SAT 2.2 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.2

+

SAT 2.2.16 was released on February 25th, 2022.

+

This version of the SAT product included:

+
    +
  • Version 3.14.0 of the sat python package and CLI
  • +
  • Version 1.6.4 of the sat-podman wrapper script
  • +
  • Version 1.0.4 of the sat-cfs-install container image and Helm chart
  • +
+

It also added the following new components:

+
    +
  • Version 1.4.3 of the sat-install-utility container image
  • +
  • Version 2.0.2 of the cfs-config-util container image
  • +
+

The following sections detail the changes in this release.

+

Known Issues in SAT 2.2

+

sat Command Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, the sat +command will not be found.

+

((CONTAINER_ID) sat-container#) Here is example output after running sat status:

+
bash: sat: command not found
+

((CONTAINER_ID) sat-container#) This can be resolved temporarily in one of two ways. /sat/venv/bin/ may be +prepended to the $PATH environment variable:

+
export PATH=/sat/venv/bin:$PATH
+sat status
+

((CONTAINER_ID) sat-container#) Another option is to source the file /sat/venv/bin/activate:

+
source /sat/venv/bin/activate
+sat status
+

Tab Completion Unavailable in sat bash Shell

+

After launching a shell within the SAT container with sat bash, tab completion +for sat commands does not work.

+

((CONTAINER_ID) sat-container#) This can be resolved temporarily by sourcing the file +/etc/bash_completion.d/sat-completion.bash:

+
source /etc/bash_completion.d/sat-completion.bash
+

OCI Runtime Permission Error when Running sat in Root Directory

+

sat commands will not work if the current directory is /.

+

(ncn-m001#) Here is example output after running sat --help:

+
Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+

To resolve, run sat in another directory.

+

Duplicate Mount Error when Running sat in Configuration Directory

+

sat commands will not work if the current directory is ~/.config/sat.

+

(ncn-m001#) Here is example output after running sat --help:

+
Error: /root/.config/sat: duplicate mount destination
+

To resolve, run sat in another directory.

+

New sat Commands

+
    +
  • sat bootprep automates the creation of CFS configurations, the build and +customization of IMS images, and the creation of BOS session templates. For +more information, see SAT Bootprep.
  • +
  • sat slscheck performs a check for consistency between the System Layout +Service (SLS) and the Hardware State Manager (HSM).
  • +
  • sat bmccreds provides a simple interface for interacting with the System +Configuration Service (SCSD) to set BMC Redfish credentials.
  • +
  • sat hwhist displays hardware component history by XName (location) or by +its Field-Replaceable Unit ID (FRUID). This command queries the Hardware +State Manager (HSM) API to obtain this information. Since the sat hwhist +command supports querying for the history of a component by its FRUID, the +FRUID of components has been added to the output of sat hwinv.
  • +
+

Additional Install Automation

+

The following automation has been added to the install script, install.sh:

+
    +
  • Wait for the completion of the sat-config-import Kubernetes job, which is +started when the sat-cfs-install Helm chart is deployed.
  • +
  • Automate the modification of the CFS configuration, which applies to master +management NCNs (for example, ncn-personalization).
  • +
+

Changes to Product Catalog Data Schema

+

The SAT product uploads additional information to the cray-product-catalog +Kubernetes ConfigMap detailing the components it provides, including container +(Docker) images, Helm charts, RPMs, and package repositories.

+

This information is used to support uninstall and downgrade of SAT product +versions moving forward.

+

Support for Uninstall and Downgrade of SAT Versions

+

Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and downgrade of the SAT product stream.

+

For more information, see +Uninstall: Remove a Version of SAT and +Downgrade: Switch Between SAT Versions.

+

Improvements to sat status

+

A Subrole column has been added to the output of sat status. This allows +easy differentiation between master, worker, and storage nodes in the +management role, for example.

+

Hostname information from SLS has been added to sat status output.

+

Added Support for JSON Output

+

Support for JSON-formatted output has been added to commands which currently +support the --format option, such as hwinv, status, and showrev.

+

Usability Improvements

+

Many usability improvements have been made to multiple sat commands, +mostly related to filtering command output. The following are some highlights:

+
    +
  • Added --fields option to display only specific fields for subcommands which +display tabular reports.
  • +
  • Added ability to filter on exact matches of a field name.
  • +
  • Improved handling of multiple matches of a field name in --filter queries +so that the first match is used, similar to --sort-by.
  • +
  • Added support for --filter, --fields, and --reverse for summaries +displayed by sat hwinv.
  • +
  • Added borders to summary tables generated by sat hwinv.
  • +
  • Improved documentation in the man pages.
  • +
+

Default Log Level Changed

+

The default log level for stderr has been changed from “WARNING” to “INFO”. For +more information, see Update SAT Logging.

+

More Granular Log Level Configuration Options

+

With the command-line options --loglevel-stderr and --loglevel-file, the log +level can now be configured separately for stderr and the log file.

+

The existing --loglevel option is now an alias for the --loglevel-stderr +option.

+

Podman Wrapper Script Improvements

+

The Podman wrapper script is the script installed at /usr/bin/sat on the +master management NCNs by the cray-sat-podman RPM that runs the cray-sat +container in podman. The following subsections detail improvements that were +made to the wrapper script in this release.

+

Mounting of $HOME and Current Directories in cray-sat Container

+

The Podman wrapper script that launches the cray-sat container with podman +has been modified to mount the user’s current directory and home directory into +the cray-sat container to provide access to local files in the container.

+

Podman Wrapper Script Documentation Improvements

+

The man page for the Podman wrapper script, which is accessed by typing man sat on a master management NCN, has been improved to document the following:

+
    +
  • Environment variables that affect execution of the wrapper script
  • +
  • Host files and directories mounted in the container
  • +
+

Fixes to Podman Wrapper Script Output Redirection

+

Fixed issues with redirecting stdout and stderr, and piping output to +commands, such as awk, less, and more.

+

Configurable HTTP Timeout

+

A new sat option has been added to configure the HTTP timeout length for +requests to the API gateway. For more information, refer to sat-man sat.

+

sat bootsys Improvements

+

Many improvements and fixes have been made to sat bootsys. The following are +some highlights:

+
    +
  • Added the --excluded-ncns option, which can be used to omit NCNs +from the platform-services and ncn-power stages in case they are +inaccessible.
  • +
  • Disruptive shutdown stages in sat bootsys shutdown now prompt the user to +continue before proceeding. A new option, --disruptive, will bypass this.
  • +
  • Improvements to Ceph service health checks and restart during the +platform-services stage of sat bootsys boot.
  • +
+

sat xname2nid Improvements

+

sat xname2nid can now recursively expand slot, chassis, and cabinet XNames to +a list of NIDs in those locations.

+

A new --format option has been added to sat xname2nid. It sets the output +format to either “range” (the default) or “NID”. The “range” format displays NIDs +in a compressed range format suitable for use with a workload manager like Slurm.

+

Usage of v2 HSM API

+

The commands which interact with HSM (for example, sat status and sat hwinv) +now use the v2 HSM API.

+

sat diag Limited to HSN Switches

+

sat diag will now only operate against HSN switches by default. These are the +only controllers that support running diagnostics with HMJTD.

+

sat showrev Enhancements

+

A column has been added to the output of sat showrev that indicates whether a +product version is “active”. The definition of “active” varies across products, +and not all products may set an “active” version.

+

For SAT, the active version is the one with its hosted-type package repository +in Nexus set as the member of the group-type package repository in Nexus, +meaning that it will be used when installing the cray-sat-podman RPM.

+

cray-sat Container Image Size Reduction

+

The size of the cray-sat container image has been approximately cut in half by +leveraging multi-stage builds. This also improved the repeatability of the unit +tests by running them in the container.

+

Bug Fixes

+

Minor bug fixes were made in cray-sat and in cray-sat-podman. For full +change lists, refer to each repository’s CHANGELOG.md file.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/sat_2.3_release_notes/index.html b/en-26/release_notes/sat_2.3_release_notes/index.html new file mode 100644 index 0000000000..41bcd89a20 --- /dev/null +++ b/en-26/release_notes/sat_2.3_release_notes/index.html @@ -0,0 +1,1313 @@ + + + + + + + + + + + + Changes in SAT 2.3 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.3

+

The 2.3.4 version of the SAT product includes:

+
    +
  • Version 3.15.4 of the sat python package and CLI
  • +
  • Version 1.6.11 of the sat-podman wrapper script
  • +
  • Version 1.2.0 of the sat-cfs-install container image
  • +
  • Version 2.0.0 of the sat-cfs-install Helm chart
  • +
  • Version 1.5.0 of the sat-install-utility container image
  • +
  • Version 2.0.3 of the cfs-config-util container image
  • +
+

New sat Commands

+

None.

+

Current Working Directory in SAT Container

+

When running sat commands, the current working directory is now mounted in the +container as /sat/share, and the current working directory within the container +is also /sat/share.

+

Files in the current working directory must be specified using relative paths to +that directory, because the current working directory is always mounted on +/sat/share. Absolute paths should be avoided, and paths that are outside of +$HOME or $PWD are never accessible to the container environment.

+

The home directory is still mounted on the same path inside the container as it +is on the host.

+

Changes to sat bootsys

+

The following options were added to sat bootsys.

+
    +
  • --bos-limit
  • +
  • --recursive
  • +
+

The --bos-limit option passes a given limit string to a BOS session. The +--recursive option specifies a slot or other higher-level component in the +limit string.

+

Changes to sat bootprep

+

The --delete-ims-jobs option was added to sat bootprep run. It deletes IMS +jobs after sat bootprep is run. Jobs are no longer deleted by default.

+

Changes to sat status

+

sat status now includes information about nodes’ CFS configuration statuses, +such as desired configuration, configuration status, and error count.

+

The output of sat status now splits different component types into different +report tables.

+

The following options were added to sat status.

+
    +
  • --hsm-fields, --sls-fields, --cfs-fields
  • +
  • --bos-template
  • +
+

The --hsm-fields, --sls-fields, --cfs-fields options limit the output +columns according to specified CSM services.

+

The --bos-template option filters the status report according to the specified +session template’s boot sets.

+

Compatibility with CSM 1.2

+

The following components were modified to be compatible with CSM 1.2.

+
    +
  • sat-cfs-install container image and Helm chart
  • +
  • sat-install-utility container image
  • +
  • SAT product installer
  • +
+

GPG Checking

+

The sat-ncn Ansible role provided by sat-cfs-install was modified to enable +GPG checks on packages while leaving GPG checks disabled on repository metadata.

+

Security

+

Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and +refreshed Python dependency versions.

+

Bug Fixes

+

Minor bug fixes were made in each of the repositories. For full change lists, +refer to each repository’s CHANGELOG.md file.

+

The known issues listed under the SAT 2.2 release +were fixed.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/sat_2.4_release_notes/index.html b/en-26/release_notes/sat_2.4_release_notes/index.html new file mode 100644 index 0000000000..40fe6c3014 --- /dev/null +++ b/en-26/release_notes/sat_2.4_release_notes/index.html @@ -0,0 +1,1404 @@ + + + + + + + + + + + + Changes in SAT 2.4 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.4

+

The 2.4.13 version of the SAT product includes:

+
    +
  • Version 3.19.3 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.5.5 of the sat-install-utility container image.
  • +
  • Version 3.3.1 of the cfs-config-util container image.
  • +
+

Because of installation refactoring efforts, the following two components +are no longer delivered with SAT:

+
    +
  • sat-cfs-install container image
  • +
  • sat-cfs-install Helm chart
  • +
+

Inclusion of SAT in CSM

+

A version of the cray-sat container image is now included in CSM. For more +information, see SAT in CSM.

+

SAT Installation Improvements

+

The SAT install.sh script no longer uses a sat-cfs-install Helm chart and +container image to upload its Ansible content to the sat-config-management +repository in VCS. Instead, it uses Podman to run the cf-gitea-import container +directly. Some of the benefits of this change include the following:

+ +

Decoupling of cray-sat Container Image and cray-sat-podman Package

+

In older SAT releases, the sat wrapper script that was provided by the +cray-sat-podman package installed on Kubernetes control plane nodes included a +hard-coded version of the cray-sat container image. As a result, every new +version of the cray-sat image required a corresponding new version of the +cray-sat-podman package.

+

In this release, this tight coupling of the cray-sat-podman package and the +cray-sat container image was removed. The sat wrapper script provided +by the cray-sat-podman package now looks for the version of the cray-sat +container image in the /opt/cray/etc/sat/version file. This file is populated +with the correct version of the cray-sat container image by the SAT layer of +the CFS configuration that is applied to management NCNs. If the version file +does not exist, the wrapper script defaults to the version of the cray-sat +container image delivered with the latest version of CSM installed on the system.

+

Improved NCN Personalization Automation

+

The steps for performing NCN personalization as part of the SAT installation +were moved out of the install.sh script and into a new +update-mgmt-ncn-cfs-config.sh script that is provided in the SAT release +distribution. The new script provides additional flexibility in how it modifies +the NCN personalization CFS configuration for SAT. It can modify an existing CFS +configuration by name, a CFS configuration being built in a JSON file, or an +existing CFS configuration that applies to certain components.

+

New sat bootprep Features

+

The following new features were added to the sat bootprep command:

+ +

The schema of the sat bootprep input files was also changed to support these +new features:

+
    +
  • The base recipe or image used by an image in the input file should now be +specified under a base key instead of under an ims key. The old ims +key is deprecated.
  • +
  • To specify an image that depends on another image in the input file, the +dependent image should specify the dependency under base.image_ref. +Going forward, do not use the IMS name of the image on which it depends.
  • +
  • The image used by a session template should now be specified under +image.ims.name, image.ims.id, or image.image_ref. Specifying a string +value directly under the image key is deprecated.
  • +
+

For more information on defining IMS images and BOS session templates in the +sat bootprep input file, see Define IMS Images +and Define BOS Session Templates.

+

Added Blade Swap Support to sat swap

+

The sat swap command was updated to support swapping compute and UAN blades +with sat swap blade. This functionality is described in the following processes +of the Cray System Management Documentation:

+
    +
  • Adding a Liquid-cooled blade to a System Using SAT
  • +
  • Removing a Liquid-cooled blade from a System Using SAT
  • +
  • Replace a Compute Blade Using SAT
  • +
  • Swap a Compute Blade with a Different System Using SAT
  • +
+

Support for BOS v2

+

A new v2 version of the Boot Orchestration Service (BOS) is available in CSM +1.3.0. SAT has added support for BOS v2. This impacts the following commands +that interact with BOS:

+
    +
  • sat bootprep
  • +
  • sat bootsys
  • +
  • sat status
  • +
+

By default, SAT uses BOS v1. To change the default to a different BOS version, +see Change the BOS Version.

+

Added BOS Fields to sat status

+

When using BOS v2, sat status outputs additional fields. These fields show +the most recent BOS session, session template, booted image, and boot status for +each node. An additional --bos-fields option was added to limit the output of +sat status to these fields. The fields are not displayed when using BOS v1.

+

Open Source Repositories

+

This is the first release of SAT built from open source code repositories. +As a result, build infrastructure was changed to use an external Jenkins instance, +and artifacts are now published to an external Artifactory instance. These +changes should not impact the functionality of the SAT product in any way.

+

Security

+

CVE Mitigation

+
    +
  • The paramiko Python package version was updated from 2.9.2 to 2.10.1 to +mitigate CVE-2022-24302.
  • +
  • The oauthlib Python package version was updated from 3.2.0 to 3.2.1 to +mitigate CVE-2022-36087.
  • +
+

Restricted Permissions on SAT Configuration Files and Directories

+

SAT stores information used to authenticate to the API gateway with Keycloak. +Token files are stored in the ~/.config/sat/tokens/ directory. Those files +have always had permissions appropriately set to restrict them to be readable +only by the user.

+

Keycloak usernames used to authenticate to the API gateway are stored in the +SAT configuration file at ~/.config/sat/sat.toml. Keycloak usernames are also +used in the file names of tokens stored in ~/.config/sat/tokens. As an +additional security measure, SAT now restricts the permissions of the SAT +configuration file to be readable and writable only by the user. It also +restricts the tokens directory and the entire SAT configuration directory +~/.config/sat to be accessible only by the user. This prevents other users on +the system from viewing Keycloak usernames used to authenticate to the API +gateway.

+

Bug Fixes

+
    +
  • Fixed an issue where sat init did not print a message confirming a new +configuration file was created.
  • +
  • Fixed an issue where sat showrev exited with a traceback if the file +/opt/cray/etc/site_info.yaml existed but was empty. This could occur if the +user exited sat setrev with Ctrl-C.
  • +
  • Fixed outdated information in the sat bootsys man page, and added a +description of the command stages.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/sat_2.5_release_notes/index.html b/en-26/release_notes/sat_2.5_release_notes/index.html new file mode 100644 index 0000000000..8147d8cd77 --- /dev/null +++ b/en-26/release_notes/sat_2.5_release_notes/index.html @@ -0,0 +1,1368 @@ + + + + + + + + + + + + Changes in SAT 2.5 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.5

+

The 2.5.17 version of the SAT product includes:

+
    +
  • Version 3.21.4 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.6.0 of the sat-install-utility container image.
  • +
  • Version 3.3.1 of the cfs-config-util container image.
  • +
+

New sat Commands

+

sat jobstat allows access to application and job data through the command +line. It provides a table summarizing information for all jobs on the system.

+

Changes to sat bootprep

+
    +
  • +

    A list-vars subcommand was added to sat bootprep.

    +

    It lists the variables available for use in bootprep input files at runtime.

    +
  • +
  • +

    A --limit option was added to sat bootprep run.

    +

    It restricts the creation of CFS configurations, IMS images, and BOS session +templates into separate stages. For more information, see +Limit SAT Bootprep Run into Stages.

    +
  • +
  • +

    sat bootprep now prompts individually for each CFS configuration that +already exists.

    +
  • +
  • +

    sat bootprep can now filter images provided by a product by using a prefix.

    +

    This is useful when specifying the base of an image in a bootprep input +file. For more information, see +Define IMS Images.

    +
  • +
  • +

    To support product names with hyphens, sat bootprep now converts hyphens to +underscores within variables.

    +

    For more information, see +Hyphens in HPC CSM Software Recipe Variables.

    +
  • +
  • +

    In sat bootprep input files, the value of the playbook property of CFS +configuration layers can now be rendered with Jinja2 templates.

    +

    For more information, see +Values Supporting Jinja2 Template Rendering.

    +
  • +
  • +

    Output was added to sat bootprep run that summarizes the CFS configurations, +IMS images, and BOS session templates created.

    +

    For more information, see +Summary of SAT Bootprep Results.

    +
  • +
  • +

    Improvements were made to the sat bootprep output when CFS configuration +and BOS session templates are created.

    +
  • +
+

Changes to sat bootsys

+
    +
  • A reboot subcommand was added to sat bootsys. It uses BOS to reboot +nodes in the bos-operations stage.
  • +
  • The --staged-session option was added to sat bootsys. It can be used to +create staged BOS sessions. For more information, refer to Staging Changes +with BOS in the Cray System Management Documentation.
  • +
+

Changes to Other sat Commands

+
    +
  • When switching SAT versions with prodmgr, a version is no longer set as +“active” in the product catalog. The “active” field was also removed from the +output of sat showrev.
  • +
  • Improvements were made to the performance of sat status when using BOS +version two.
  • +
+

New Install and Upgrade Framework

+

The new Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products with the help of sat bootprep on HPE Cray EX +systems managed by Cray System Management (CSM). IUF capabilities are described +in detail in the IUF section +of the Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

Because IUF now handles NCN personalization, information about this process was +removed from the SAT documentation. Other sections in the documentation were +also revised to support the new Install and Upgrade Framework. For example, the +SAT Installation and SAT Upgrade sections of this +guide now provide details on software and configuration content specific to SAT. +The Cray System Management Documentation +will indicate when these sections should be referred to for detailed information.

+

For more information on the relationship between sat bootprep and IUF, see +SAT and IUF.

+

New Default BOS Version

+

By default, SAT now uses version two of the Boot Orchestration Service (BOS). +This change to BOS v2 impacts the following commands that interact with BOS:

+
    +
  • sat bootprep
  • +
  • sat bootsys
  • +
  • sat status
  • +
+

To change the default to a different BOS version, see +Change the BOS Version.

+

Security

+
    +
  • Updated the version of certifi in the sat python package and CLI from +2021.10.8 to 2022.12.7 to resolve CVE-2022-23491.
  • +
  • Updated the version of certifi in the sat-install-utility container image +from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491.
  • +
  • Updated the version of oauthlib from 3.2.1 to 3.2.2 to resolve CVE-2022-36087.
  • +
  • Updated the version of cryptography from 36.0.1 to 39.0.1 to resolve +CVE-2023-23931.
  • +
+

Bug Fixes

+
    +
  • Fixed a bug that prevented sat init from creating a configuration file in +the current directory when not prefixed with ./.
  • +
  • Fixed a bug in which sat status failed with a traceback when using BOS +version two and reported components whose most recent image did not exist.
  • +
  • Fixed a build issue where the sat container could contain a different +version of kubectl than the version found in CSM.
  • +
  • Fixed error handling and improved command messages for sat bootprep and +sat swap blade.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/sat_2.6_release_notes/index.html b/en-26/release_notes/sat_2.6_release_notes/index.html new file mode 100644 index 0000000000..aeb8204104 --- /dev/null +++ b/en-26/release_notes/sat_2.6_release_notes/index.html @@ -0,0 +1,1364 @@ + + + + + + + + + + + + Changes in SAT 2.6 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Changes in SAT 2.6

+

The 2.6.14 version of the SAT product includes:

+
    +
  • Version 3.25.10 of the sat python package and CLI.
  • +
  • Version 2.0.0-1 of the sat-podman wrapper script.
  • +
  • Version 1.6.2 of the sat-install-utility container image.
  • +
+

New sat Commands

+

No new sat commands were added in SAT 2.6.

+

Changes to sat bootsys

+
    +
  • +

    Functionality was added to the platform-services and cabinet-power +stages of sat bootsys boot. This allows SAT to automatically recreate +Kubernetes CronJobs that may have become stuck during shutdown, boot, or +reboot.

    +
  • +
  • +

    sat bootsys boot more reliably determines if the hms-discovery CronJob +was scheduled during the cabinet-power stage.

    +
  • +
  • +

    SAT now uses the BatchV1 Kubernetes API to manipulate CronJobs instead of the +BatchV1Beta1 API.

    +
  • +
  • +

    sat bootsys now logs the ID of all BOS sessions when performing BOS +operations. A warning is logged for any BOS sessions with failed +components.

    +
  • +
  • +

    Support for the Compute Rolling Upgrade Service (CRUS) has been removed, +and the sat bootsys command will no longer interact with CRUS.

    +
  • +
  • +

    The bos-operations stage of sat bootsys no longer checks whether BOS +session templates need any operations to be performed before creating a BOS +session. BOS instead determines whether the session will need to boot or +shut down any nodes to reach the desired state.

    +
  • +
+

Changes to sat bootprep

+
    +
  • +

    Wildcard matching was added for images in sat bootprep input files. Use +wildcards similar to how prefix filters were used in older versions of SAT. +For more information, see Define IMS Images.

    +
  • +
  • +

    Support for multiple architectures was added to sat bootprep. It is now +possible to filter base IMS images and recipes from products based on their +target architecture. This support also allows specifying target architectures +in boot sets of BOS session templates. For more information, see +Filter Base Images or Recipes from a Product +and +Define BOS Session Templates.

    +
  • +
  • +

    When specifying a base image or recipe from a product, sat bootprep +can combine multiple image or recipe filters. When specifying multiple +filters, the unique base image or recipe that satisfies all of the given +filters is selected. An error occurs if either no images or recipes match the +given filters or if more than one image or recipe matches the given filters.

    +
  • +
  • +

    In CFS configuration layers, support was added for the new imsRequireDkms +field under the specialParameters section. CFS configurations in bootprep +input files can specify an ims_require_dkms field in a new, optional +special_parameters section for each layer.

    +
  • +
+

Other SAT Changes

+
    +
  • +

    The SAT Kibana and Grafana dashboards were moved to the System Monitoring +Application (SMA) beside other dashboards. For more information on how to +view these dashboards going forward, see the HPE Cray EX System Monitoring +Application Administration Guide (S-8029).

    +
  • +
  • +

    Add the new s3.cert_verify option to the SAT configuration file to +control whether certificate verification is performed when accessing S3.

    +
  • +
  • +

    Log messages spanning multiple lines now print the log level on each line +instead of only at the beginning of the message.

    +
  • +
  • +

    When certificate verification is disabled for CSM API requests, only a single +warning now prints at the beginning of SAT’s invocation instead of for +each request.

    +
  • +
  • +

    sat swap blade more reliably determines if the hms-discovery CronJob was +scheduled when enabling a blade following a hardware swap.

    +
  • +
  • +

    sat swap blade now uses the BatchV1 Kubernetes API to manipulate CronJobs +instead of the BatchV1Beta1 API.

    +
  • +
  • +

    Command prompts in this guide are now inserted into text before the +fenced code block instead of inside of it. This is a change from the +documentation of SAT 2.5 and earlier. In addition, two new command prompts +were added for better clarity. For more information, see +Command Prompt Conventions in SAT.

    +
  • +
+

Multi-tenancy Support

+

SAT 2.6 supports supplying tenant information to CSM services in order to allow +tenant admins to use SAT within their tenant. For more information, see +Configure multi-tenancy.

+

Security

+
    +
  • +

    Updated the version of cryptography from 36.0.1 to 41.0.0 to resolve +CVE-2023-2650.

    +
  • +
  • +

    Updated the version of requests from 2.27.1 to 2.31.0 to resolve +CVE-2023-32681.

    +
  • +
  • +

    Updated the version of curl/libcurl from 7.80.0-r6 to 8.1.2-r0 to address +CVE-2023-27536.

    +
  • +
+

Bug Fixes

+
    +
  • +

    Improved extreme slowness in the platform-services stage of +sat bootsys shutdown in cases where a large known_hosts file is used on +the host where SAT is running.

    +
  • +
  • +

    Fixed a bug that caused the wrong container name to be logged when CFS +configuration sessions failed on newer CSM systems.

    +
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/shasta_1.3.2_release_notes/index.html b/en-26/release_notes/shasta_1.3.2_release_notes/index.html new file mode 100644 index 0000000000..03e8d52c16 --- /dev/null +++ b/en-26/release_notes/shasta_1.3.2_release_notes/index.html @@ -0,0 +1,1255 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.3.2 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.3.2

+

Shasta v1.3.2 included version 2.4.0 of the sat python package and CLI.

+

The following sections detail the changes in this release.

+

sat swap Command for Switch and Cable Replacement

+

The sat switch command which supported operations for replacing a switch has +been deprecated and replaced with the sat swap command, which now supports +replacing a switch OR cable.

+

The sat swap switch command is equivalent to sat switch. The sat switch +command will be removed in a future release.

+

Addition of Stages to sat bootsys Command

+

The sat bootsys command now has multiple stages for both the boot and +shutdown actions. Please refer to the “System Power On Procedures” and “System +Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001) +for more details on using this command in the context of a full system power off +and power on.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/shasta_1.3_release_notes/index.html b/en-26/release_notes/shasta_1.3_release_notes/index.html new file mode 100644 index 0000000000..5377f0e87e --- /dev/null +++ b/en-26/release_notes/shasta_1.3_release_notes/index.html @@ -0,0 +1,1259 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.3 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.3

+

Shasta v1.3 included version 2.2.3 of the sat python package and CLI.

+

This version of the sat CLI contained the following commands:

+
    +
  • auth
  • +
  • bootsys
  • +
  • cablecheck
  • +
  • diag
  • +
  • firmware
  • +
  • hwinv
  • +
  • hwmatch
  • +
  • k8s
  • +
  • linkhealth
  • +
  • sensors
  • +
  • setrev
  • +
  • showrev
  • +
  • status
  • +
  • swap
  • +
  • switch
  • +
+

For more information on each of these commands, see the +SAT Command Overview and the table +of commands in the SAT Command Authentication +section of this document.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/shasta_1.4.1_release_notes/index.html b/en-26/release_notes/shasta_1.4.1_release_notes/index.html new file mode 100644 index 0000000000..4dc92c42e6 --- /dev/null +++ b/en-26/release_notes/shasta_1.4.1_release_notes/index.html @@ -0,0 +1,1264 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.4.1 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.4.1

+

We released version 2.0.4 of the SAT product in Shasta v1.4.1.

+

This version of the SAT product included:

+
    +
  • Version 3.5.0 of the sat python package and CLI.
  • +
  • Version 1.4.3 of the sat-podman wrapper script.
  • +
+

The following sections detail the changes in this release.

+

New Commands to Translate Between NIDs and XNames

+

Two new commands were added to translate between NIDs and XNames:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
+

These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.

+

Bug Fixes

+
    +
  • Fixed a problem in sat swap where creating the offline port policy failed.
  • +
  • Changed sat bootsys shutdown --stage bos-operations to no longer forcefully +power off all compute nodes and application nodes using CAPMC when BOS +sessions complete or time out.
  • +
  • Fixed an issue with the command sat bootsys boot --stage cabinet-power.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/shasta_1.4_release_notes/index.html b/en-26/release_notes/shasta_1.4_release_notes/index.html new file mode 100644 index 0000000000..98a1b7427b --- /dev/null +++ b/en-26/release_notes/shasta_1.4_release_notes/index.html @@ -0,0 +1,1366 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.4 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.4

+

In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.

+

This version of the SAT product included the following components:

+
    +
  • Version 3.4.0 of the sat python package and CLI
  • +
+

It also added the following new component:

+
    +
  • Version 1.4.2 of the sat-podman wrapper script
  • +
+

The following sections detail the changes in this release.

+

SAT as an Independent Product

+

SAT is now packaged and released as an independent product. The product +deliverable is called a “release distribution”. The release distribution is a +gzipped tar file containing an install script. This install script loads the +cray/cray-sat container image into the Docker registry in Nexus and loads the +cray-sat-podman RPM into a package repository in Nexus.

+

In this release, the cray-sat-podman package is still installed in the master +and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in +Shasta v1.5.

+

SAT Running in a Container Under Podman

+

The sat command now runs in a container under Podman. The sat executable is +now installed on all nodes in the Kubernetes cluster (workers and +control plane nodes). This executable is a wrapper script that starts a SAT container in +Podman and invokes the sat Python CLI within that container. The admin can run +individual sat commands directly on the master or worker NCNs as before, or +they can run sat commands inside the SAT container after using sat bash to +enter an interactive shell inside the SAT container.

+

To view man pages for sat commands, the user can run sat-man SAT_COMMAND, +replacing SAT_COMMAND with the name of the sat command. Alternatively, +the user can enter the sat container with sat bash and use the man command.

+

New sat init Command and Configuration File Location Change

+

The default location of the SAT configuration file has been changed from /etc/sat.toml +to ~/.config/sat/sat.toml. A new command, sat init, has been added that +initializes a configuration file in the new default directory. This better supports +individual users on the system who want their own configuration files.

+

~/.config/sat is mounted into the container that runs under Podman, so changes +are persistent across invocations of the sat container. If desired, an alternate +configuration directory can be specified with the SAT_CONFIG_DIR environment +variable.

+

Additionally, if a configuration file does not yet exist when a user runs a sat +command, one is generated automatically.

+

Additional Types Added to sat hwinv

+

Additional functionality has been added to sat hwinv including:

+
    +
  • List node enclosure power supplies with the --list-node-enclosure-power-supplies +option.
  • +
  • List node accelerators (for example, GPUs) with the --list-node-accels option. +The count of node accelerators is also included for each node.
  • +
  • List node accelerator risers (for example, Redstone modules) with the +--list-node-accel-risers option. The count of node accelerator risers is also +included for each node.
  • +
  • List High-Speed Node Network Interface Cards (HSN NICs) with the +--list-node-hsn-nics option. The count of HSN NICs is also included for each node.
  • +
+

Documentation for these new options has been added to the man page for sat hwinv.

+

Site Information Stored by sat setrev in S3

+

The sat setrev and sat showrev commands now use S3 to store and obtain site +information, including system name, site name, serial number, install date, and +system type. Since the information is stored in S3, it will now be consistent +regardless of the node on which sat is executed.

+

As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.

+

Product Version Information Shown by sat showrev

+

sat showrev now shows product information from the cray-product-catalog +ConfigMap in Kubernetes.

+

Additional Changes to sat showrev

+

The output from sat showrev has also been changed in the following ways:

+
    +
  • The --docker and --packages options were considered misleading and have +been removed.
  • +
  • Information pertaining only to the local host, where the command is run, +has been moved to the output of the --local option.
  • +
+

Removal of sat cablecheck

+

The sat cablecheck command has been removed. To verify that the system’s Slingshot +network is cabled correctly, admins should now use the show cables command in the +Slingshot Topology Tool (STT).

+

sat swap Command Compatibility with Next-gen Fabric Controller

+

The sat swap command was added in Shasta v1.3.2. This command used the Fabric +Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the +Fabric Controller API, so this command has been rewritten to use the new +backwards-incompatible API. Usage of the command did not change.

+

sat bootsys Functionality

+

Much of the functionality added to sat bootsys in Shasta v1.3.2 was broken +by changes introduced in Shasta v1.4, which removed the Ansible inventory +and playbooks.

+

The functionality in the platform-services stage of sat bootsys has been +re-implemented to use python directly instead of Ansible. This resulted in +a more robust procedure with better logging to the sat log file. Failures +to stop containers on Kubernetes nodes are handled more gracefully, and +more information about the containers that failed to stop, including how to +debug the problem, is included.

+

Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.

+

The following improvements were made to the bos-operations stage +of sat bootsys:

+
    +
  • More information about the BOS sessions, BOA jobs, and BOA pods is printed.
  • +
  • A command-line option, --bos-templates, and a corresponding configuration +file option, bos_templates, were added, and the --cle-bos-template and +--uan-bos-template options and their corresponding configuration file +options were deprecated.
  • +
+

The following functionality has been removed from sat bootsys:

+
    +
  • The hsn-bringup stage of sat bootsys boot has been removed due to removal +of the underlying Ansible playbook.
  • +
  • The bgp-check stage of sat bootsys {boot,shutdown} has been removed. It is +now a manual procedure.
  • +
+

Log File Location Change

+

The location of the sat log file has changed from /var/log/cray/sat.log to +/var/log/cray/sat/sat.log. This change simplifies mounting this file into the +sat container running under Podman.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/release_notes/shasta_1.5_release_notes/index.html b/en-26/release_notes/shasta_1.5_release_notes/index.html new file mode 100644 index 0000000000..290a7ce78c --- /dev/null +++ b/en-26/release_notes/shasta_1.5_release_notes/index.html @@ -0,0 +1,1320 @@ + + + + + + + + + + + + SAT Changes in Shasta v1.5 :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Changes in Shasta v1.5

+

We released version 2.1.16 of the SAT product in Shasta v1.5.

+

This version of the SAT product included:

+
    +
  • Version 3.7.4 of the sat python package and CLI
  • +
  • Version 1.4.10 of the sat-podman wrapper script
  • +
+

It also added the following new component:

+
    +
  • Version 1.0.3 of the sat-cfs-install docker image and helm chart
  • +
+

The following sections detail the changes in this release.

+

Install Changes to Separate Product from CSM

+

This release further decouples the installation of the SAT product from the CSM +product. The cray-sat-podman RPM is no longer installed in the management +non-compute node (NCN) image. Instead, the cray-sat-podman RPM is installed on +all master management NCNs via an Ansible playbook which is referenced by a +layer of the CFS configuration that applies to management NCNs. This CFS +configuration is typically named ncn-personalization.

+

The SAT product now includes a Docker image and a Helm chart named +sat-cfs-install. The SAT install script, install.sh, deploys the Helm chart +with Loftsman. This helm chart deploys a Kubernetes job that imports the +SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management. +This repository is referenced by the layer added to the NCN personalization +CFS configuration.

+

Removal of Direct Redfish Access

+

All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:

+
    +
  • sat sensors
  • +
  • sat diag
  • +
  • sat linkhealth
  • +
+

The sat sensors command has been rewritten to use the SMA telemetry API to +obtain the latest sensor values. The command’s usage has changed slightly, but +legacy options work as before, so it is backwards compatible. Additionally, new +commands have been added.

+

The sat diag command has been rewritten to use a new service called Fox, which +is delivered with the CSM-Diags product. The sat diag command now launches +diagnostics using the Fox service, which launches the corresponding diagnostic +programs on controllers using the Hardware Management Job and Task Daemon +(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start +diagnostics over Redfish.

+

The sat linkhealth command has been removed. Its functionality has been +replaced by functionality from the Slingshot Topology Tool (STT) in the +fabric manager pod.

+

The Redfish username and password command line options and configuration file +options have been removed. For more information, see +Remove Obsolete Configuration File Sections.

+

Additional Fields in sat setrev and sat showrev

+

sat setrev now collects the following information from the admin, which is then +displayed by sat showrev:

+
    +
  • System description
  • +
  • Product number
  • +
  • Company name
  • +
  • Country code
  • +
+

Additional guidance and validation have been added to each field collected by +sat setrev. This sets the stage for sdu setup to stop collecting this +information and instead collect it from sat showrev or its S3 bucket.

+

Improvements to sat bootsys

+

The platform-services stage of the sat bootsys boot command has been +improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph +health in the correct order. The ceph-check stage has been removed as it is no +longer needed.

+

The platform-services stage of sat bootsys boot now prompts for confirmation +of the storage NCN hostnames in addition to the Kubernetes control plane and worker nodes.

+

Bug Fixes and Security Fixes

+
    +
  • Improved error handling in sat firmware.
  • +
  • Incremented version of Alpine Linux to 3.13.2 to address a security +vulnerability.
  • +
+

Other Notable Changes

+
    +
  • Ansible has been removed from the cray-sat container image.
  • +
  • Support for the Firmware Update Service (FUS) has been removed from the sat firmware command.
  • +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/sitemap.xml b/en-26/sitemap.xml new file mode 100644 index 0000000000..cd6e8d75ad --- /dev/null +++ b/en-26/sitemap.xml @@ -0,0 +1,425 @@ + + + + /docs-sat/en-26/usage/ + 2024-12-11T03:40:01+00:00 + + + + + + + /docs-sat/en-26/about_sat/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-26/install/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-26/release_notes/ + 2024-12-11T03:40:00+00:00 + + + + + + + + /docs-sat/en-26/usage/change_bos_version/ + 2024-12-11T03:40:01+00:00 + + + + + /docs-sat/en-26/release_notes/sat_2.2_release_notes/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-26/about_sat/command_authentication/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/usage/multi-tenancy/ + 2024-12-11T03:40:01+00:00 + + /docs-sat/en-26/release_notes/sat_2.3_release_notes/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-26/about_sat/dependencies/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/release_notes/sat_2.4_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/usage/sat_and_iuf/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/about_sat/introduction/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/release_notes/sat_2.5_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/usage/sat_bootprep/ + 2024-12-11T03:40:01+00:00 + + + + + /docs-sat/en-26/about_sat/sat_in_csm/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/release_notes/sat_2.6_release_notes/ + 2024-12-11T03:40:01+00:00 + + /docs-sat/en-26/about_sat/view_sat_docs/ + 2024-12-11T03:40:00+00:00 + + /docs-sat/en-26/release_notes/shasta_1.3.2_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/external_system/ + 2024-12-11T03:40:00+00:00 + + + + /docs-sat/en-26/release_notes/shasta_1.3_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/release_notes/shasta_1.4.1_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + 
/docs-sat/en-26/release_notes/shasta_1.4_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/release_notes/shasta_1.5_release_notes/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/uninstall_and_downgrade/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/upgrade/ + 2024-12-11T03:40:01+00:00 + + + + /docs-sat/en-26/categories/ + + + + + + + + /docs-sat/en-26/tags/ + + + + + + + + diff --git a/en-26/tags/index.html b/en-26/tags/index.html new file mode 100644 index 0000000000..4cf4e0c36f --- /dev/null +++ b/en-26/tags/index.html @@ -0,0 +1,1308 @@ + + + + + + + + + + + + Tags :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ + tag :: + +

+ + + + + + + + +
    + +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-26/tags/index.xml b/en-26/tags/index.xml new file mode 100644 index 0000000000..2ec967b886 --- /dev/null +++ b/en-26/tags/index.xml @@ -0,0 +1,11 @@ + + + + Tags on System Admin Toolkit (SAT) + /docs-sat/en-26/tags/ + Recent content in Tags on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + + + diff --git a/en-26/uninstall_and_downgrade/index.html b/en-26/uninstall_and_downgrade/index.html new file mode 100644 index 0000000000..34eea1e4cb --- /dev/null +++ b/en-26/uninstall_and_downgrade/index.html @@ -0,0 +1,1423 @@ + + + + + + + + + + + + SAT Uninstall and Downgrade :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Uninstall and Downgrade

+

Uninstall: Remove a Version of SAT

+

This procedure can be used to uninstall a version of SAT.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be uninstalled with prodmgr.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is available.
  • +
+

Procedure

+
    +
  1. +

    (ncn-m001#) Use sat showrev to list versions of SAT.

    +
    sat showrev --products --filter product_name=sat
    +

    Example output:

    +
    ###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+-------------------+-----------------------+
    +| product_name | product_version | images            | image_recipes         |
    ++--------------+-----------------+-------------------+-----------------------+
    +| sat          | 2.3.3           | -                 | -                     |
    +| sat          | 2.2.10          | -                 | -                     |
    ++--------------+-----------------+-------------------+-----------------------+
    +
  2. +
  3. +

    (ncn-m001#) Use prodmgr to uninstall a version of SAT.

    +

    This command will do three things:

    +
      +
    • Remove all hosted-type package repositories associated with the given version of SAT. Group-type +repositories are not removed.
    • +
    • Remove all container images associated with the given version of SAT.
    • +
    • Remove SAT from the cray-product-catalog Kubernetes ConfigMap, so that it will no longer show up +in the output of sat showrev.
    • +
    +
    prodmgr uninstall sat 2.2.10
    +

    Example output:

    +
    Repository sat-2.2.10-sle-15sp2 has been removed.
    +Removed Docker image cray/cray-sat:3.9.0
    +Removed Docker image cray/sat-cfs-install:1.0.2
    +Removed Docker image cray/sat-install-utility:1.4.0
    +Deleted sat-2.2.10 from product catalog.
    +
  4. +
+

Downgrade: Switch Between SAT Versions

+

This procedure can be used to downgrade the active version of SAT.

+

Note: The prodmgr activate command is deprecated in SAT 2.6, and the +ability to switch between SAT versions will be removed in a future release.

+

Prerequisites

+
    +
  • Only versions 2.2 or newer of SAT can be switched. Older versions must be +switched manually.
  • +
  • CSM version 1.2 or newer must be installed, so that the prodmgr command is +available.
  • +
+

Procedure

+
    +
  1. +

    (ncn-m001#) Use sat showrev to list versions of SAT.

    +
    sat showrev --products --filter product_name=sat
    +

    Example output:

    +
    ###############################################################################
    +Product Revision Information
    +###############################################################################
    ++--------------+-----------------+--------------------+-----------------------+
    +| product_name | product_version | images             | image_recipes         |
    ++--------------+-----------------+--------------------+-----------------------+
    +| sat          | 2.3.3           | -                  | -                     |
    +| sat          | 2.2.10          | -                  | -                     |
    ++--------------+-----------------+--------------------+-----------------------+
    +
  2. +
  3. +

    (ncn-m001#) Use prodmgr to switch to a different version of SAT.

    +

    This command will do two things:

    +
      +
    • For all hosted-type package repositories associated with this version of SAT, set them as the sole member +of their corresponding group-type repository. For example, switching to SAT version 2.2.10 +sets the repository sat-2.2.10-sle-15sp2 as the only member of the sat-sle-15sp2 group.
    • +
    • Ensure that the SAT CFS configuration content exists as a layer in all CFS configurations that are +associated with NCNs with the role “Management” and subrole “Master” (for example, the CFS configuration +management-23.5.0). Specifically, it will ensure that the layer refers to the version of SAT CFS +configuration content associated with the version of SAT to which the system is switching.
    • +
    +
    prodmgr activate sat 2.5.15
    +

    Example output:

    +
    Repository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4.
    +Updated CFS configurations: [management-23.5.0]
    +
  4. +
  5. +

    Apply the modified CFS configuration to the management NCNs.

    +

    At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.

    +

    To ensure that management NCNs have been updated to use the active SAT +version, follow the Procedure to Apply CFS Configuration.

    +
  6. +
+

Procedure to Apply CFS Configuration

+
    +
  1. +

    (ncn-m001#) Set an environment variable that refers to the name of the CFS configuration +to be applied to the management NCNs.

    +
    export CFS_CONFIG_NAME="management-23.5.0"
    +

    Note: Refer to the output from the prodmgr activate command to find +the name of the modified CFS configuration. If more than one CFS configuration +was modified, use the first one.

    +
    INFO: Successfully saved CFS configuration "management-23.5.0"
    +
  2. +
  3. +

    (ncn-m001#) Obtain the name of the CFS configuration layer for SAT and save it in an +environment variable:

    +
    export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
    +    | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
    +
  4. +
  5. +

    (ncn-m001#) Create a CFS session that executes only the SAT layer of the given CFS +configuration.

    +

    The --configuration-limit option limits the configuration session to run +only the SAT layer of the configuration.

    +
    cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
    +    "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
    +
  6. +
  7. +

    Monitor the progress of the CFS session.

    +

    (ncn-m001#) Set an environment variable to the name of the Ansible container within the pod +for the CFS session:

    +
    export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME} \
    +    -o json | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
    +

    (ncn-m001#) Next, get the logs for the Ansible container.

    +
    kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
    +    --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
    +

    Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output.

    +

    (ncn-m001#) The following example shows a successful session:

    +
    ...
    +PLAY RECAP *********************************************************************
    +x3000c0s1b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s3b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +x3000c0s5b0n0              : ok=3    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0
    +

    Note: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.

    +
  8. +
  9. +

    (ncn-m001#) Verify that SAT was successfully configured.

    +

    If sat is configured, the --version command will indicate which version +is installed. If sat is not properly configured, the command will fail.

    +

    Note: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the sat Python package, +which is different from the version number of the overall SAT release distribution.

    +
    sat --version
    +

    Example output:

    +
    sat 3.7.0
    +

    Note: Upon first running sat, there might be additional output while +the sat container image is downloaded. This occurs the first time sat +is run on each manager NCN. For example, when running sat for the first time +on ncn-m001 and then for the first time on ncn-m002, this additional +output is seen both times.

    +
    Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
    +Getting image source signatures
    +Copying blob da64e8df3afc done
    +Copying blob 0f36fd81d583 done
    +Copying blob 12527cf455ba done
    +...
    +sat 3.7.0
    +
  10. +
  11. +

    (ncn-m001#) Stop the typescript.

    +
    exit
    +
  12. +
+

SAT version x.y.z is now installed and configured:

+
    +
  • The SAT RPM package is installed on the associated NCNs.
  • +
+

Note on Procedure to Apply CFS Configuration

+

The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.

+

The main scenario in which the CFS batcher will not automatically re-apply the +SAT layer is when the commit hash of the sat-config-management git repository +has not changed between SAT versions. The previous procedure ensures the +configuration is re-applied in all cases, and it is harmless if the batcher has +already applied an updated configuration.

+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/upgrade/index.html b/en-26/upgrade/index.html new file mode 100644 index 0000000000..49b6b32fe5 --- /dev/null +++ b/en-26/upgrade/index.html @@ -0,0 +1,1466 @@ + + + + + + + + + + + + SAT Upgrade :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Upgrade

+

Install and Upgrade Framework

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.

+

This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.

+

IUF will perform the following tasks for a release of SAT.

+
    +
  • IUF deliver-product stage: +
      +
    • Uploads SAT configuration content to VCS
    • +
    • Uploads SAT information to the CSM product catalog
    • +
    • Uploads SAT content to Nexus repositories
    • +
    +
  • +
  • IUF update-vcs-config stage: +
      +
    • Updates the VCS integration branch with new SAT configuration content if a +working branch is specified
    • +
    +
  • +
  • IUF update-cfs-config stage: +
      +
    • Creates a new CFS configuration for management nodes with new SAT configuration content
    • +
    +
  • +
  • IUF prepare-images stage: +
      +
    • Creates updated management NCN and managed node images with new SAT content
    • +
    +
  • +
  • IUF management-nodes-rollout stage: +
      +
    • Boots management NCNs with an image containing new SAT content
    • +
    +
  • +
+

IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.

+

IUF Stage Details for SAT

+

This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.

+

update-vcs-config

+

Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.

+

update-cfs-config

+

Information: This stage only applies to the management configuration and +not to the managed configuration.

+

prepare-images

+

Information: This stage only applies to management images and not to +managed images.

+

Post-Upgrade Procedures

+

After upgrading SAT with IUF, it is recommended to complete the following +procedures before using SAT:

+ +

Notes on the Procedures

+
    +
  • Ellipses (...) in shell output indicate omitted lines.
  • +
  • In the examples below, replace x.y.z with the version of the SAT product stream +being upgraded.
  • +
  • ‘manager’ and ‘master’ are used interchangeably in the steps below.
  • +
+

Remove Obsolete Configuration File Sections

+

After upgrading SAT, if using the configuration file from a previous version, there may be +configuration file sections no longer used in the new version. For example, when upgrading +from Shasta 1.4 to Shasta 1.5, the [redfish] configuration file section is no longer used.

+

(ncn-m001#) In that case, the following warning may appear upon running sat commands.

+
WARNING: Ignoring unknown section 'redfish' in config file.
+

Remove the [redfish] section from /root/.config/sat/sat.toml to resolve the warning.

+
[redfish]
+username = "admin"
+password = "adminpass"
+

Repeat this process for any configuration file sections for which there are “unknown section” warnings.

+

Update SAT Logging

+

As of SAT version 2.2, some command output that was previously printed to stdout +is now logged to stderr. These messages are logged at the INFO level. The +default logging threshold was changed from WARNING to INFO to accommodate +this logging change. Additionally, some messages previously logged at the INFO +level are now logged at the DEBUG level.

+

These changes take effect automatically. However, if the default output threshold +has been manually set in ~/.config/sat/sat.toml, it should be changed to ensure +that important output is shown in the terminal.

+

Update Configuration

+

(ncn-m001#) In the following example, the stderr log level, logging.stderr_level, is set to +WARNING, which will exclude INFO-level logging from terminal output.

+
grep -A 3 logging ~/.config/sat/sat.toml
+

Example output:

+
[logging]
+...
+stderr_level = "WARNING"
+

To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.

+

If logging.stderr_level is commented out, its value will not affect logging +behavior. However, it may be helpful to set its value to INFO as a reminder of +the new default behavior.

+

Affected Commands

+

The following commands trigger messages that have been changed from stdout +print calls to INFO-level (or WARNING- or ERROR-level) log messages:

+
    +
  • sat bootsys --stage shutdown --stage session-checks
  • +
  • sat sensors
  • +
+

The following commands trigger messages that have been changed from INFO-level +log messages to DEBUG-level log messages:

+
    +
  • sat nid2xname
  • +
  • sat xname2nid
  • +
  • sat swap
  • +
+

Set System Revision Information

+

HPE service representatives use system revision information data to identify +systems in support cases.

+

Prerequisites

+ +

Notes on the Procedure

+

This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) +or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or +earlier.

+

Procedure

+
    +
  1. +

    Set System Revision Information.

    +

    (ncn-m001#) Run sat setrev and follow the prompts to set the following site-specific values:

    +
      +
    • Serial number
    • +
    • System name
    • +
    • System type
    • +
    • System description
    • +
    • Product number
    • +
    • Company name
    • +
    • Site name
    • +
    • Country code
    • +
    • System install date
    • +
    +

    Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.

    +
    sat setrev
    +

    Example output:

    +
    --------------------------------------------------------------------------------
    +Setting:        Serial number
    +Purpose:        System identification. This will affect how snapshots are
    +                identified in the HPE backend services.
    +Description:    This is the top-level serial number which uniquely identifies
    +                the system. It can be requested from an HPE representative.
    +Valid values:   Alpha-numeric string, 4 - 20 characters.
    +Type:           <class 'str'>
    +Default:        None
    +Current value:  None
    +--------------------------------------------------------------------------------
    +Please do one of the following to set the value of the above setting:
    +    - Input a new value
    +    - Press CTRL-C to exit
    +...
    +
  2. +
  3. +

    Verify System Revision Information.

    +

    (ncn-m001#) Run sat showrev and verify the output shown in the “System Revision Information” table.

    +

    The following example shows sample table output.

    +
    sat showrev
    +

    Example output:

    +
    ################################################################################
    +System Revision Information
    +################################################################################
    ++---------------------+---------------+
    +| component           | data          |
    ++---------------------+---------------+
    +| Company name        | HPE           |
    +| Country code        | US            |
    +| Interconnect        | Sling         |
    +| Product number      | R4K98A        |
    +| Serial number       | 12345         |
    +| Site name           | HPE           |
    +| Slurm version       | slurm 20.02.5 |
    +| System description  | Test System   |
    +| System install date | 2021-01-29    |
    +| System name         | eniac         |
    +| System type         | EX-1C         |
    ++---------------------+---------------+
    +################################################################################
    +Product Revision Information
    +################################################################################
    ++--------------+-----------------+------------------------------+------------------------------+
    +| product_name | product_version | images                       | image_recipes                |
    ++--------------+-----------------+------------------------------+------------------------------+
    +| csm          | 0.8.14          | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
    +| sat          | 2.0.1           | -                            | -                            |
    +| sdu          | 1.0.8           | -                            | -                            |
    +| slingshot    | 0.8.0           | -                            | -                            |
    +| sma          | 1.4.12          | -                            | -                            |
    ++--------------+-----------------+------------------------------+------------------------------+
    +################################################################################
    +Local Host Operating System
    +################################################################################
    ++-----------+----------------------+
    +| component | version              |
    ++-----------+----------------------+
    +| Kernel    | 5.3.18-24.15-default |
    +| SLES      | SLES 15-SP2          |
    ++-----------+----------------------+
    +
  4. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/usage/change_bos_version/index.html b/en-26/usage/change_bos_version/index.html new file mode 100644 index 0000000000..bfd812ffd4 --- /dev/null +++ b/en-26/usage/change_bos_version/index.html @@ -0,0 +1,1284 @@ + + + + + + + + + + + + Change the BOS Version :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Change the BOS Version

+

By default, SAT uses Boot Orchestration Service (BOS) version two (v2). +Select the BOS version to use for individual commands with the --bos-version +option. For more information on this option, refer to the man page for a specific +command.

+

Another way to change the BOS version is by configuring it under the +api_version setting in the bos section of the SAT configuration file. +If the system is using an existing SAT configuration file from an older +version of SAT, the bos section might not exist. In that case, add the bos +section with the BOS version desired in the api_version setting.

+
    +
  1. +

    Find the SAT configuration file at ~/.config/sat/sat.toml, and look for a +section like this:

    +
    [bos]
    +api_version = "v2"
    +

    In this example, SAT is using BOS version "v2".

    +
  2. +
  3. +

    Change the line specifying the api_version to the BOS version desired (for +example, "v1").

    +
    [bos]
    +api_version = "v1"
    +
  4. +
  5. +

    If applicable, uncomment the api_version line.

    +

    If the system is using an existing SAT configuration file from a recent +version of SAT, the api_version line might be commented out like this:

    +
    [bos]
    +# api_version = "v2"
    +

    If the line is commented out, SAT will still use the default BOS +version. To ensure a different BOS version is used, uncomment the +api_version line by removing # at the beginning of the line.

    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/usage/index.html b/en-26/usage/index.html new file mode 100644 index 0000000000..b925078170 --- /dev/null +++ b/en-26/usage/index.html @@ -0,0 +1,1292 @@ + + + + + + + + + + + + SAT Usage :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + +
+
+ +
+
+ + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + + +

SAT Usage

+ + + + + + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + diff --git a/en-26/usage/index.xml b/en-26/usage/index.xml new file mode 100644 index 0000000000..41186b21ea --- /dev/null +++ b/en-26/usage/index.xml @@ -0,0 +1,40 @@ + + + + SAT Usage on System Admin Toolkit (SAT) + /docs-sat/en-26/usage/ + Recent content in SAT Usage on System Admin Toolkit (SAT) + Hugo -- gohugo.io + en-26 + Wed, 11 Dec 2024 03:40:01 +0000 + + + Change the BOS Version + /docs-sat/en-26/usage/change_bos_version/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/change_bos_version/ + Change the BOS Version By default, SAT uses Boot Orchestration Service (BOS) version two (v2). Select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command. Another way to change the BOS version is by configuring it under the api_version setting in the bos section of the SAT configuration file. If the system is using an existing SAT configuration file from an older version of SAT, the bos section might not exist. + + + Configure Multi-tenancy + /docs-sat/en-26/usage/multi-tenancy/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/multi-tenancy/ + Configure Multi-tenancy SAT supports supplying tenant information to CSM services in order to allow tenant admins to use SAT within their tenant. By default, the tenant name is not set, and SAT will not send any tenant information with its requests to CSM services. Configure the tenant name either in the SAT configuration file or on the command line. Configure the Tenant Name in the SAT Configuration File Set the tenant name in the SAT configuration file using the api_gateway. 
+ + + SAT and IUF + /docs-sat/en-26/usage/sat_and_iuf/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/sat_and_iuf/ + SAT and IUF The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. For more information on IUF, see the IUF section of the Cray System Management Documentation. For more information on sat bootprep, see SAT Bootprep. Variable Substitutions Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. + + + SAT Bootprep + /docs-sat/en-26/usage/sat_bootprep/ + Wed, 11 Dec 2024 03:40:01 +0000 + /docs-sat/en-26/usage/sat_bootprep/ + SAT Bootprep SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands. (ncn-m001#) Here is an example: + + + diff --git a/en-26/usage/multi-tenancy/index.html b/en-26/usage/multi-tenancy/index.html new file mode 100644 index 0000000000..5defa03a7f --- /dev/null +++ b/en-26/usage/multi-tenancy/index.html @@ -0,0 +1,1235 @@ + + + + + + + + + + + + Configure Multi-tenancy :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

Configure Multi-tenancy

+

SAT supports supplying tenant information to CSM services in order to allow +tenant admins to use SAT within their tenant. By default, the tenant name is +not set, and SAT will not send any tenant information with its requests to +CSM services. Configure the tenant name either in the SAT configuration file +or on the command line.

+

Configure the Tenant Name in the SAT Configuration File

+

Set the tenant name in the SAT configuration file using the +api_gateway.tenant_name option.

+

Here is an example:

+
[api_gateway]
+tenant_name = "my_tenant"
+

Configure the Tenant Name on the Command Line

+

Set the tenant name for each sat invocation using the --tenant-name +option. The --tenant-name option must be specified before the subcommand +name.

+

(ncn-m001#) Here is an example:

+
sat --tenant-name=my_tenant status
+
+ +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/usage/sat_and_iuf/index.html b/en-26/usage/sat_and_iuf/index.html new file mode 100644 index 0000000000..f697f356d3 --- /dev/null +++ b/en-26/usage/sat_and_iuf/index.html @@ -0,0 +1,1312 @@ + + + + + + + + + + + + SAT and IUF :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+ + + + + + + +
+
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT and IUF

+

The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM with the help of +sat bootprep. Outside of IUF, it is uncommon to use sat bootprep. +For more information on IUF, see the +IUF section of +the Cray System Management Documentation. +For more information on sat bootprep, see SAT Bootprep.

+

Variable Substitutions

+

Both IUF and sat bootprep allow variable substitutions into the default HPC +CSM Software Recipe bootprep input files. The default variables of the HPC +CSM Software Recipe are available in a product_vars.yaml file. To override +the default variables, specify any site variables in a site_vars.yaml file. +Variables are sourced from the command line, any variable files directly +provided, and the HPC CSM Software Recipe files used, in that order.

+

IUF Session Variables

+

IUF also has special session variables internal to the iuf command that +override any matching entries. Session variables are the set of product and +version combinations being installed by the current IUF activity, and they are +found inside IUF’s internal session_vars.yaml file. For more information on +IUF and variable substitutions, see the +IUF section of +the Cray System Management Documentation.

+

SAT Variable Limitations

+

When using sat bootprep outside of IUF, substituting variables into the +default bootprep input files might cause problems. Complex variables like +"{{ working_branch }}" cannot be completely resolved outside of IUF and +its internal session variables. Thus, the default product_vars.yaml file is +unusable with only the sat bootprep command when variables like +"{{ working_branch }}" are used. To work around this limitation when +substituting complex variables, use the internal IUF session_vars.yaml file +with sat bootprep and the default bootprep input files.

+
    +
  1. +

    Find the session_vars.yaml file from the most recent IUF activity on the +system.

    +

    This process is documented in the upgrade prerequisites procedure of the +Cray System Management Documentation. For more information, see steps 1-6 of +Stage 0.3 - Option 2.

    +
  2. +
  3. +

    (ncn-m001#) Use the session_vars.yaml file to substitute variables into the default +bootprep input files.

    +
    sat bootprep run --vars-file session_vars.yaml
    +
  4. +
+

Limit SAT Bootprep Run into Stages

+

The sat bootprep run command uses information from the bootprep input files +to create CFS configurations, IMS images, and BOS session templates. To restrict +this creation into separate stages, use the --limit option and list whether +to create configurations, images, session_templates, or some +combination of these. IUF uses the --limit option in this way to install, +upgrade, and deploy products on a system in stages.

+

(ncn-m001#) For example, to create only CFS configurations, run the following command used +by the IUF update-cfs-config stage:

+
sat bootprep run --limit configurations example-bootprep-input-file.yaml
+

Example output:

+
INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Creating 3 CFS configurations
+...
+INFO: Skipping creation of IMS images based on value of --limit option.
+INFO: Skipping creation of BOS session templates based on value of --limit option.
+

(ncn-m001#) To create only IMS images and BOS session templates, run the following command +used by the IUF prepare-images stage:

+
sat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml
+

Example output:

+
INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Skipping creation of CFS configurations based on value of --limit option.
+
+ +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/en-26/usage/sat_bootprep/index.html b/en-26/usage/sat_bootprep/index.html new file mode 100644 index 0000000000..8f1515b90e --- /dev/null +++ b/en-26/usage/sat_bootprep/index.html @@ -0,0 +1,1922 @@ + + + + + + + + + + + + SAT Bootprep :: System Admin Toolkit (SAT) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+ +
+ +
+ +
+ +
+ +

+ +

+ + + + + +

SAT Bootprep

+

SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates. The +solution is based on a given input file that defines how those configurations, +images, and session templates should be created. This automated process centers +around the sat bootprep command. Man page documentation for sat bootprep +can be viewed in the same way as for other SAT commands.

+

(ncn-m001#) Here is an example:

+
sat-man sat-bootprep
+

The sat bootprep command helps the Install and Upgrade Framework (IUF) +install, upgrade, and deploy products on systems managed by CSM. Outside of IUF, +it is uncommon to use sat bootprep. For more information on this relationship, +see SAT and IUF. For more information on IUF, see the +IUF section of +the Cray System Management Documentation.

+

SAT Bootprep vs SAT Bootsys

+

sat bootprep is used to create CFS configurations, build and +rename IMS images, and create BOS session templates which tie the +configurations and images together during a BOS session.

+

sat bootsys automates several portions of the boot and shutdown processes, +including (but not limited to) performing BOS operations (such as creating BOS +sessions), powering on and off cabinets, and checking the state of the system +prior to shutdown.

+

Edit a Bootprep Input File

+

The input file provided to sat bootprep is a YAML-formatted file containing +information which CFS, IMS, and BOS use to create configurations, images, and +BOS session templates respectively. Writing and modifying these input files is +the main task associated with using sat bootprep. An input file is composed of +three main sections, one each for configurations, images, and session templates. +These sections may be specified in any order, and any of the sections may be +omitted if desired.

+

Provide a Schema Version

+

The sat bootprep input file is validated against a versioned schema +definition. The input file should specify the version of the schema with which +it is compatible under a schema_version key. For example:

+
---
+schema_version: 1.0.2
+

(ncn-m001#) The current sat bootprep input file schema version can be viewed with the +following command:

+
sat bootprep view-schema | grep '^version:'
+

Example output:

+
version: '1.0.2'
+

The sat bootprep run command validates the schema version specified +in the input file. The command also makes sure that the schema version +of the input file is compatible with the schema version understood by the +current version of sat bootprep. For more information on schema version +validation, refer to the schema_version property description in the bootprep +input file schema. For more information on viewing the bootprep input file +schema in either raw form or user-friendly HTML form, see View SAT Bootprep +Schema.

+

The default HPC CSM Software Recipe bootprep input files provided by the +hpc-csm-software-recipe release distribution already contain the correct +schema version.

+

Define CFS Configurations

+

The CFS configurations are defined under a configurations key. Under this +key, list one or more configurations to create. For each +configuration, give a name in addition to the list of layers that +comprise the configuration.

+

Each layer can be defined by a product name and optionally a version number, +commit hash, or branch in the product’s configuration repository. If this +method is used, the layer is created in CFS by looking up relevant configuration +information (including the configuration repository and commit information) from +the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied. However, if it is absent, the version is assumed to be the latest +version found in the cray-product-catalog.

+

Alternatively, a configuration layer can be defined by explicitly referencing +the desired configuration repository. Specify the intended version +of the Ansible playbooks by providing a branch name or commit hash with branch +or commit.

+

The following example shows a CFS configuration with two layers. The first +layer is defined in terms of a product name and version, and the second layer +is defined in terms of a Git clone URL and branch:

+
---
+configurations:
+- name: example-configuration
+  layers:
+  - name: example-product
+    playbook: example.yml
+    product:
+      name: example
+      version: 1.2.3
+  - name: another-example-product
+    playbook: another-example.yml
+    git:
+      url: "https://vcs.local/vcs/another-example-config-management.git"
+      branch: main
+

When sat bootprep is run against an input file, a CFS configuration is created +corresponding to each configuration in the configurations section. For +example, the configuration created from an input file with the layers listed +above might look something like the following:

+
{
+    "lastUpdated": "2022-02-07T21:47:49Z",
+    "layers": [
+        {
+            "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "example-product",
+            "playbook": "example.yml"
+        },
+        {
+            "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+            "commit": "<commit hash>",
+            "name": "another-example-product",
+            "playbook": "another-example.yml"
+        }
+    ],
+    "name": "example-configuration"
+}
+

Define IMS Images

+

The IMS images are defined under an images key. Under the images key, the +user may define one or more images to be created in a list. Each element of the +list defines a separate IMS image to be built and/or configured. Images must +contain a name key and a base key.

+

The name key defines the name of the resulting IMS image. The base key +defines the base image to be configured or the base recipe to be built and +optionally configured. One of the following keys must be present under the +base key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use a product key to specify an image or recipe provided by a particular +version of a product. If a product provides more than one image or recipe, +specify a filter to select one. For more information, see +Filter Base Images or Recipes from a Product.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

Images may also contain the following keys:

+
    +
  • Use a configuration key to specify a CFS configuration with which to +customize the built image. If a configuration is specified, then configuration +groups must also be specified using the configuration_group_names key.
  • +
  • Use a ref_name key to specify a unique name that can refer to this image +within the input file in other images or in session templates. The ref_name +key allows references to images from the input file that have dynamically +generated names as described in +Dynamic Variable Substitutions.
  • +
  • Use a description key to describe the image in the bootprep input file. +Note that this key is not currently used.
  • +
+

Use Base Images or Recipes from IMS

+

Here is an example of an image using an existing IMS recipe as its base. This +example builds an IMS image from that recipe. It then configures the resulting image with +a CFS configuration named example-compute-config. The example-compute-config +CFS configuration can be defined under the configurations key in the same +input file, or it can be an existing CFS configuration. Running sat bootprep +against this input file results in an image named example-compute-image.

+
images:
+- name: example-compute-image
+  description: >
+    An example compute node image built from an existing IMS recipe.    
+  base:
+    ims:
+      name: example-compute-image-recipe
+      type: recipe
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

Use Base Images or Recipes from a Product

+

Here is an example showing the definition of two images. The first image is +built from a recipe provided by the uss product. The second image uses the +first image as a base and configures it with a configuration named +example-compute-config. The value of the first image’s ref_name key is used +in the second image’s base.image_ref key to specify it as a dependency. +Running sat bootprep against this input file results in two images, the +first named example-uss-image and the second named example-compute-image.

+
images:
+- name: example-uss-image
+  ref_name: example-uss-image
+  description: >
+    An example image built from the recipe provided by the USS product.    
+  base:
+    product:
+      name: uss
+      version: 1.0.0
+      type: recipe
+- name: example-compute-image
+  description: >
+    An example image that is configured from an image built from the recipe provided
+    by the USS product.    
+  base:
+    image_ref: example-uss-image
+  configuration: example-compute-config
+  configuration_group_names:
+  - Compute
+

This example assumes that the given version of the uss product provides +only a single IMS recipe. If more than one recipe is provided by the +given version of the uss product, use a filter as described in +Filter Base Images or Recipes from a Product.

+

Filter Base Images or Recipes from a Product

+

A product may provide more than one image or recipe. If this happens, +filter the product’s images or recipes whenever a base image or recipe from +that product is used. Beneath the base.product value within an image, +specify a filter key to create a filter using the following criteria:

+
    +
  • Use the prefix key to filter based on a prefix matching the name of the +image or recipe.
  • +
  • Use the wildcard key to filter based on a shell-style wildcard matching the +name of the image or recipe.
  • +
  • Use the arch key to filter based on the target architecture of the image or +recipe in IMS.
  • +
+

When specifying more than one filter key, all filters must match only the +desired image or recipe. An error occurs if either no images or recipes +match the given filters or if more than one image or recipe matches +the given filters.

+

Here is an example of three IMS images built from the Kubernetes image and the +Ceph storage image provided by the csm product. This example uses a prefix +filter to select from the multiple images provided by the CSM product. +The first two IMS images in the example find any image from the specified csm +product version whose name starts with secure-kubernetes. The third image in +the example finds any csm image whose name starts with secure-storage-ceph. +All three images are then configured with a configuration named +example-management-config. Running sat bootprep against this input file +results in three IMS images named worker-example-csm-image, +master-example-csm-image, and storage-example-csm-image.

+
images:
+- name: worker-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-kubernetes
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Worker
+
+- name: master-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-kubernetes
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Master
+
+- name: storage-example-csm-image
+  base:
+    product:
+      name: csm
+      version: 1.4.1
+      type: image
+      filter:
+        prefix: secure-storage-ceph
+  configuration: example-management-config
+  configuration_group_names:
+  - Management_Storage
+

Here is an example of two IMS images built from recipes provided by the uss +product. This example uses an architecture filter to select from the multiple +recipes provided by the USS product. The first image will be built from the +x86_64 version of the IMS recipe provided by the specified version of the +uss product. The second image will be built from the aarch64 version of +the IMS recipe provided by the specified version of the uss product.

+
images:
+- name: example-uss-image-x86_64
+  ref_name: example-uss-image-x86_64
+  description: >
+    An example image built from the x86_64 recipe provided by the USS product.    
+  base:
+    product:
+      name: uss
+      version: 1.0.0
+      type: recipe
+      filter:
+        arch: x86_64
+
+- name: example-uss-image-aarch64
+  ref_name: example-uss-image-aarch64
+  description: >
+    An example image built from the aarch64 recipe provided by the USS product.    
+  base:
+    product:
+      name: uss
+      version: 1.0.0
+      type: recipe
+      filter:
+        arch: aarch64
+

Define BOS Session Templates

+

The BOS session templates are defined under the session_templates key. Each +session template must provide values for the name, image, configuration, +and bos_parameters keys. The name key defines the name of the resulting BOS +session template. The image key defines the image to use in the BOS session +template. One of the following keys must be present under the image key:

+
    +
  • Use an ims key to specify an existing image or recipe in IMS.
  • +
  • Use an image_ref key to specify another image from the input file +using its ref_name.
  • +
+

The configuration key defines the CFS configuration specified +in the BOS session template.

+

The bos_parameters key defines parameters that are passed through directly to +the BOS session template. The bos_parameters key should contain a boot_sets +key, and each boot set in the session template should be specified under +boot_sets. Each boot set can contain the following keys, all of +which are optional:

+
    +
  • Use an arch key to specify the architecture of the nodes that should be +targeted by the boot set. Valid values are the same as those used by +Hardware State Manager (HSM).
  • +
  • Use a kernel_parameters key to specify the parameters passed to the kernel +on the command line.
  • +
  • Use a network key to specify the network over which the nodes boot.
  • +
  • Use a node_list key to specify the nodes to add to the boot set.
  • +
  • Use a node_roles_groups key to specify the HSM roles to add to the boot +set.
  • +
  • Use a node_groups key to specify the HSM groups to add to the boot set.
  • +
  • Use a rootfs_provider key to specify the root file system provider.
  • +
  • Use a rootfs_provider_passthrough key to specify the parameters to add to +the rootfs= kernel parameter.
  • +
+

As mentioned above, the parameters under bos_parameters are passed through +directly to BOS. For more information on the properties of a BOS boot set, +refer to BOS Session Templates in the Cray +System Management Documentation.

+

Here is an example of a BOS session template that refers to an existing IMS +image by name and targets nodes with the role Compute and the architecture +X86 in HSM:

+
session_templates:
+- name: example-session-template
+  image:
+    ims:
+      name: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        arch: X86
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

Here is an example of a BOS session template that refers to an image from the +input file by its ref_name and targets nodes with the role Compute and the +architecture ARM in HSM. Note that using the image_ref key requires that +an image defined in the input file specifies example-image as the value of +its ref_name key.

+
session_templates:
+- name: example-session-template
+  image:
+    image_ref: example-image
+  configuration: example-configuration
+  bos_parameters:
+    boot_sets:
+      example_boot_set:
+        arch: ARM
+        kernel_parameters: ip=dhcp quiet
+        node_roles_groups:
+        - Compute
+        rootfs_provider: cpss3
+        rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+

HPC CSM Software Recipe Variable Substitutions

+

The sat bootprep command takes any variables provided and substitutes them +into the input file. Variables are sourced from the command line, any variable +files directly provided, and the HPC CSM Software Recipe files used, in that +order. When providing values through a variable file, sat bootprep +substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe +provides default variables in a product_vars.yaml variable file. This file +defines information about each HPC software product included in the recipe.

+

Variables are primarily substituted into the default HPC CSM Software Recipe +bootprep input files through IUF. However, variable files can also be given to +sat bootprep directly from IUF’s use of the recipe. When using variables +directly with sat bootprep, there are some limitations. For more +information on SAT variable limitations, see SAT and IUF. +For more information on IUF and variable substitutions, see the +IUF section of +the Cray System Management Documentation.

+

Select an HPC CSM Software Recipe Version

+

View a listing of the default HPC CSM Software Recipe variables and +their values by running sat bootprep list-vars. For more information on +options that can be used with the list-vars subcommand, refer to the man page +for the sat bootprep subcommand.

+

By default, the sat bootprep command uses the variables from the latest +installed version of the HPC CSM Software Recipe. Override this with the +--recipe-version command line argument to sat bootprep run.

+

(ncn-m001#) For example, to explicitly select the 22.11.0 version of the HPC CSM Software +Recipe default variables, specify --recipe-version 22.11.0:

+
sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+

Values Supporting Jinja2 Template Rendering

+

The entire sat bootprep input file is not rendered by the Jinja2 template +engine. Jinja2 template rendering of the input file is performed individually +for each supported value. The values of the following keys in the bootprep +input file support rendering as a Jinja2 template and thus support variables:

+
    +
  • The name key of each configuration under the configurations key.
  • +
  • The following keys of each layer under the layers key in a +configuration: +
      +
    • name
    • +
    • playbook
    • +
    • git.branch
    • +
    • product.version
    • +
    • product.branch
    • +
    +
  • +
  • The following keys of each image under the images key: +
      +
    • name
    • +
    • base.product.version
    • +
    • base.product.filter.arch
    • +
    • base.product.filter.prefix
    • +
    • base.product.filter.wildcard
    • +
    • configuration
    • +
    +
  • +
  • The following keys of each session template under the +session_templates key: +
      +
    • name
    • +
    • configuration
    • +
    +
  • +
+

You can use Jinja2 built-in filters in values of any of the keys listed above. +In addition, Python string methods can be called on the string variables.

+

Hyphens in HPC CSM Software Recipe Variables

+

Variable names with hyphens are not allowed in Jinja2 expressions because each +hyphen is parsed as a subtraction operator, so the name is treated as an arithmetic expression instead of a single variable. To support +product names with hyphens, sat bootprep converts hyphens to underscores in +all top-level keys of the default HPC CSM Software Recipe variables. It also +applies the same conversion to any variables sourced from the command line or from any variable files +provided directly. When referring to a variable whose name contains hyphens in the bootprep +input file, use the underscore form of the name. For example, to refer to the product version +variable for slingshot-host-software in the bootprep input file, write +"{{slingshot_host_software.version}}".

+

HPC CSM Software Recipe Variable Substitution Example

+

The following example bootprep input file shows how a variable of a USS version +can be used in an input file that creates a CFS configuration for computes. +Only one layer is shown for brevity.

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: uss-compute-{{uss.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: uss
+      version: "{{uss.version}}"
+      branch: "{{uss.working_branch}}"
+

Note: When the value of a key in the bootprep input file begins with a Jinja2 +expression, the entire value must be quoted to pass YAML syntax checking.

+

Jinja2 expressions can also use filters and Python’s built-in string methods to +manipulate the variable values. For example, suppose only the major and minor +components of a USS version are to be used in the branch name for the USS +layer of the CFS configuration. Use the split string method to +achieve this as follows:

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: uss-compute-{{uss.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: uss
+      version: "{{uss.version}}"
+      branch: integration-{{uss.version.split('.')[0]}}-{{uss.version.split('.')[1]}}
+

Dynamic Variable Substitutions

+

Additional variables are available besides the default variables provided by +the HPC CSM Software Recipe. (For more information, see HPC CSM Software +Recipe Variable Substitutions.) +These additional variables are dynamic because their values are determined +at run-time based on the context in which they appear. Available dynamic +variables include the following:

+
    +
  • +

    The variable base.name can be used in the name of an image under the +images key. The value of this variable is the name of the IMS image or +recipe used as the base of this image.

    +
  • +
  • +

    The variable image.name can be used in the name of a session template +under the session_templates key. The value of this variable is the name of +the IMS image used in this session template.

    +

    Note: The name of a session template is restricted to 45 characters. Keep +this in mind when using image.name in the name of a session template.

    +
  • +
+

These variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:

+
    +
  • Building an image from a recipe provided by a product and using the +name of the recipe in the name of the resulting image
  • +
  • Using the name of the image in the name of a session template when +the image is generated as described in the previous use case
  • +
+

Example Bootprep Input Files

+

This section provides an example bootprep input file. It also gives +instructions for obtaining the default bootprep input files delivered +with a release of the HPC CSM Software Recipe.

+

Example Bootprep Input File

+

The following bootprep input file provides an example of using most of the +features described in previous sections. It is not intended to be a complete +bootprep file for the entire CSM product.

+
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  layers:
+  - name: uss-compute-{{uss.working_branch}}
+    playbook: cos-compute.yml
+    product:
+      name: uss
+      version: "{{uss.version}}"
+      branch: "{{uss.working_branch}}"
+  - name: cpe-pe_deploy-{{cpe.working_branch}}
+    playbook: pe_deploy.yml
+    product:
+      name: cpe
+      version: "{{cpe.version}}"
+      branch: "{{cpe.working_branch}}"
+
+images:
+- name: "{{default.note}}{{base.name}}{{default.suffix}}"
+  ref_name: base_uss_image
+  base:
+    product:
+      name: uss
+      type: recipe
+      version: "{{uss.version}}"
+
+- name: "compute-{{base.name}}"
+  ref_name: compute_image
+  base:
+    image_ref: base_uss_image
+  configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  configuration_group_names:
+  - Compute
+
+session_templates:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  image:
+    image_ref: compute_image
+  configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+  bos_parameters:
+    boot_sets:
+      compute:
+        kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+        node_roles_groups:
+        - Compute
+        rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+

Access Default Bootprep Input Files

+

Default bootprep input files are delivered by the HPC CSM Software Recipe +product. Access these files by cloning the hpc-csm-software-recipe +repository, as described in the Accessing sat bootprep files process of +the Cray System Management +Documentation.

+

(ncn-m001#) Find the default input files in the bootprep directory of the +cloned repository:

+
ls bootprep/
+

Generate an Example Bootprep Input File

+

The sat bootprep generate-example command has not been updated for +recent bootprep schema changes. Instead, use the +default bootprep input files described in Access Default Bootprep Input +Files. The sat bootprep generate-example command will be updated in a future release of SAT.

+

Summary of SAT Bootprep Results

+

The sat bootprep run command uses information from the bootprep input file to +create CFS configurations, IMS images, and BOS session templates. For easy +reference, the command also includes output summarizing the final creation +results.

+

(ncn-m001#) Here is a sample table output after running sat bootprep run:

+
################################################################################
+CFS configurations
+################################################################################
++------------------+
+| name             |
++------------------+
+| example-config-1 |
+| example-config-2 |
++------------------+
+################################################################################
+IMS images
+################################################################################
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| name          | preconfigured_image_id               | final_image_id                       | configuration  | configuration_group_names  |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute                    |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+################################################################################
+BOS session templates
+################################################################################
++------------------+----------------+
+| name             | configuration  |
++------------------+----------------+
+| example-template | example-config |
++------------------+----------------+
+

View SAT Bootprep Schema

+

The contents of the YAML input files used by sat bootprep must conform to a +schema that defines the structure of the data. The schema definition is written +using the JSON Schema format. (Although the format is named “JSON Schema”, the +schema itself is written in YAML, like the input files it describes.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.

+

View the Exact Schema Specification

+

(ncn-m001#) To view the exact schema specification, run sat bootprep view-schema.

+
sat bootprep view-schema
+

Example output:

+
---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+title: Bootprep Input File
+description: >
+  A description of the set of CFS configurations to create, the set of IMS
+  images to create and optionally customize with the defined CFS configurations,
+  and the set of BOS session templates to create that reference the defined
+  images and configurations.
+type: object
+additionalProperties: false
+properties:
+  ...
+

Generate User-Friendly Documentation

+

The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature is included +with sat bootprep that generates user-friendly HTML documentation for the input +file schema. This HTML documentation can be browsed with a web browser.

+
    +
  1. +

    (ncn-m001#) Create a documentation tarball using sat bootprep.

    +
    sat bootprep generate-docs
    +

    Example output:

    +
    INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
    +

    An alternate output directory can be specified with the --output-dir +option. The generated tarball is always named bootprep-schema-docs.tar.gz.

    +
    sat bootprep generate-docs --output-dir /tmp
    +

    Example output:

    +
    INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
    +
  2. +
  3. +

    (user@hostname>) From another machine, copy the tarball to a local directory.

    +
    scp root@ncn-m001:bootprep-schema-docs.tar.gz .
    +
  4. +
  5. +

    (user@hostname>) Extract the contents of the tarball and open the contained index.html.

    +
    tar xzvf bootprep-schema-docs.tar.gz
    +

    Example output:

    +
    x bootprep-schema-docs/
    +x bootprep-schema-docs/index.html
    +x bootprep-schema-docs/schema_doc.css
    +x bootprep-schema-docs/schema_doc.min.js
    +another-machine$ open bootprep-schema-docs/index.html
    +
  6. +
+ + +
+ +
+ + +
+ + +
+ + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + diff --git a/fonts/Inconsolata.eot b/fonts/Inconsolata.eot new file mode 100644 index 0000000000..0a705d653f Binary files /dev/null and b/fonts/Inconsolata.eot differ diff --git a/fonts/Inconsolata.svg b/fonts/Inconsolata.svg new file mode 100644 index 0000000000..36775f0749 --- /dev/null +++ b/fonts/Inconsolata.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Inconsolata.ttf b/fonts/Inconsolata.ttf new file mode 100644 index 0000000000..4b8a36d249 Binary files /dev/null and b/fonts/Inconsolata.ttf differ diff --git a/fonts/Inconsolata.woff b/fonts/Inconsolata.woff new file mode 100644 index 0000000000..6f39625e58 Binary files /dev/null and b/fonts/Inconsolata.woff differ diff --git a/fonts/Novecentosanswide-Normal-webfont.eot b/fonts/Novecentosanswide-Normal-webfont.eot new file mode 100644 index 0000000000..9984682fc9 Binary files /dev/null and b/fonts/Novecentosanswide-Normal-webfont.eot differ diff --git a/fonts/Novecentosanswide-Normal-webfont.svg b/fonts/Novecentosanswide-Normal-webfont.svg new file mode 100644 index 0000000000..6fa1a66e30 --- /dev/null +++ b/fonts/Novecentosanswide-Normal-webfont.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Novecentosanswide-Normal-webfont.ttf b/fonts/Novecentosanswide-Normal-webfont.ttf new file mode 100644 index 0000000000..8cfb62dd59 Binary files /dev/null and b/fonts/Novecentosanswide-Normal-webfont.ttf differ diff --git a/fonts/Novecentosanswide-Normal-webfont.woff b/fonts/Novecentosanswide-Normal-webfont.woff new file mode 100644 index 0000000000..d5c4290791 Binary files /dev/null and b/fonts/Novecentosanswide-Normal-webfont.woff differ diff --git a/fonts/Novecentosanswide-Normal-webfont.woff2 b/fonts/Novecentosanswide-Normal-webfont.woff2 new file mode 100644 index 0000000000..eefb4a3186 Binary files /dev/null and b/fonts/Novecentosanswide-Normal-webfont.woff2 differ diff --git a/fonts/Novecentosanswide-UltraLight-webfont.eot 
b/fonts/Novecentosanswide-UltraLight-webfont.eot new file mode 100644 index 0000000000..2a26561f90 Binary files /dev/null and b/fonts/Novecentosanswide-UltraLight-webfont.eot differ diff --git a/fonts/Novecentosanswide-UltraLight-webfont.svg b/fonts/Novecentosanswide-UltraLight-webfont.svg new file mode 100644 index 0000000000..c4e903b61a --- /dev/null +++ b/fonts/Novecentosanswide-UltraLight-webfont.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Novecentosanswide-UltraLight-webfont.ttf b/fonts/Novecentosanswide-UltraLight-webfont.ttf new file mode 100644 index 0000000000..9ce9c7f99d Binary files /dev/null and b/fonts/Novecentosanswide-UltraLight-webfont.ttf differ diff --git a/fonts/Novecentosanswide-UltraLight-webfont.woff b/fonts/Novecentosanswide-UltraLight-webfont.woff new file mode 100644 index 0000000000..381650c98d Binary files /dev/null and b/fonts/Novecentosanswide-UltraLight-webfont.woff differ diff --git a/fonts/Novecentosanswide-UltraLight-webfont.woff2 b/fonts/Novecentosanswide-UltraLight-webfont.woff2 new file mode 100644 index 0000000000..7e659549bc Binary files /dev/null and b/fonts/Novecentosanswide-UltraLight-webfont.woff2 differ diff --git a/fonts/Work_Sans_200.eot b/fonts/Work_Sans_200.eot new file mode 100644 index 0000000000..4052e4f94a Binary files /dev/null and b/fonts/Work_Sans_200.eot differ diff --git a/fonts/Work_Sans_200.svg b/fonts/Work_Sans_200.svg new file mode 100644 index 0000000000..0ffbd3a845 --- /dev/null +++ b/fonts/Work_Sans_200.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Work_Sans_200.ttf b/fonts/Work_Sans_200.ttf new file mode 100644 index 0000000000..68019e1ccd Binary files /dev/null and b/fonts/Work_Sans_200.ttf differ diff --git a/fonts/Work_Sans_200.woff b/fonts/Work_Sans_200.woff new file mode 100644 index 0000000000..a1bd9e4699 Binary files /dev/null and b/fonts/Work_Sans_200.woff differ diff --git a/fonts/Work_Sans_200.woff2 b/fonts/Work_Sans_200.woff2 new file mode 100644 
index 0000000000..20c68a75c4 Binary files /dev/null and b/fonts/Work_Sans_200.woff2 differ diff --git a/fonts/Work_Sans_300.eot b/fonts/Work_Sans_300.eot new file mode 100644 index 0000000000..ace799382a Binary files /dev/null and b/fonts/Work_Sans_300.eot differ diff --git a/fonts/Work_Sans_300.svg b/fonts/Work_Sans_300.svg new file mode 100644 index 0000000000..7d2936783b --- /dev/null +++ b/fonts/Work_Sans_300.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Work_Sans_300.ttf b/fonts/Work_Sans_300.ttf new file mode 100644 index 0000000000..35387c2357 Binary files /dev/null and b/fonts/Work_Sans_300.ttf differ diff --git a/fonts/Work_Sans_300.woff b/fonts/Work_Sans_300.woff new file mode 100644 index 0000000000..8d789eae97 Binary files /dev/null and b/fonts/Work_Sans_300.woff differ diff --git a/fonts/Work_Sans_300.woff2 b/fonts/Work_Sans_300.woff2 new file mode 100644 index 0000000000..f6e216d64d Binary files /dev/null and b/fonts/Work_Sans_300.woff2 differ diff --git a/fonts/Work_Sans_500.eot b/fonts/Work_Sans_500.eot new file mode 100644 index 0000000000..9df6929428 Binary files /dev/null and b/fonts/Work_Sans_500.eot differ diff --git a/fonts/Work_Sans_500.svg b/fonts/Work_Sans_500.svg new file mode 100644 index 0000000000..90a91c14cc --- /dev/null +++ b/fonts/Work_Sans_500.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fonts/Work_Sans_500.ttf b/fonts/Work_Sans_500.ttf new file mode 100644 index 0000000000..5b8cc5342b Binary files /dev/null and b/fonts/Work_Sans_500.ttf differ diff --git a/fonts/Work_Sans_500.woff b/fonts/Work_Sans_500.woff new file mode 100644 index 0000000000..df058514fb Binary files /dev/null and b/fonts/Work_Sans_500.woff differ diff --git a/fonts/Work_Sans_500.woff2 b/fonts/Work_Sans_500.woff2 new file mode 100644 index 0000000000..b06c54df0b Binary files /dev/null and b/fonts/Work_Sans_500.woff2 differ diff --git a/images/clippy.svg b/images/clippy.svg new file mode 100644 index 0000000000..f4551735e1 --- 
/dev/null +++ b/images/clippy.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/images/favicon.png b/images/favicon.png new file mode 100644 index 0000000000..df06e35d62 Binary files /dev/null and b/images/favicon.png differ diff --git a/images/gopher-404.jpg b/images/gopher-404.jpg new file mode 100644 index 0000000000..2a5054389c Binary files /dev/null and b/images/gopher-404.jpg differ diff --git a/index.html b/index.html new file mode 100644 index 0000000000..a4742b612d --- /dev/null +++ b/index.html @@ -0,0 +1,10 @@ + + + + /docs-sat/en-26/ + + + + + + diff --git a/js/auto-complete.js b/js/auto-complete.js new file mode 100644 index 0000000000..0b46054568 --- /dev/null +++ b/js/auto-complete.js @@ -0,0 +1,3 @@ +// JavaScript autoComplete v1.0.4 +// https://github.com/Pixabay/JavaScript-autoComplete +var autoComplete=function(){function e(e){function t(e,t){return e.classList?e.classList.contains(t):new RegExp("\\b"+t+"\\b").test(e.className)}function o(e,t,o){e.attachEvent?e.attachEvent("on"+t,o):e.addEventListener(t,o)}function s(e,t,o){e.detachEvent?e.detachEvent("on"+t,o):e.removeEventListener(t,o)}function n(e,s,n,l){o(l||document,s,function(o){for(var s,l=o.target||o.srcElement;l&&!(s=t(l,e));)l=l.parentElement;s&&n.call(l,o)})}if(document.querySelector){var l={selector:0,source:0,minChars:3,delay:150,offsetLeft:0,offsetTop:1,cache:1,menuClass:"",renderItem:function(e,t){t=t.replace(/[-\/\\^$*+?.()|[\]{}]/g,"\\$&");var o=new RegExp("("+t.split(" ").join("|")+")","gi");return'
'+e.replace(o,"$1")+"
"},onSelect:function(){}};for(var c in e)e.hasOwnProperty(c)&&(l[c]=e[c]);for(var a="object"==typeof l.selector?[l.selector]:document.querySelectorAll(l.selector),u=0;u0?i.sc.scrollTop=n+i.sc.suggestionHeight+s-i.sc.maxHeight:0>n&&(i.sc.scrollTop=n+s)}else i.sc.scrollTop=0},o(window,"resize",i.updateSC),document.body.appendChild(i.sc),n("autocomplete-suggestion","mouseleave",function(){var e=i.sc.querySelector(".autocomplete-suggestion.selected");e&&setTimeout(function(){e.className=e.className.replace("selected","")},20)},i.sc),n("autocomplete-suggestion","mouseover",function(){var e=i.sc.querySelector(".autocomplete-suggestion.selected");e&&(e.className=e.className.replace("selected","")),this.className+=" selected"},i.sc),n("autocomplete-suggestion","mousedown",function(e){if(t(this,"autocomplete-suggestion")){var o=this.getAttribute("data-val");i.value=o,l.onSelect(e,o,this),i.sc.style.display="none"}},i.sc),i.blurHandler=function(){try{var e=document.querySelector(".autocomplete-suggestions:hover")}catch(t){var e=0}e?i!==document.activeElement&&setTimeout(function(){i.focus()},20):(i.last_val=i.value,i.sc.style.display="none",setTimeout(function(){i.sc.style.display="none"},350))},o(i,"blur",i.blurHandler);var r=function(e){var t=i.value;if(i.cache[t]=e,e.length&&t.length>=l.minChars){for(var o="",s=0;st||t>40)&&13!=t&&27!=t){var o=i.value;if(o.length>=l.minChars){if(o!=i.last_val){if(i.last_val=o,clearTimeout(i.timer),l.cache){if(o in i.cache)return void r(i.cache[o]);for(var s=1;s https://github.com/noelboss/featherlight/issues/317 +!function(u){"use strict";if(void 0!==u)if(u.fn.jquery.match(/-ajax/))"console"in window&&window.console.info("Featherlight needs regular jQuery, not the slim version.");else{var r=[],i=function(t){return r=u.grep(r,function(e){return e!==t&&0','
','",'
'+n.loading+"
","
",""].join("")),o="."+n.namespace+"-close"+(n.otherClose?","+n.otherClose:"");return n.$instance=i.clone().addClass(n.variant),n.$instance.on(n.closeTrigger+"."+n.namespace,function(e){if(!e.isDefaultPrevented()){var t=u(e.target);("background"===n.closeOnClick&&t.is("."+n.namespace)||"anywhere"===n.closeOnClick||t.closest(o).length)&&(n.close(e),e.preventDefault())}}),this},getContent:function(){if(!1!==this.persist&&this.$content)return this.$content;var t=this,e=this.constructor.contentFilters,n=function(e){return t.$currentTarget&&t.$currentTarget.attr(e)},r=n(t.targetAttr),i=t.target||r||"",o=e[t.type];if(!o&&i in e&&(o=e[i],i=t.target&&r),i=i||n("href")||"",!o)for(var a in e)t[a]&&(o=e[a],i=t[a]);if(!o){var s=i;if(i=null,u.each(t.contentFilters,function(){return(o=e[this]).test&&(i=o.test(s)),!i&&o.regex&&s.match&&s.match(o.regex)&&(i=s),!i}),!i)return"console"in window&&window.console.error("Featherlight: no content filter found "+(s?' for "'+s+'"':" (no target specified)")),!1}return o.process.call(t,i)},setContent:function(e){return this.$instance.removeClass(this.namespace+"-loading"),this.$instance.toggleClass(this.namespace+"-iframe",e.is("iframe")),this.$instance.find("."+this.namespace+"-inner").not(e).slice(1).remove().end().replaceWith(u.contains(this.$instance[0],e[0])?"":e),this.$content=e.addClass(this.namespace+"-inner"),this},open:function(t){var n=this;if(n.$instance.hide().appendTo(n.root),!(t&&t.isDefaultPrevented()||!1===n.beforeOpen(t))){t&&t.preventDefault();var e=n.getContent();if(e)return r.push(n),s(!0),n.$instance.fadeIn(n.openSpeed),n.beforeContent(t),u.when(e).always(function(e){n.setContent(e),n.afterContent(t)}).then(n.$instance.promise()).done(function(){n.afterOpen(t)})}return n.$instance.detach(),u.Deferred().reject().promise()},close:function(e){var 
t=this,n=u.Deferred();return!1===t.beforeClose(e)?n.reject():(0===i(t).length&&s(!1),t.$instance.fadeOut(t.closeSpeed,function(){t.$instance.detach(),t.afterClose(e),n.resolve()})),n.promise()},resize:function(e,t){if(e&&t&&(this.$content.css("width","").css("height",""),this.$content.parent().width()');return n.onload=function(){r.naturalWidth=n.width,r.naturalHeight=n.height,t.resolve(r)},n.onerror=function(){t.reject(r)},n.src=e,t.promise()}},html:{regex:/^\s*<[\w!][^<]*>/,process:function(e){return u(e)}},ajax:{regex:/./,process:function(e){var n=u.Deferred(),r=u("
").load(e,function(e,t){"error"!==t&&n.resolve(r.contents()),n.fail()});return n.promise()}},iframe:{process:function(e){var t=new u.Deferred,n=u("