diff --git a/css/atom-one-dark-reasonable.css b/css/atom-one-dark-reasonable.css new file mode 100644 index 0000000000..fd41c996a3 --- /dev/null +++ b/css/atom-one-dark-reasonable.css @@ -0,0 +1,77 @@ +/* + +Atom One Dark With support for ReasonML by Gidi Morris, based off work by Daniel Gamage + +Original One Dark Syntax theme from https://github.com/atom/one-dark-syntax + +*/ +.hljs { + display: block; + overflow-x: auto; + padding: 0.5em; + line-height: 1.3em; + color: #abb2bf; + background: #282c34; + border-radius: 5px; +} +.hljs-keyword, .hljs-operator { + color: #F92672; +} +.hljs-pattern-match { + color: #F92672; +} +.hljs-pattern-match .hljs-constructor { + color: #61aeee; +} +.hljs-function { + color: #61aeee; +} +.hljs-function .hljs-params { + color: #A6E22E; +} +.hljs-function .hljs-params .hljs-typing { + color: #FD971F; +} +.hljs-module-access .hljs-module { + color: #7e57c2; +} +.hljs-constructor { + color: #e2b93d; +} +.hljs-constructor .hljs-string { + color: #9CCC65; +} +.hljs-comment, .hljs-quote { + color: #b18eb1; + font-style: italic; +} +.hljs-doctag, .hljs-formula { + color: #c678dd; +} +.hljs-section, .hljs-name, .hljs-selector-tag, .hljs-deletion, .hljs-subst { + color: #e06c75; +} +.hljs-literal { + color: #56b6c2; +} +.hljs-string, .hljs-regexp, .hljs-addition, .hljs-attribute, .hljs-meta-string { + color: #98c379; +} +.hljs-built_in, .hljs-class .hljs-title { + color: #e6c07b; +} +.hljs-attr, .hljs-variable, .hljs-template-variable, .hljs-type, .hljs-selector-class, .hljs-selector-attr, .hljs-selector-pseudo, .hljs-number { + color: #d19a66; +} +.hljs-symbol, .hljs-bullet, .hljs-link, .hljs-meta, .hljs-selector-id, .hljs-title { + color: #61aeee; +} +.hljs-emphasis { + font-style: italic; +} +.hljs-strong { + font-weight: bold; +} +.hljs-link { + text-decoration: underline; +} diff --git a/css/auto-complete.css b/css/auto-complete.css new file mode 100644 index 0000000000..1557ef6ae0 --- /dev/null +++ b/css/auto-complete.css @@ -0,0 
+1,49 @@ +.autocomplete-suggestions { + text-align: left; + cursor: default; + border: 1px solid #ccc; + border-top: 0; + background: #fff; + box-shadow: -1px 1px 3px rgba(0,0,0,.8); + + /* core styles should not be changed */ + position: absolute; + display: none; + z-index: 9999; + max-height: 80%; + width: 33% !important; + min-width: 266px; + overflow: hidden; + overflow-y: auto; + box-sizing: border-box; + +} +.autocomplete-suggestion { + position: relative; + cursor: pointer; + padding: 7px; + line-height: 23px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + color: #333; +} + +.autocomplete-suggestion b { + font-weight: normal; + color: #1f8dd6; +} + +.autocomplete-suggestion.selected { + background: #333; + color: #fff; +} + +.autocomplete-suggestion:hover { + background: #444; + color: #fff; +} + +.autocomplete-suggestion > .context { + font-size: 12px; +} diff --git a/css/featherlight.min.css b/css/featherlight.min.css new file mode 100644 index 0000000000..058487f916 --- /dev/null +++ b/css/featherlight.min.css @@ -0,0 +1,8 @@ +/** + * Featherlight - ultra slim jQuery lightbox + * Version 1.7.13 - http://noelboss.github.io/featherlight/ + * + * Copyright 2018, Noël Raoul Bossart (http://www.noelboss.com) + * MIT Licensed. 
+**/ +html.with-featherlight{overflow:hidden}.featherlight{display:none;position:fixed;top:0;right:0;bottom:0;left:0;z-index:2147483647;text-align:center;white-space:nowrap;cursor:pointer;background:#333;background:rgba(0,0,0,0)}.featherlight:last-of-type{background:rgba(0,0,0,.8)}.featherlight:before{content:'';display:inline-block;height:100%;vertical-align:middle}.featherlight .featherlight-content{position:relative;text-align:left;vertical-align:middle;display:inline-block;overflow:auto;padding:25px 25px 0;border-bottom:25px solid transparent;margin-left:5%;margin-right:5%;max-height:95%;background:#fff;cursor:auto;white-space:normal}.featherlight .featherlight-inner{display:block}.featherlight link.featherlight-inner,.featherlight script.featherlight-inner,.featherlight style.featherlight-inner{display:none}.featherlight .featherlight-close-icon{position:absolute;z-index:9999;top:0;right:0;line-height:25px;width:25px;cursor:pointer;text-align:center;font-family:Arial,sans-serif;background:#fff;background:rgba(255,255,255,.3);color:#000;border:0;padding:0}.featherlight .featherlight-close-icon::-moz-focus-inner{border:0;padding:0}.featherlight .featherlight-image{width:100%}.featherlight-iframe .featherlight-content{border-bottom:0;padding:0;-webkit-overflow-scrolling:touch}.featherlight iframe{border:0}.featherlight *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}@media only screen and (max-width:1024px){.featherlight .featherlight-content{margin-left:0;margin-right:0;max-height:98%;padding:10px 10px 0;border-bottom:10px solid transparent}}@media print{html.with-featherlight>*>:not(.featherlight){display:none}} \ No newline at end of file diff --git a/css/fontawesome-all.min.css b/css/fontawesome-all.min.css new file mode 100644 index 0000000000..de56473722 --- /dev/null +++ b/css/fontawesome-all.min.css @@ -0,0 +1 @@ 
+.fa,.fab,.fal,.far,.fas{-moz-osx-font-smoothing:grayscale;-webkit-font-smoothing:antialiased;display:inline-block;font-style:normal;font-variant:normal;text-rendering:auto;line-height:1}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-.0667em}.fa-xs{font-size:.75em}.fa-sm{font-size:.875em}.fa-1x{font-size:1em}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-6x{font-size:6em}.fa-7x{font-size:7em}.fa-8x{font-size:8em}.fa-9x{font-size:9em}.fa-10x{font-size:10em}.fa-fw{text-align:center;width:1.25em}.fa-ul{list-style-type:none;margin-left:2.5em;padding-left:0}.fa-ul>li{position:relative}.fa-li{left:-2em;position:absolute;text-align:center;width:2em;line-height:inherit}.fa-border{border:.08em solid #eee;border-radius:.1em;padding:.2em .25em .15em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left,.fab.fa-pull-left,.fal.fa-pull-left,.far.fa-pull-left,.fas.fa-pull-left{margin-right:.3em}.fa.fa-pull-right,.fab.fa-pull-right,.fal.fa-pull-right,.far.fa-pull-right,.fas.fa-pull-right{margin-left:.3em}.fa-spin{animation:fa-spin 2s infinite linear}.fa-pulse{animation:fa-spin 1s infinite steps(8)}@keyframes fa-spin{0%{transform:rotate(0deg)}to{transform:rotate(1turn)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";transform:scaleX(-1)}.fa-flip-vertical{transform:scaleY(-1)}.fa-flip-horizontal.fa-flip-vertical,.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)"}.fa-flip-horizontal.fa-flip-vertical{transform:scale(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root 
.fa-rotate-180,:root .fa-rotate-270{filter:none}.fa-stack{display:inline-block;height:2em;line-height:2em;position:relative;vertical-align:middle;width:2.5em}.fa-stack-1x,.fa-stack-2x{left:0;position:absolute;text-align:center;width:100%}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-500px:before{content:"\f26e"}.fa-accessible-icon:before{content:"\f368"}.fa-accusoft:before{content:"\f369"}.fa-acquisitions-incorporated:before{content:"\f6af"}.fa-ad:before{content:"\f641"}.fa-address-book:before{content:"\f2b9"}.fa-address-card:before{content:"\f2bb"}.fa-adjust:before{content:"\f042"}.fa-adn:before{content:"\f170"}.fa-adobe:before{content:"\f778"}.fa-adversal:before{content:"\f36a"}.fa-affiliatetheme:before{content:"\f36b"}.fa-air-freshener:before{content:"\f5d0"}.fa-algolia:before{content:"\f36c"}.fa-align-center:before{content:"\f037"}.fa-align-justify:before{content:"\f039"}.fa-align-left:before{content:"\f036"}.fa-align-right:before{content:"\f038"}.fa-alipay:before{content:"\f642"}.fa-allergies:before{content:"\f461"}.fa-amazon:before{content:"\f270"}.fa-amazon-pay:before{content:"\f42c"}.fa-ambulance:before{content:"\f0f9"}.fa-american-sign-language-interpreting:before{content:"\f2a3"}.fa-amilia:before{content:"\f36d"}.fa-anchor:before{content:"\f13d"}.fa-android:before{content:"\f17b"}.fa-angellist:before{content:"\f209"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-down:before{content:"\f107"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angry:before{content:"\f556"}.fa-angrycreative:before{content:"\f36e"}.fa-angular:before{content:"\f420"}.fa-ankh:before{content:"\f644"}.fa-app-store:before{content:"\f36f"}.fa-app-store-ios:before{content:"\f370"}.fa-apper:before{content:"\f371"}.fa-apple:before{content:"\f1
79"}.fa-apple-alt:before{content:"\f5d1"}.fa-apple-pay:before{content:"\f415"}.fa-archive:before{content:"\f187"}.fa-archway:before{content:"\f557"}.fa-arrow-alt-circle-down:before{content:"\f358"}.fa-arrow-alt-circle-left:before{content:"\f359"}.fa-arrow-alt-circle-right:before{content:"\f35a"}.fa-arrow-alt-circle-up:before{content:"\f35b"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-down:before{content:"\f063"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrows-alt:before{content:"\f0b2"}.fa-arrows-alt-h:before{content:"\f337"}.fa-arrows-alt-v:before{content:"\f338"}.fa-artstation:before{content:"\f77a"}.fa-assistive-listening-systems:before{content:"\f2a2"}.fa-asterisk:before{content:"\f069"}.fa-asymmetrik:before{content:"\f372"}.fa-at:before{content:"\f1fa"}.fa-atlas:before{content:"\f558"}.fa-atlassian:before{content:"\f77b"}.fa-atom:before{content:"\f5d2"}.fa-audible:before{content:"\f373"}.fa-audio-description:before{content:"\f29e"}.fa-autoprefixer:before{content:"\f41c"}.fa-avianex:before{content:"\f374"}.fa-aviato:before{content:"\f421"}.fa-award:before{content:"\f559"}.fa-aws:before{content:"\f375"}.fa-baby:before{content:"\f77c"}.fa-baby-carriage:before{content:"\f77d"}.fa-backspace:before{content:"\f55a"}.fa-backward:before{content:"\f04a"}.fa-balance-scale:before{content:"\f24e"}.fa-ban:before{content:"\f05e"}.fa-band-aid:before{content:"\f462"}.fa-bandcamp:before{content:"\f2d5"}.fa-barcode:before{content:"\f02a"}.fa-bars:before{content:"\f0c9"}.fa-baseball-ball:before{content:"\f433"}.fa-basketball-ball:before{content:"\f434"}.fa-bath:before{content:"\f2cd"}.fa-battery-empty:before{content:"\f244"}.fa-battery-full:before{content:"\f240"}.fa-battery-half:before{content:"\f242"}.fa-battery-quarter:before{content:"\f243"}.fa-battery-three-qu
arters:before{content:"\f241"}.fa-bed:before{content:"\f236"}.fa-beer:before{content:"\f0fc"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-bell:before{content:"\f0f3"}.fa-bell-slash:before{content:"\f1f6"}.fa-bezier-curve:before{content:"\f55b"}.fa-bible:before{content:"\f647"}.fa-bicycle:before{content:"\f206"}.fa-bimobject:before{content:"\f378"}.fa-binoculars:before{content:"\f1e5"}.fa-biohazard:before{content:"\f780"}.fa-birthday-cake:before{content:"\f1fd"}.fa-bitbucket:before{content:"\f171"}.fa-bitcoin:before{content:"\f379"}.fa-bity:before{content:"\f37a"}.fa-black-tie:before{content:"\f27e"}.fa-blackberry:before{content:"\f37b"}.fa-blender:before{content:"\f517"}.fa-blender-phone:before{content:"\f6b6"}.fa-blind:before{content:"\f29d"}.fa-blog:before{content:"\f781"}.fa-blogger:before{content:"\f37c"}.fa-blogger-b:before{content:"\f37d"}.fa-bluetooth:before{content:"\f293"}.fa-bluetooth-b:before{content:"\f294"}.fa-bold:before{content:"\f032"}.fa-bolt:before{content:"\f0e7"}.fa-bomb:before{content:"\f1e2"}.fa-bone:before{content:"\f5d7"}.fa-bong:before{content:"\f55c"}.fa-book:before{content:"\f02d"}.fa-book-dead:before{content:"\f6b7"}.fa-book-open:before{content:"\f518"}.fa-book-reader:before{content:"\f5da"}.fa-bookmark:before{content:"\f02e"}.fa-bowling-ball:before{content:"\f436"}.fa-box:before{content:"\f466"}.fa-box-open:before{content:"\f49e"}.fa-boxes:before{content:"\f468"}.fa-braille:before{content:"\f2a1"}.fa-brain:before{content:"\f5dc"}.fa-briefcase:before{content:"\f0b1"}.fa-briefcase-medical:before{content:"\f469"}.fa-broadcast-tower:before{content:"\f519"}.fa-broom:before{content:"\f51a"}.fa-brush:before{content:"\f55d"}.fa-btc:before{content:"\f15a"}.fa-bug:before{content:"\f188"}.fa-building:before{content:"\f1ad"}.fa-bullhorn:before{content:"\f0a1"}.fa-bullseye:before{content:"\f140"}.fa-burn:before{content:"\f46a"}.fa-buromobelexperte:before{content:"\f37f"}.fa-bus:before{content:"\f207"}.fa-bus-alt:be
fore{content:"\f55e"}.fa-business-time:before{content:"\f64a"}.fa-buysellads:before{content:"\f20d"}.fa-calculator:before{content:"\f1ec"}.fa-calendar:before{content:"\f133"}.fa-calendar-alt:before{content:"\f073"}.fa-calendar-check:before{content:"\f274"}.fa-calendar-day:before{content:"\f783"}.fa-calendar-minus:before{content:"\f272"}.fa-calendar-plus:before{content:"\f271"}.fa-calendar-times:before{content:"\f273"}.fa-calendar-week:before{content:"\f784"}.fa-camera:before{content:"\f030"}.fa-camera-retro:before{content:"\f083"}.fa-campground:before{content:"\f6bb"}.fa-canadian-maple-leaf:before{content:"\f785"}.fa-candy-cane:before{content:"\f786"}.fa-cannabis:before{content:"\f55f"}.fa-capsules:before{content:"\f46b"}.fa-car:before{content:"\f1b9"}.fa-car-alt:before{content:"\f5de"}.fa-car-battery:before{content:"\f5df"}.fa-car-crash:before{content:"\f5e1"}.fa-car-side:before{content:"\f5e4"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-caret-square-down:before{content:"\f150"}.fa-caret-square-left:before{content:"\f191"}.fa-caret-square-right:before{content:"\f152"}.fa-caret-square-up:before{content:"\f151"}.fa-caret-up:before{content:"\f0d8"}.fa-carrot:before{content:"\f787"}.fa-cart-arrow-down:before{content:"\f218"}.fa-cart-plus:before{content:"\f217"}.fa-cash-register:before{content:"\f788"}.fa-cat:before{content:"\f6be"}.fa-cc-amazon-pay:before{content:"\f42d"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-apple-pay:before{content:"\f416"}.fa-cc-diners-club:before{content:"\f24c"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-cc-visa:before{content:"\f1f0"}.fa-centercode:before{content:"\f380"}.fa-centos:before{content:"\f789"}.fa-certificate:before{content:"\f0a3"}.fa-chair:before{content:"\f6c0"}.fa-chalkboard:before{content:"\f51b"}.fa-chalkboa
rd-teacher:before{content:"\f51c"}.fa-charging-station:before{content:"\f5e7"}.fa-chart-area:before{content:"\f1fe"}.fa-chart-bar:before{content:"\f080"}.fa-chart-line:before{content:"\f201"}.fa-chart-pie:before{content:"\f200"}.fa-check:before{content:"\f00c"}.fa-check-circle:before{content:"\f058"}.fa-check-double:before{content:"\f560"}.fa-check-square:before{content:"\f14a"}.fa-chess:before{content:"\f439"}.fa-chess-bishop:before{content:"\f43a"}.fa-chess-board:before{content:"\f43c"}.fa-chess-king:before{content:"\f43f"}.fa-chess-knight:before{content:"\f441"}.fa-chess-pawn:before{content:"\f443"}.fa-chess-queen:before{content:"\f445"}.fa-chess-rook:before{content:"\f447"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-down:before{content:"\f078"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-chevron-up:before{content:"\f077"}.fa-child:before{content:"\f1ae"}.fa-chrome:before{content:"\f268"}.fa-church:before{content:"\f51d"}.fa-circle:before{content:"\f111"}.fa-circle-notch:before{content:"\f1ce"}.fa-city:before{content:"\f64f"}.fa-clipboard:before{content:"\f328"}.fa-clipboard-check:before{content:"\f46c"}.fa-clipboard-list:before{content:"\f46d"}.fa-clock:before{content:"\f017"}.fa-clone:before{content:"\f24d"}.fa-closed-captioning:before{content:"\f20a"}.fa-cloud:before{content:"\f0c2"}.fa-cloud-download-alt:before{content:"\f381"}.fa-cloud-meatball:before{content:"\f73b"}.fa-cloud-moon:before{content:"\f6c3"}.fa-cloud-moon-rain:before{content:"\f73c"}.fa-cloud-rain:before{content:"\f73d"}.fa-cloud-showers-heavy:before{content:"\f740"}.fa-cloud-sun:before{content:"\f6c4"}.fa-cloud-sun-rain:before{content:"\f743"}.fa-cloud-upload-alt:before{content:"\f382"}.fa-cloudscale:before{content:"\f383"}.fa-cloudsmith:before{content:"\f384"}.fa-cloudversify:before{content:"\f385"}.f
a-cocktail:before{content:"\f561"}.fa-code:before{content:"\f121"}.fa-code-branch:before{content:"\f126"}.fa-codepen:before{content:"\f1cb"}.fa-codiepie:before{content:"\f284"}.fa-coffee:before{content:"\f0f4"}.fa-cog:before{content:"\f013"}.fa-cogs:before{content:"\f085"}.fa-coins:before{content:"\f51e"}.fa-columns:before{content:"\f0db"}.fa-comment:before{content:"\f075"}.fa-comment-alt:before{content:"\f27a"}.fa-comment-dollar:before{content:"\f651"}.fa-comment-dots:before{content:"\f4ad"}.fa-comment-slash:before{content:"\f4b3"}.fa-comments:before{content:"\f086"}.fa-comments-dollar:before{content:"\f653"}.fa-compact-disc:before{content:"\f51f"}.fa-compass:before{content:"\f14e"}.fa-compress:before{content:"\f066"}.fa-compress-arrows-alt:before{content:"\f78c"}.fa-concierge-bell:before{content:"\f562"}.fa-confluence:before{content:"\f78d"}.fa-connectdevelop:before{content:"\f20e"}.fa-contao:before{content:"\f26d"}.fa-cookie:before{content:"\f563"}.fa-cookie-bite:before{content:"\f564"}.fa-copy:before{content:"\f0c5"}.fa-copyright:before{content:"\f1f9"}.fa-couch:before{content:"\f4b8"}.fa-cpanel:before{content:"\f388"}.fa-creative-commons:before{content:"\f25e"}.fa-creative-commons-by:before{content:"\f4e7"}.fa-creative-commons-nc:before{content:"\f4e8"}.fa-creative-commons-nc-eu:before{content:"\f4e9"}.fa-creative-commons-nc-jp:before{content:"\f4ea"}.fa-creative-commons-nd:before{content:"\f4eb"}.fa-creative-commons-pd:before{content:"\f4ec"}.fa-creative-commons-pd-alt:before{content:"\f4ed"}.fa-creative-commons-remix:before{content:"\f4ee"}.fa-creative-commons-sa:before{content:"\f4ef"}.fa-creative-commons-sampling:before{content:"\f4f0"}.fa-creative-commons-sampling-plus:before{content:"\f4f1"}.fa-creative-commons-share:before{content:"\f4f2"}.fa-creative-commons-zero:before{content:"\f4f3"}.fa-credit-card:before{content:"\f09d"}.fa-critical-role:before{content:"\f6c9"}.fa-crop:before{content:"\f125"}.fa-crop-alt:before{content:"\f565"}.fa-cross:before{conte
nt:"\f654"}.fa-crosshairs:before{content:"\f05b"}.fa-crow:before{content:"\f520"}.fa-crown:before{content:"\f521"}.fa-css3:before{content:"\f13c"}.fa-css3-alt:before{content:"\f38b"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-cut:before{content:"\f0c4"}.fa-cuttlefish:before{content:"\f38c"}.fa-d-and-d:before{content:"\f38d"}.fa-d-and-d-beyond:before{content:"\f6ca"}.fa-dashcube:before{content:"\f210"}.fa-database:before{content:"\f1c0"}.fa-deaf:before{content:"\f2a4"}.fa-delicious:before{content:"\f1a5"}.fa-democrat:before{content:"\f747"}.fa-deploydog:before{content:"\f38e"}.fa-deskpro:before{content:"\f38f"}.fa-desktop:before{content:"\f108"}.fa-dev:before{content:"\f6cc"}.fa-deviantart:before{content:"\f1bd"}.fa-dharmachakra:before{content:"\f655"}.fa-dhl:before{content:"\f790"}.fa-diagnoses:before{content:"\f470"}.fa-diaspora:before{content:"\f791"}.fa-dice:before{content:"\f522"}.fa-dice-d20:before{content:"\f6cf"}.fa-dice-d6:before{content:"\f6d1"}.fa-dice-five:before{content:"\f523"}.fa-dice-four:before{content:"\f524"}.fa-dice-one:before{content:"\f525"}.fa-dice-six:before{content:"\f526"}.fa-dice-three:before{content:"\f527"}.fa-dice-two:before{content:"\f528"}.fa-digg:before{content:"\f1a6"}.fa-digital-ocean:before{content:"\f391"}.fa-digital-tachograph:before{content:"\f566"}.fa-directions:before{content:"\f5eb"}.fa-discord:before{content:"\f392"}.fa-discourse:before{content:"\f393"}.fa-divide:before{content:"\f529"}.fa-dizzy:before{content:"\f567"}.fa-dna:before{content:"\f471"}.fa-dochub:before{content:"\f394"}.fa-docker:before{content:"\f395"}.fa-dog:before{content:"\f6d3"}.fa-dollar-sign:before{content:"\f155"}.fa-dolly:before{content:"\f472"}.fa-dolly-flatbed:before{content:"\f474"}.fa-donate:before{content:"\f4b9"}.fa-door-closed:before{content:"\f52a"}.fa-door-open:before{content:"\f52b"}.fa-dot-circle:before{content:"\f192"}.fa-dove:before{content:"\f4ba"}.fa-download:before{content:"\f019"}.fa-draft2digital:before{content
:"\f396"}.fa-drafting-compass:before{content:"\f568"}.fa-dragon:before{content:"\f6d5"}.fa-draw-polygon:before{content:"\f5ee"}.fa-dribbble:before{content:"\f17d"}.fa-dribbble-square:before{content:"\f397"}.fa-dropbox:before{content:"\f16b"}.fa-drum:before{content:"\f569"}.fa-drum-steelpan:before{content:"\f56a"}.fa-drumstick-bite:before{content:"\f6d7"}.fa-drupal:before{content:"\f1a9"}.fa-dumbbell:before{content:"\f44b"}.fa-dumpster:before{content:"\f793"}.fa-dumpster-fire:before{content:"\f794"}.fa-dungeon:before{content:"\f6d9"}.fa-dyalog:before{content:"\f399"}.fa-earlybirds:before{content:"\f39a"}.fa-ebay:before{content:"\f4f4"}.fa-edge:before{content:"\f282"}.fa-edit:before{content:"\f044"}.fa-eject:before{content:"\f052"}.fa-elementor:before{content:"\f430"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-ello:before{content:"\f5f1"}.fa-ember:before{content:"\f423"}.fa-empire:before{content:"\f1d1"}.fa-envelope:before{content:"\f0e0"}.fa-envelope-open:before{content:"\f2b6"}.fa-envelope-open-text:before{content:"\f658"}.fa-envelope-square:before{content:"\f199"}.fa-envira:before{content:"\f299"}.fa-equals:before{content:"\f52c"}.fa-eraser:before{content:"\f12d"}.fa-erlang:before{content:"\f39d"}.fa-ethereum:before{content:"\f42e"}.fa-ethernet:before{content:"\f796"}.fa-etsy:before{content:"\f2d7"}.fa-euro-sign:before{content:"\f153"}.fa-exchange-alt:before{content:"\f362"}.fa-exclamation:before{content:"\f12a"}.fa-exclamation-circle:before{content:"\f06a"}.fa-exclamation-triangle:before{content:"\f071"}.fa-expand:before{content:"\f065"}.fa-expand-arrows-alt:before{content:"\f31e"}.fa-expeditedssl:before{content:"\f23e"}.fa-external-link-alt:before{content:"\f35d"}.fa-external-link-square-alt:before{content:"\f360"}.fa-eye:before{content:"\f06e"}.fa-eye-dropper:before{content:"\f1fb"}.fa-eye-slash:before{content:"\f070"}.fa-facebook:before{content:"\f09a"}.fa-facebook-f:before{content:"\f39e"}.fa-facebook-messenger:before{conten
t:"\f39f"}.fa-facebook-square:before{content:"\f082"}.fa-fantasy-flight-games:before{content:"\f6dc"}.fa-fast-backward:before{content:"\f049"}.fa-fast-forward:before{content:"\f050"}.fa-fax:before{content:"\f1ac"}.fa-feather:before{content:"\f52d"}.fa-feather-alt:before{content:"\f56b"}.fa-fedex:before{content:"\f797"}.fa-fedora:before{content:"\f798"}.fa-female:before{content:"\f182"}.fa-fighter-jet:before{content:"\f0fb"}.fa-figma:before{content:"\f799"}.fa-file:before{content:"\f15b"}.fa-file-alt:before{content:"\f15c"}.fa-file-archive:before{content:"\f1c6"}.fa-file-audio:before{content:"\f1c7"}.fa-file-code:before{content:"\f1c9"}.fa-file-contract:before{content:"\f56c"}.fa-file-csv:before{content:"\f6dd"}.fa-file-download:before{content:"\f56d"}.fa-file-excel:before{content:"\f1c3"}.fa-file-export:before{content:"\f56e"}.fa-file-image:before{content:"\f1c5"}.fa-file-import:before{content:"\f56f"}.fa-file-invoice:before{content:"\f570"}.fa-file-invoice-dollar:before{content:"\f571"}.fa-file-medical:before{content:"\f477"}.fa-file-medical-alt:before{content:"\f478"}.fa-file-pdf:before{content:"\f1c1"}.fa-file-powerpoint:before{content:"\f1c4"}.fa-file-prescription:before{content:"\f572"}.fa-file-signature:before{content:"\f573"}.fa-file-upload:before{content:"\f574"}.fa-file-video:before{content:"\f1c8"}.fa-file-word:before{content:"\f1c2"}.fa-fill:before{content:"\f575"}.fa-fill-drip:before{content:"\f576"}.fa-film:before{content:"\f008"}.fa-filter:before{content:"\f0b0"}.fa-fingerprint:before{content:"\f577"}.fa-fire:before{content:"\f06d"}.fa-fire-alt:before{content:"\f7e4"}.fa-fire-extinguisher:before{content:"\f134"}.fa-firefox:before{content:"\f269"}.fa-first-aid:before{content:"\f479"}.fa-first-order:before{content:"\f2b0"}.fa-first-order-alt:before{content:"\f50a"}.fa-firstdraft:before{content:"\f3a1"}.fa-fish:before{content:"\f578"}.fa-fist-raised:before{content:"\f6de"}.fa-flag:before{content:"\f024"}.fa-flag-checkered:before{content:"\f11e"}.fa-flag-u
sa:before{content:"\f74d"}.fa-flask:before{content:"\f0c3"}.fa-flickr:before{content:"\f16e"}.fa-flipboard:before{content:"\f44d"}.fa-flushed:before{content:"\f579"}.fa-fly:before{content:"\f417"}.fa-folder:before{content:"\f07b"}.fa-folder-minus:before{content:"\f65d"}.fa-folder-open:before{content:"\f07c"}.fa-folder-plus:before{content:"\f65e"}.fa-font:before{content:"\f031"}.fa-font-awesome:before{content:"\f2b4"}.fa-font-awesome-alt:before{content:"\f35c"}.fa-font-awesome-flag:before{content:"\f425"}.fa-font-awesome-logo-full:before{content:"\f4e6"}.fa-fonticons:before{content:"\f280"}.fa-fonticons-fi:before{content:"\f3a2"}.fa-football-ball:before{content:"\f44e"}.fa-fort-awesome:before{content:"\f286"}.fa-fort-awesome-alt:before{content:"\f3a3"}.fa-forumbee:before{content:"\f211"}.fa-forward:before{content:"\f04e"}.fa-foursquare:before{content:"\f180"}.fa-free-code-camp:before{content:"\f2c5"}.fa-freebsd:before{content:"\f3a4"}.fa-frog:before{content:"\f52e"}.fa-frown:before{content:"\f119"}.fa-frown-open:before{content:"\f57a"}.fa-fulcrum:before{content:"\f50b"}.fa-funnel-dollar:before{content:"\f662"}.fa-futbol:before{content:"\f1e3"}.fa-galactic-republic:before{content:"\f50c"}.fa-galactic-senate:before{content:"\f50d"}.fa-gamepad:before{content:"\f11b"}.fa-gas-pump:before{content:"\f52f"}.fa-gavel:before{content:"\f0e3"}.fa-gem:before{content:"\f3a5"}.fa-genderless:before{content:"\f22d"}.fa-get-pocket:before{content:"\f265"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-ghost:before{content:"\f6e2"}.fa-gift:before{content:"\f06b"}.fa-gifts:before{content:"\f79c"}.fa-git:before{content:"\f1d3"}.fa-git-square:before{content:"\f1d2"}.fa-github:before{content:"\f09b"}.fa-github-alt:before{content:"\f113"}.fa-github-square:before{content:"\f092"}.fa-gitkraken:before{content:"\f3a6"}.fa-gitlab:before{content:"\f296"}.fa-gitter:before{content:"\f426"}.fa-glass-cheers:before{content:"\f79f"}.fa-glass-martini:before{content:"\f000"}.fa-glas
s-martini-alt:before{content:"\f57b"}.fa-glass-whiskey:before{content:"\f7a0"}.fa-glasses:before{content:"\f530"}.fa-glide:before{content:"\f2a5"}.fa-glide-g:before{content:"\f2a6"}.fa-globe:before{content:"\f0ac"}.fa-globe-africa:before{content:"\f57c"}.fa-globe-americas:before{content:"\f57d"}.fa-globe-asia:before{content:"\f57e"}.fa-globe-europe:before{content:"\f7a2"}.fa-gofore:before{content:"\f3a7"}.fa-golf-ball:before{content:"\f450"}.fa-goodreads:before{content:"\f3a8"}.fa-goodreads-g:before{content:"\f3a9"}.fa-google:before{content:"\f1a0"}.fa-google-drive:before{content:"\f3aa"}.fa-google-play:before{content:"\f3ab"}.fa-google-plus:before{content:"\f2b3"}.fa-google-plus-g:before{content:"\f0d5"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-wallet:before{content:"\f1ee"}.fa-gopuram:before{content:"\f664"}.fa-graduation-cap:before{content:"\f19d"}.fa-gratipay:before{content:"\f184"}.fa-grav:before{content:"\f2d6"}.fa-greater-than:before{content:"\f531"}.fa-greater-than-equal:before{content:"\f532"}.fa-grimace:before{content:"\f57f"}.fa-grin:before{content:"\f580"}.fa-grin-alt:before{content:"\f581"}.fa-grin-beam:before{content:"\f582"}.fa-grin-beam-sweat:before{content:"\f583"}.fa-grin-hearts:before{content:"\f584"}.fa-grin-squint:before{content:"\f585"}.fa-grin-squint-tears:before{content:"\f586"}.fa-grin-stars:before{content:"\f587"}.fa-grin-tears:before{content:"\f588"}.fa-grin-tongue:before{content:"\f589"}.fa-grin-tongue-squint:before{content:"\f58a"}.fa-grin-tongue-wink:before{content:"\f58b"}.fa-grin-wink:before{content:"\f58c"}.fa-grip-horizontal:before{content:"\f58d"}.fa-grip-lines:before{content:"\f7a4"}.fa-grip-lines-vertical:before{content:"\f7a5"}.fa-grip-vertical:before{content:"\f58e"}.fa-gripfire:before{content:"\f3ac"}.fa-grunt:before{content:"\f3ad"}.fa-guitar:before{content:"\f7a6"}.fa-gulp:before{content:"\f3ae"}.fa-h-square:before{content:"\f0fd"}.fa-hacker-news:before{content:"\f1d4"}.fa-hacker-news-square:before{content:"\f
3af"}.fa-hackerrank:before{content:"\f5f7"}.fa-hammer:before{content:"\f6e3"}.fa-hamsa:before{content:"\f665"}.fa-hand-holding:before{content:"\f4bd"}.fa-hand-holding-heart:before{content:"\f4be"}.fa-hand-holding-usd:before{content:"\f4c0"}.fa-hand-lizard:before{content:"\f258"}.fa-hand-paper:before{content:"\f256"}.fa-hand-peace:before{content:"\f25b"}.fa-hand-point-down:before{content:"\f0a7"}.fa-hand-point-left:before{content:"\f0a5"}.fa-hand-point-right:before{content:"\f0a4"}.fa-hand-point-up:before{content:"\f0a6"}.fa-hand-pointer:before{content:"\f25a"}.fa-hand-rock:before{content:"\f255"}.fa-hand-scissors:before{content:"\f257"}.fa-hand-spock:before{content:"\f259"}.fa-hands:before{content:"\f4c2"}.fa-hands-helping:before{content:"\f4c4"}.fa-handshake:before{content:"\f2b5"}.fa-hanukiah:before{content:"\f6e6"}.fa-hashtag:before{content:"\f292"}.fa-hat-wizard:before{content:"\f6e8"}.fa-haykal:before{content:"\f666"}.fa-hdd:before{content:"\f0a0"}.fa-heading:before{content:"\f1dc"}.fa-headphones:before{content:"\f025"}.fa-headphones-alt:before{content:"\f58f"}.fa-headset:before{content:"\f590"}.fa-heart:before{content:"\f004"}.fa-heart-broken:before{content:"\f7a9"}.fa-heartbeat:before{content:"\f21e"}.fa-helicopter:before{content:"\f533"}.fa-highlighter:before{content:"\f591"}.fa-hiking:before{content:"\f6ec"}.fa-hippo:before{content:"\f6ed"}.fa-hips:before{content:"\f452"}.fa-hire-a-helper:before{content:"\f3b0"}.fa-history:before{content:"\f1da"}.fa-hockey-puck:before{content:"\f453"}.fa-holly-berry:before{content:"\f7aa"}.fa-home:before{content:"\f015"}.fa-hooli:before{content:"\f427"}.fa-hornbill:before{content:"\f592"}.fa-horse:before{content:"\f6f0"}.fa-horse-head:before{content:"\f7ab"}.fa-hospital:before{content:"\f0f8"}.fa-hospital-alt:before{content:"\f47d"}.fa-hospital-symbol:before{content:"\f47e"}.fa-hot-tub:before{content:"\f593"}.fa-hotel:before{content:"\f594"}.fa-hotjar:before{content:"\f3b1"}.fa-hourglass:before{content:"\f254"}.fa-hourglass
-end:before{content:"\f253"}.fa-hourglass-half:before{content:"\f252"}.fa-hourglass-start:before{content:"\f251"}.fa-house-damage:before{content:"\f6f1"}.fa-houzz:before{content:"\f27c"}.fa-hryvnia:before{content:"\f6f2"}.fa-html5:before{content:"\f13b"}.fa-hubspot:before{content:"\f3b2"}.fa-i-cursor:before{content:"\f246"}.fa-icicles:before{content:"\f7ad"}.fa-id-badge:before{content:"\f2c1"}.fa-id-card:before{content:"\f2c2"}.fa-id-card-alt:before{content:"\f47f"}.fa-igloo:before{content:"\f7ae"}.fa-image:before{content:"\f03e"}.fa-images:before{content:"\f302"}.fa-imdb:before{content:"\f2d8"}.fa-inbox:before{content:"\f01c"}.fa-indent:before{content:"\f03c"}.fa-industry:before{content:"\f275"}.fa-infinity:before{content:"\f534"}.fa-info:before{content:"\f129"}.fa-info-circle:before{content:"\f05a"}.fa-instagram:before{content:"\f16d"}.fa-intercom:before{content:"\f7af"}.fa-internet-explorer:before{content:"\f26b"}.fa-invision:before{content:"\f7b0"}.fa-ioxhost:before{content:"\f208"}.fa-italic:before{content:"\f033"}.fa-itunes:before{content:"\f3b4"}.fa-itunes-note:before{content:"\f3b5"}.fa-java:before{content:"\f4e4"}.fa-jedi:before{content:"\f669"}.fa-jedi-order:before{content:"\f50e"}.fa-jenkins:before{content:"\f3b6"}.fa-jira:before{content:"\f7b1"}.fa-joget:before{content:"\f3b7"}.fa-joint:before{content:"\f595"}.fa-joomla:before{content:"\f1aa"}.fa-journal-whills:before{content:"\f66a"}.fa-js:before{content:"\f3b8"}.fa-js-square:before{content:"\f3b9"}.fa-jsfiddle:before{content:"\f1cc"}.fa-kaaba:before{content:"\f66b"}.fa-kaggle:before{content:"\f5fa"}.fa-key:before{content:"\f084"}.fa-keybase:before{content:"\f4f5"}.fa-keyboard:before{content:"\f11c"}.fa-keycdn:before{content:"\f3ba"}.fa-khanda:before{content:"\f66d"}.fa-kickstarter:before{content:"\f3bb"}.fa-kickstarter-k:before{content:"\f3bc"}.fa-kiss:before{content:"\f596"}.fa-kiss-beam:before{content:"\f597"}.fa-kiss-wink-heart:before{content:"\f598"}.fa-kiwi-bird:before{content:"\f535"}.fa-korvue:b
efore{content:"\f42f"}.fa-landmark:before{content:"\f66f"}.fa-language:before{content:"\f1ab"}.fa-laptop:before{content:"\f109"}.fa-laptop-code:before{content:"\f5fc"}.fa-laravel:before{content:"\f3bd"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-laugh:before{content:"\f599"}.fa-laugh-beam:before{content:"\f59a"}.fa-laugh-squint:before{content:"\f59b"}.fa-laugh-wink:before{content:"\f59c"}.fa-layer-group:before{content:"\f5fd"}.fa-leaf:before{content:"\f06c"}.fa-leanpub:before{content:"\f212"}.fa-lemon:before{content:"\f094"}.fa-less:before{content:"\f41d"}.fa-less-than:before{content:"\f536"}.fa-less-than-equal:before{content:"\f537"}.fa-level-down-alt:before{content:"\f3be"}.fa-level-up-alt:before{content:"\f3bf"}.fa-life-ring:before{content:"\f1cd"}.fa-lightbulb:before{content:"\f0eb"}.fa-line:before{content:"\f3c0"}.fa-link:before{content:"\f0c1"}.fa-linkedin:before{content:"\f08c"}.fa-linkedin-in:before{content:"\f0e1"}.fa-linode:before{content:"\f2b8"}.fa-linux:before{content:"\f17c"}.fa-lira-sign:before{content:"\f195"}.fa-list:before{content:"\f03a"}.fa-list-alt:before{content:"\f022"}.fa-list-ol:before{content:"\f0cb"}.fa-list-ul:before{content:"\f0ca"}.fa-location-arrow:before{content:"\f124"}.fa-lock:before{content:"\f023"}.fa-lock-open:before{content:"\f3c1"}.fa-long-arrow-alt-down:before{content:"\f309"}.fa-long-arrow-alt-left:before{content:"\f30a"}.fa-long-arrow-alt-right:before{content:"\f30b"}.fa-long-arrow-alt-up:before{content:"\f30c"}.fa-low-vision:before{content:"\f2a8"}.fa-luggage-cart:before{content:"\f59d"}.fa-lyft:before{content:"\f3c3"}.fa-magento:before{content:"\f3c4"}.fa-magic:before{content:"\f0d0"}.fa-magnet:before{content:"\f076"}.fa-mail-bulk:before{content:"\f674"}.fa-mailchimp:before{content:"\f59e"}.fa-male:before{content:"\f183"}.fa-mandalorian:before{content:"\f50f"}.fa-map:before{content:"\f279"}.fa-map-marked:before{content:"\f59f"}.fa-map-marked-alt:before{content:"\f5a0"}.fa-map-marker:befor
e{content:"\f041"}.fa-map-marker-alt:before{content:"\f3c5"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-markdown:before{content:"\f60f"}.fa-marker:before{content:"\f5a1"}.fa-mars:before{content:"\f222"}.fa-mars-double:before{content:"\f227"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mask:before{content:"\f6fa"}.fa-mastodon:before{content:"\f4f6"}.fa-maxcdn:before{content:"\f136"}.fa-medal:before{content:"\f5a2"}.fa-medapps:before{content:"\f3c6"}.fa-medium:before{content:"\f23a"}.fa-medium-m:before{content:"\f3c7"}.fa-medkit:before{content:"\f0fa"}.fa-medrt:before{content:"\f3c8"}.fa-meetup:before{content:"\f2e0"}.fa-megaport:before{content:"\f5a3"}.fa-meh:before{content:"\f11a"}.fa-meh-blank:before{content:"\f5a4"}.fa-meh-rolling-eyes:before{content:"\f5a5"}.fa-memory:before{content:"\f538"}.fa-mendeley:before{content:"\f7b3"}.fa-menorah:before{content:"\f676"}.fa-mercury:before{content:"\f223"}.fa-meteor:before{content:"\f753"}.fa-microchip:before{content:"\f2db"}.fa-microphone:before{content:"\f130"}.fa-microphone-alt:before{content:"\f3c9"}.fa-microphone-alt-slash:before{content:"\f539"}.fa-microphone-slash:before{content:"\f131"}.fa-microscope:before{content:"\f610"}.fa-microsoft:before{content:"\f3ca"}.fa-minus:before{content:"\f068"}.fa-minus-circle:before{content:"\f056"}.fa-minus-square:before{content:"\f146"}.fa-mitten:before{content:"\f7b5"}.fa-mix:before{content:"\f3cb"}.fa-mixcloud:before{content:"\f289"}.fa-mizuni:before{content:"\f3cc"}.fa-mobile:before{content:"\f10b"}.fa-mobile-alt:before{content:"\f3cd"}.fa-modx:before{content:"\f285"}.fa-monero:before{content:"\f3d0"}.fa-money-bill:before{content:"\f0d6"}.fa-money-bill-alt:before{content:"\f3d1"}.fa-money-bill-wave:before{content:"\f53a"}.fa-money-bill-wave-alt:before{content:"\f53b"}.fa-money-check:before{content:"\f53c"}.fa-money-check-alt:before{content:"\f53d"}.fa-monument:before{c
ontent:"\f5a6"}.fa-moon:before{content:"\f186"}.fa-mortar-pestle:before{content:"\f5a7"}.fa-mosque:before{content:"\f678"}.fa-motorcycle:before{content:"\f21c"}.fa-mountain:before{content:"\f6fc"}.fa-mouse-pointer:before{content:"\f245"}.fa-mug-hot:before{content:"\f7b6"}.fa-music:before{content:"\f001"}.fa-napster:before{content:"\f3d2"}.fa-neos:before{content:"\f612"}.fa-network-wired:before{content:"\f6ff"}.fa-neuter:before{content:"\f22c"}.fa-newspaper:before{content:"\f1ea"}.fa-nimblr:before{content:"\f5a8"}.fa-nintendo-switch:before{content:"\f418"}.fa-node:before{content:"\f419"}.fa-node-js:before{content:"\f3d3"}.fa-not-equal:before{content:"\f53e"}.fa-notes-medical:before{content:"\f481"}.fa-npm:before{content:"\f3d4"}.fa-ns8:before{content:"\f3d5"}.fa-nutritionix:before{content:"\f3d6"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-oil-can:before{content:"\f613"}.fa-old-republic:before{content:"\f510"}.fa-om:before{content:"\f679"}.fa-opencart:before{content:"\f23d"}.fa-openid:before{content:"\f19b"}.fa-opera:before{content:"\f26a"}.fa-optin-monster:before{content:"\f23c"}.fa-osi:before{content:"\f41a"}.fa-otter:before{content:"\f700"}.fa-outdent:before{content:"\f03b"}.fa-page4:before{content:"\f3d7"}.fa-pagelines:before{content:"\f18c"}.fa-paint-brush:before{content:"\f1fc"}.fa-paint-roller:before{content:"\f5aa"}.fa-palette:before{content:"\f53f"}.fa-palfed:before{content:"\f3d8"}.fa-pallet:before{content:"\f482"}.fa-paper-plane:before{content:"\f1d8"}.fa-paperclip:before{content:"\f0c6"}.fa-parachute-box:before{content:"\f4cd"}.fa-paragraph:before{content:"\f1dd"}.fa-parking:before{content:"\f540"}.fa-passport:before{content:"\f5ab"}.fa-pastafarianism:before{content:"\f67b"}.fa-paste:before{content:"\f0ea"}.fa-patreon:before{content:"\f3d9"}.fa-pause:before{content:"\f04c"}.fa-pause-circle:before{content:"\f28b"}.fa-paw:before
{content:"\f1b0"}.fa-paypal:before{content:"\f1ed"}.fa-peace:before{content:"\f67c"}.fa-pen:before{content:"\f304"}.fa-pen-alt:before{content:"\f305"}.fa-pen-fancy:before{content:"\f5ac"}.fa-pen-nib:before{content:"\f5ad"}.fa-pen-square:before{content:"\f14b"}.fa-pencil-alt:before{content:"\f303"}.fa-pencil-ruler:before{content:"\f5ae"}.fa-penny-arcade:before{content:"\f704"}.fa-people-carry:before{content:"\f4ce"}.fa-percent:before{content:"\f295"}.fa-percentage:before{content:"\f541"}.fa-periscope:before{content:"\f3da"}.fa-person-booth:before{content:"\f756"}.fa-phabricator:before{content:"\f3db"}.fa-phoenix-framework:before{content:"\f3dc"}.fa-phoenix-squadron:before{content:"\f511"}.fa-phone:before{content:"\f095"}.fa-phone-slash:before{content:"\f3dd"}.fa-phone-square:before{content:"\f098"}.fa-phone-volume:before{content:"\f2a0"}.fa-php:before{content:"\f457"}.fa-pied-piper:before{content:"\f2ae"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-pied-piper-hat:before{content:"\f4e5"}.fa-pied-piper-pp:before{content:"\f1a7"}.fa-piggy-bank:before{content:"\f4d3"}.fa-pills:before{content:"\f484"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-p:before{content:"\f231"}.fa-pinterest-square:before{content:"\f0d3"}.fa-place-of-worship:before{content:"\f67f"}.fa-plane:before{content:"\f072"}.fa-plane-arrival:before{content:"\f5af"}.fa-plane-departure:before{content:"\f5b0"}.fa-play:before{content:"\f04b"}.fa-play-circle:before{content:"\f144"}.fa-playstation:before{content:"\f3df"}.fa-plug:before{content:"\f1e6"}.fa-plus:before{content:"\f067"}.fa-plus-circle:before{content:"\f055"}.fa-plus-square:before{content:"\f0fe"}.fa-podcast:before{content:"\f2ce"}.fa-poll:before{content:"\f681"}.fa-poll-h:before{content:"\f682"}.fa-poo:before{content:"\f2fe"}.fa-poo-storm:before{content:"\f75a"}.fa-poop:before{content:"\f619"}.fa-portrait:before{content:"\f3e0"}.fa-pound-sign:before{content:"\f154"}.fa-power-off:before{content:"\f011"}.fa-pray:before{content:"\f683"}.fa-prayin
g-hands:before{content:"\f684"}.fa-prescription:before{content:"\f5b1"}.fa-prescription-bottle:before{content:"\f485"}.fa-prescription-bottle-alt:before{content:"\f486"}.fa-print:before{content:"\f02f"}.fa-procedures:before{content:"\f487"}.fa-product-hunt:before{content:"\f288"}.fa-project-diagram:before{content:"\f542"}.fa-pushed:before{content:"\f3e1"}.fa-puzzle-piece:before{content:"\f12e"}.fa-python:before{content:"\f3e2"}.fa-qq:before{content:"\f1d6"}.fa-qrcode:before{content:"\f029"}.fa-question:before{content:"\f128"}.fa-question-circle:before{content:"\f059"}.fa-quidditch:before{content:"\f458"}.fa-quinscape:before{content:"\f459"}.fa-quora:before{content:"\f2c4"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-quran:before{content:"\f687"}.fa-r-project:before{content:"\f4f7"}.fa-radiation:before{content:"\f7b9"}.fa-radiation-alt:before{content:"\f7ba"}.fa-rainbow:before{content:"\f75b"}.fa-random:before{content:"\f074"}.fa-raspberry-pi:before{content:"\f7bb"}.fa-ravelry:before{content:"\f2d9"}.fa-react:before{content:"\f41b"}.fa-reacteurope:before{content:"\f75d"}.fa-readme:before{content:"\f4d5"}.fa-rebel:before{content:"\f1d0"}.fa-receipt:before{content:"\f543"}.fa-recycle:before{content:"\f1b8"}.fa-red-river:before{content:"\f3e3"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-alien:before{content:"\f281"}.fa-reddit-square:before{content:"\f1a2"}.fa-redhat:before{content:"\f7bc"}.fa-redo:before{content:"\f01e"}.fa-redo-alt:before{content:"\f2f9"}.fa-registered:before{content:"\f25d"}.fa-renren:before{content:"\f18b"}.fa-reply:before{content:"\f3e5"}.fa-reply-all:before{content:"\f122"}.fa-replyd:before{content:"\f3e6"}.fa-republican:before{content:"\f75e"}.fa-researchgate:before{content:"\f4f8"}.fa-resolving:before{content:"\f3e7"}.fa-restroom:before{content:"\f7bd"}.fa-retweet:before{content:"\f079"}.fa-rev:before{content:"\f5b2"}.fa-ribbon:before{content:"\f4d6"}.fa-ring:before{content:"\f70b"}.fa-road:before{content:"\f0
18"}.fa-robot:before{content:"\f544"}.fa-rocket:before{content:"\f135"}.fa-rocketchat:before{content:"\f3e8"}.fa-rockrms:before{content:"\f3e9"}.fa-route:before{content:"\f4d7"}.fa-rss:before{content:"\f09e"}.fa-rss-square:before{content:"\f143"}.fa-ruble-sign:before{content:"\f158"}.fa-ruler:before{content:"\f545"}.fa-ruler-combined:before{content:"\f546"}.fa-ruler-horizontal:before{content:"\f547"}.fa-ruler-vertical:before{content:"\f548"}.fa-running:before{content:"\f70c"}.fa-rupee-sign:before{content:"\f156"}.fa-sad-cry:before{content:"\f5b3"}.fa-sad-tear:before{content:"\f5b4"}.fa-safari:before{content:"\f267"}.fa-sass:before{content:"\f41e"}.fa-satellite:before{content:"\f7bf"}.fa-satellite-dish:before{content:"\f7c0"}.fa-save:before{content:"\f0c7"}.fa-schlix:before{content:"\f3ea"}.fa-school:before{content:"\f549"}.fa-screwdriver:before{content:"\f54a"}.fa-scribd:before{content:"\f28a"}.fa-scroll:before{content:"\f70e"}.fa-sd-card:before{content:"\f7c2"}.fa-search:before{content:"\f002"}.fa-search-dollar:before{content:"\f688"}.fa-search-location:before{content:"\f689"}.fa-search-minus:before{content:"\f010"}.fa-search-plus:before{content:"\f00e"}.fa-searchengin:before{content:"\f3eb"}.fa-seedling:before{content:"\f4d8"}.fa-sellcast:before{content:"\f2da"}.fa-sellsy:before{content:"\f213"}.fa-server:before{content:"\f233"}.fa-servicestack:before{content:"\f3ec"}.fa-shapes:before{content:"\f61f"}.fa-share:before{content:"\f064"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-share-square:before{content:"\f14d"}.fa-shekel-sign:before{content:"\f20b"}.fa-shield-alt:before{content:"\f3ed"}.fa-ship:before{content:"\f21a"}.fa-shipping-fast:before{content:"\f48b"}.fa-shirtsinbulk:before{content:"\f214"}.fa-shoe-prints:before{content:"\f54b"}.fa-shopping-bag:before{content:"\f290"}.fa-shopping-basket:before{content:"\f291"}.fa-shopping-cart:before{content:"\f07a"}.fa-shopware:before{content:"\f5b5"}.fa-shower:before{content:"\f2c
c"}.fa-shuttle-van:before{content:"\f5b6"}.fa-sign:before{content:"\f4d9"}.fa-sign-in-alt:before{content:"\f2f6"}.fa-sign-language:before{content:"\f2a7"}.fa-sign-out-alt:before{content:"\f2f5"}.fa-signal:before{content:"\f012"}.fa-signature:before{content:"\f5b7"}.fa-sim-card:before{content:"\f7c4"}.fa-simplybuilt:before{content:"\f215"}.fa-sistrix:before{content:"\f3ee"}.fa-sitemap:before{content:"\f0e8"}.fa-sith:before{content:"\f512"}.fa-skating:before{content:"\f7c5"}.fa-sketch:before{content:"\f7c6"}.fa-skiing:before{content:"\f7c9"}.fa-skiing-nordic:before{content:"\f7ca"}.fa-skull:before{content:"\f54c"}.fa-skull-crossbones:before{content:"\f714"}.fa-skyatlas:before{content:"\f216"}.fa-skype:before{content:"\f17e"}.fa-slack:before{content:"\f198"}.fa-slack-hash:before{content:"\f3ef"}.fa-slash:before{content:"\f715"}.fa-sleigh:before{content:"\f7cc"}.fa-sliders-h:before{content:"\f1de"}.fa-slideshare:before{content:"\f1e7"}.fa-smile:before{content:"\f118"}.fa-smile-beam:before{content:"\f5b8"}.fa-smile-wink:before{content:"\f4da"}.fa-smog:before{content:"\f75f"}.fa-smoking:before{content:"\f48d"}.fa-smoking-ban:before{content:"\f54d"}.fa-sms:before{content:"\f7cd"}.fa-snapchat:before{content:"\f2ab"}.fa-snapchat-ghost:before{content:"\f2ac"}.fa-snapchat-square:before{content:"\f2ad"}.fa-snowboarding:before{content:"\f7ce"}.fa-snowflake:before{content:"\f2dc"}.fa-snowman:before{content:"\f7d0"}.fa-snowplow:before{content:"\f7d2"}.fa-socks:before{content:"\f696"}.fa-solar-panel:before{content:"\f5ba"}.fa-sort:before{content:"\f0dc"}.fa-sort-alpha-down:before{content:"\f15d"}.fa-sort-alpha-up:before{content:"\f15e"}.fa-sort-amount-down:before{content:"\f160"}.fa-sort-amount-up:before{content:"\f161"}.fa-sort-down:before{content:"\f0dd"}.fa-sort-numeric-down:before{content:"\f162"}.fa-sort-numeric-up:before{content:"\f163"}.fa-sort-up:before{content:"\f0de"}.fa-soundcloud:before{content:"\f1be"}.fa-sourcetree:before{content:"\f7d3"}.fa-spa:before{content:"\f5bb"
}.fa-space-shuttle:before{content:"\f197"}.fa-speakap:before{content:"\f3f3"}.fa-spider:before{content:"\f717"}.fa-spinner:before{content:"\f110"}.fa-splotch:before{content:"\f5bc"}.fa-spotify:before{content:"\f1bc"}.fa-spray-can:before{content:"\f5bd"}.fa-square:before{content:"\f0c8"}.fa-square-full:before{content:"\f45c"}.fa-square-root-alt:before{content:"\f698"}.fa-squarespace:before{content:"\f5be"}.fa-stack-exchange:before{content:"\f18d"}.fa-stack-overflow:before{content:"\f16c"}.fa-stamp:before{content:"\f5bf"}.fa-star:before{content:"\f005"}.fa-star-and-crescent:before{content:"\f699"}.fa-star-half:before{content:"\f089"}.fa-star-half-alt:before{content:"\f5c0"}.fa-star-of-david:before{content:"\f69a"}.fa-star-of-life:before{content:"\f621"}.fa-staylinked:before{content:"\f3f5"}.fa-steam:before{content:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-steam-symbol:before{content:"\f3f6"}.fa-step-backward:before{content:"\f048"}.fa-step-forward:before{content:"\f051"}.fa-stethoscope:before{content:"\f0f1"}.fa-sticker-mule:before{content:"\f3f7"}.fa-sticky-note:before{content:"\f249"}.fa-stop:before{content:"\f04d"}.fa-stop-circle:before{content:"\f28d"}.fa-stopwatch:before{content:"\f2f2"}.fa-store:before{content:"\f54e"}.fa-store-alt:before{content:"\f54f"}.fa-strava:before{content:"\f428"}.fa-stream:before{content:"\f550"}.fa-street-view:before{content:"\f21d"}.fa-strikethrough:before{content:"\f0cc"}.fa-stripe:before{content:"\f429"}.fa-stripe-s:before{content:"\f42a"}.fa-stroopwafel:before{content:"\f551"}.fa-studiovinari:before{content:"\f3f8"}.fa-stumbleupon:before{content:"\f1a4"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-subscript:before{content:"\f12c"}.fa-subway:before{content:"\f239"}.fa-suitcase:before{content:"\f0f2"}.fa-suitcase-rolling:before{content:"\f5c1"}.fa-sun:before{content:"\f185"}.fa-superpowers:before{content:"\f2dd"}.fa-superscript:before{content:"\f12b"}.fa-supple:before{content:"\f3f9"}.fa-surprise:before{content:"\f5c2"
}.fa-suse:before{content:"\f7d6"}.fa-swatchbook:before{content:"\f5c3"}.fa-swimmer:before{content:"\f5c4"}.fa-swimming-pool:before{content:"\f5c5"}.fa-synagogue:before{content:"\f69b"}.fa-sync:before{content:"\f021"}.fa-sync-alt:before{content:"\f2f1"}.fa-syringe:before{content:"\f48e"}.fa-table:before{content:"\f0ce"}.fa-table-tennis:before{content:"\f45d"}.fa-tablet:before{content:"\f10a"}.fa-tablet-alt:before{content:"\f3fa"}.fa-tablets:before{content:"\f490"}.fa-tachometer-alt:before{content:"\f3fd"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-tape:before{content:"\f4db"}.fa-tasks:before{content:"\f0ae"}.fa-taxi:before{content:"\f1ba"}.fa-teamspeak:before{content:"\f4f9"}.fa-teeth:before{content:"\f62e"}.fa-teeth-open:before{content:"\f62f"}.fa-telegram:before{content:"\f2c6"}.fa-telegram-plane:before{content:"\f3fe"}.fa-temperature-high:before{content:"\f769"}.fa-temperature-low:before{content:"\f76b"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-tenge:before{content:"\f7d7"}.fa-terminal:before{content:"\f120"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-th:before{content:"\f00a"}.fa-th-large:before{content:"\f009"}.fa-th-list:before{content:"\f00b"}.fa-the-red-yeti:before{content:"\f69d"}.fa-theater-masks:before{content:"\f630"}.fa-themeco:before{content:"\f5c6"}.fa-themeisle:before{content:"\f2b2"}.fa-thermometer:before{content:"\f491"}.fa-thermometer-empty:before{content:"\f2cb"}.fa-thermometer-full:before{content:"\f2c7"}.fa-thermometer-half:before{content:"\f2c9"}.fa-thermometer-quarter:before{content:"\f2ca"}.fa-thermometer-three-quarters:before{content:"\f2c8"}.fa-think-peaks:before{content:"\f731"}.fa-thumbs-down:before{content:"\f165"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbtack:before{content:"\f08d"}.fa-ticket-alt:before{content:"\f3ff"}.fa-times:before{content:"\f00d"}.fa-times-circle:before{content:"\f057"}.fa-tint:before{content:"\f043"}.fa-tint-slash:before{content:"\f5c7"}.fa-tired:b
efore{content:"\f5c8"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-toilet:before{content:"\f7d8"}.fa-toilet-paper:before{content:"\f71e"}.fa-toolbox:before{content:"\f552"}.fa-tools:before{content:"\f7d9"}.fa-tooth:before{content:"\f5c9"}.fa-torah:before{content:"\f6a0"}.fa-torii-gate:before{content:"\f6a1"}.fa-tractor:before{content:"\f722"}.fa-trade-federation:before{content:"\f513"}.fa-trademark:before{content:"\f25c"}.fa-traffic-light:before{content:"\f637"}.fa-train:before{content:"\f238"}.fa-tram:before{content:"\f7da"}.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-trash:before{content:"\f1f8"}.fa-trash-alt:before{content:"\f2ed"}.fa-tree:before{content:"\f1bb"}.fa-trello:before{content:"\f181"}.fa-tripadvisor:before{content:"\f262"}.fa-trophy:before{content:"\f091"}.fa-truck:before{content:"\f0d1"}.fa-truck-loading:before{content:"\f4de"}.fa-truck-monster:before{content:"\f63b"}.fa-truck-moving:before{content:"\f4df"}.fa-truck-pickup:before{content:"\f63c"}.fa-tshirt:before{content:"\f553"}.fa-tty:before{content:"\f1e4"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-tv:before{content:"\f26c"}.fa-twitch:before{content:"\f1e8"}.fa-twitter:before{content:"\f099"}.fa-twitter-square:before{content:"\f081"}.fa-typo3:before{content:"\f42b"}.fa-uber:before{content:"\f402"}.fa-ubuntu:before{content:"\f7df"}.fa-uikit:before{content:"\f403"}.fa-umbrella:before{content:"\f0e9"}.fa-umbrella-beach:before{content:"\f5ca"}.fa-underline:before{content:"\f0cd"}.fa-undo:before{content:"\f0e2"}.fa-undo-alt:before{content:"\f2ea"}.fa-uniregistry:before{content:"\f404"}.fa-universal-access:before{content:"\f29a"}.fa-university:before{content:"\f19c"}.fa-unlink:before{content:"\f127"}.fa-unlock:before{content:"\f09c"}.fa-unlock-alt:before{content:"\f13e"}.fa-untappd:before{content:"\f405"}.fa-upload:before{content:"\f093"}.fa-ups:before{content:"\f7e0"}.fa-usb:before{content:"\f
287"}.fa-user:before{content:"\f007"}.fa-user-alt:before{content:"\f406"}.fa-user-alt-slash:before{content:"\f4fa"}.fa-user-astronaut:before{content:"\f4fb"}.fa-user-check:before{content:"\f4fc"}.fa-user-circle:before{content:"\f2bd"}.fa-user-clock:before{content:"\f4fd"}.fa-user-cog:before{content:"\f4fe"}.fa-user-edit:before{content:"\f4ff"}.fa-user-friends:before{content:"\f500"}.fa-user-graduate:before{content:"\f501"}.fa-user-injured:before{content:"\f728"}.fa-user-lock:before{content:"\f502"}.fa-user-md:before{content:"\f0f0"}.fa-user-minus:before{content:"\f503"}.fa-user-ninja:before{content:"\f504"}.fa-user-plus:before{content:"\f234"}.fa-user-secret:before{content:"\f21b"}.fa-user-shield:before{content:"\f505"}.fa-user-slash:before{content:"\f506"}.fa-user-tag:before{content:"\f507"}.fa-user-tie:before{content:"\f508"}.fa-user-times:before{content:"\f235"}.fa-users:before{content:"\f0c0"}.fa-users-cog:before{content:"\f509"}.fa-usps:before{content:"\f7e1"}.fa-ussunnah:before{content:"\f407"}.fa-utensil-spoon:before{content:"\f2e5"}.fa-utensils:before{content:"\f2e7"}.fa-vaadin:before{content:"\f408"}.fa-vector-square:before{content:"\f5cb"}.fa-venus:before{content:"\f221"}.fa-venus-double:before{content:"\f226"}.fa-venus-mars:before{content:"\f228"}.fa-viacoin:before{content:"\f237"}.fa-viadeo:before{content:"\f2a9"}.fa-viadeo-square:before{content:"\f2aa"}.fa-vial:before{content:"\f492"}.fa-vials:before{content:"\f493"}.fa-viber:before{content:"\f409"}.fa-video:before{content:"\f03d"}.fa-video-slash:before{content:"\f4e2"}.fa-vihara:before{content:"\f6a7"}.fa-vimeo:before{content:"\f40a"}.fa-vimeo-square:before{content:"\f194"}.fa-vimeo-v:before{content:"\f27d"}.fa-vine:before{content:"\f1ca"}.fa-vk:before{content:"\f189"}.fa-vnv:before{content:"\f40b"}.fa-volleyball-ball:before{content:"\f45f"}.fa-volume-down:before{content:"\f027"}.fa-volume-mute:before{content:"\f6a9"}.fa-volume-off:before{content:"\f026"}.fa-volume-up:before{content:"\f028"}.fa-vote-ye
a:before{content:"\f772"}.fa-vr-cardboard:before{content:"\f729"}.fa-vuejs:before{content:"\f41f"}.fa-walking:before{content:"\f554"}.fa-wallet:before{content:"\f555"}.fa-warehouse:before{content:"\f494"}.fa-water:before{content:"\f773"}.fa-weebly:before{content:"\f5cc"}.fa-weibo:before{content:"\f18a"}.fa-weight:before{content:"\f496"}.fa-weight-hanging:before{content:"\f5cd"}.fa-weixin:before{content:"\f1d7"}.fa-whatsapp:before{content:"\f232"}.fa-whatsapp-square:before{content:"\f40c"}.fa-wheelchair:before{content:"\f193"}.fa-whmcs:before{content:"\f40d"}.fa-wifi:before{content:"\f1eb"}.fa-wikipedia-w:before{content:"\f266"}.fa-wind:before{content:"\f72e"}.fa-window-close:before{content:"\f410"}.fa-window-maximize:before{content:"\f2d0"}.fa-window-minimize:before{content:"\f2d1"}.fa-window-restore:before{content:"\f2d2"}.fa-windows:before{content:"\f17a"}.fa-wine-bottle:before{content:"\f72f"}.fa-wine-glass:before{content:"\f4e3"}.fa-wine-glass-alt:before{content:"\f5ce"}.fa-wix:before{content:"\f5cf"}.fa-wizards-of-the-coast:before{content:"\f730"}.fa-wolf-pack-battalion:before{content:"\f514"}.fa-won-sign:before{content:"\f159"}.fa-wordpress:before{content:"\f19a"}.fa-wordpress-simple:before{content:"\f411"}.fa-wpbeginner:before{content:"\f297"}.fa-wpexplorer:before{content:"\f2de"}.fa-wpforms:before{content:"\f298"}.fa-wpressr:before{content:"\f3e4"}.fa-wrench:before{content:"\f0ad"}.fa-x-ray:before{content:"\f497"}.fa-xbox:before{content:"\f412"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-y-combinator:before{content:"\f23b"}.fa-yahoo:before{content:"\f19e"}.fa-yandex:before{content:"\f413"}.fa-yandex-international:before{content:"\f414"}.fa-yarn:before{content:"\f7e3"}.fa-yelp:before{content:"\f1e9"}.fa-yen-sign:before{content:"\f157"}.fa-yin-yang:before{content:"\f6ad"}.fa-yoast:before{content:"\f2b1"}.fa-youtube:before{content:"\f167"}.fa-youtube-square:before{content:"\f431"}.fa-zhihu:before{content:"\f63f"}.sr-only{border:0;
clip:rect(0,0,0,0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.sr-only-focusable:active,.sr-only-focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}@font-face{font-family:"Font Awesome 5 Brands";font-style:normal;font-weight:normal;src:url(../webfonts/fa-brands-400.eot);src:url(../webfonts/fa-brands-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-brands-400.woff2) format("woff2"),url(../webfonts/fa-brands-400.woff) format("woff"),url(../webfonts/fa-brands-400.ttf) format("truetype"),url(../webfonts/fa-brands-400.svg#fontawesome) format("svg")}.fab{font-family:"Font Awesome 5 Brands"}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:400;src:url(../webfonts/fa-regular-400.eot);src:url(../webfonts/fa-regular-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-regular-400.woff2) format("woff2"),url(../webfonts/fa-regular-400.woff) format("woff"),url(../webfonts/fa-regular-400.ttf) format("truetype"),url(../webfonts/fa-regular-400.svg#fontawesome) format("svg")}.far{font-weight:400}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:900;src:url(../webfonts/fa-solid-900.eot);src:url(../webfonts/fa-solid-900.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-solid-900.woff2) format("woff2"),url(../webfonts/fa-solid-900.woff) format("woff"),url(../webfonts/fa-solid-900.ttf) format("truetype"),url(../webfonts/fa-solid-900.svg#fontawesome) format("svg")}.fa,.far,.fas{font-family:"Font Awesome 5 Free"}.fa,.fas{font-weight:900} \ No newline at end of file diff --git a/css/hugo-theme.css b/css/hugo-theme.css new file mode 100644 index 0000000000..9bd8a1e4e7 --- /dev/null +++ b/css/hugo-theme.css @@ -0,0 +1,245 @@ +/* Insert here special css for hugo theme, on top of any other imported css */ + + +/* Table of contents */ + +.progress ul { + list-style: none; + margin: 0; + padding: 0 15px; +} + +#TableOfContents { + 
font-size: 13px !important; + max-height: 85vh; + overflow: auto; + padding: 15px 5px !important; +} + +#TableOfContents > ul > li > a { + font-weight: bold; +} + +body { + font-size: 16px !important; + color: #323232 !important; +} + +#body a.highlight, #body a.highlight:hover, #body a.highlight:focus { + text-decoration: none; + outline: none; + outline: 0; +} +#body a.highlight { + line-height: 1.1; + display: inline-block; +} +#body a.highlight:after { + display: block; + content: ""; + height: 1px; + width: 0%; + background-color: #0082a7; /*#CE3B2F*/ + -webkit-transition: width 0.5s ease; + -moz-transition: width 0.5s ease; + -ms-transition: width 0.5s ease; + transition: width 0.5s ease; +} +#body a.highlight:hover:after, #body a.highlight:focus:after { + width: 100%; +} +.progress { + position:absolute; + background-color: rgba(246, 246, 246, 0.97); + width: auto; + border: thin solid #ECECEC; + display:none; + z-index:200; +} + +#toc-menu { + border-right: thin solid #DAD8D8 !important; + padding-right: 1rem !important; + margin-right: 0.5rem !important; +} + +#sidebar-toggle-span { + border-right: thin solid #DAD8D8 !important; + padding-right: 0.5rem !important; + margin-right: 1rem !important; +} + +.btn { + display: inline-block !important; + padding: 6px 12px !important; + margin-bottom: 0 !important; + font-size: 14px !important; + font-weight: normal !important; + line-height: 1.42857143 !important; + text-align: center !important; + white-space: nowrap !important; + vertical-align: middle !important; + -ms-touch-action: manipulation !important; + touch-action: manipulation !important; + cursor: pointer !important; + -webkit-user-select: none !important; + -moz-user-select: none !important; + -ms-user-select: none !important; + user-select: none !important; + background-image: none !important; + border: 1px solid transparent !important; + border-radius: 4px !important; + -webkit-transition: all 0.15s !important; + -moz-transition: all 0.15s 
!important; + transition: all 0.15s !important; +} +.btn:focus { + /*outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px;*/ + outline: none !important; +} +.btn:hover, +.btn:focus { + color: #2b2b2b !important; + text-decoration: none !important; +} + +.btn-default { + color: #333 !important; + background-color: #fff !important; + border-color: #ccc !important; +} +.btn-default:hover, +.btn-default:focus, +.btn-default:active { + color: #fff !important; + background-color: #9e9e9e !important; + border-color: #9e9e9e !important; +} +.btn-default:active { + background-image: none !important; +} + +/* anchors */ +.anchor { + color: #00bdf3; + font-size: 0.5em; + cursor:pointer; + visibility:hidden; + margin-left: 0.5em; + position: absolute; + margin-top:0.1em; +} + +h2:hover .anchor, h3:hover .anchor, h4:hover .anchor, h5:hover .anchor, h6:hover .anchor { + visibility:visible; +} + +/* Redfines headers style */ + +h2, h3, h4, h5, h6 { + font-weight: 400; + line-height: 1.1; +} + +h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + font-weight: inherit; +} + +h2 { + font-size: 2.5rem; + line-height: 110% !important; + margin: 2.5rem 0 1.5rem 0; +} + +h3 { + font-size: 2rem; + line-height: 110% !important; + margin: 2rem 0 1rem 0; +} + +h4 { + font-size: 1.5rem; + line-height: 110% !important; + margin: 1.5rem 0 0.75rem 0; +} + +h5 { + font-size: 1rem; + line-height: 110% !important; + margin: 1rem 0 0.2rem 0; +} + +h6 { + font-size: 0.5rem; + line-height: 110% !important; + margin: 0.5rem 0 0.2rem 0; +} + +p { + margin: 1rem 0; +} + +figcaption h4 { + font-weight: 300 !important; + opacity: .85; + font-size: 1em; + text-align: center; + margin-top: -1.5em; +} + +.select-style { + border: 0; + width: 22%; + border-radius: 0.6em; + padding: 0px 6px; + overflow: hidden; + display: inline-flex; + background-color: rgba(0, 0, 0, 0.1); + color: white; +} + +.select-style svg { + fill: #ccc; + width: 14px; + height: 14px; + pointer-events: none; + 
margin: auto; +} + +.select-style svg:hover { + fill: #e6e6e6; +} + +.select-style select { + padding: 0; + width: 130%; + color: white; + border: none; + box-shadow: none; + background: transparent; + background-image: none; + -webkit-appearance: none; + margin: auto; + margin-left: 0.2em; + margin-right: -27px +} + +.select-style select:focus { + outline: none; +} + +.select-style :hover { + cursor: pointer; +} + +@media only all and (max-width: 47.938em) { + #breadcrumbs .links, #top-github-link-text { + display: none; + } +} + +.is-sticky #top-bar { + box-shadow: -1px 2px 5px 1px rgba(0, 0, 0, 0.1); +} \ No newline at end of file diff --git a/css/hybrid.css b/css/hybrid.css new file mode 100644 index 0000000000..29735a1890 --- /dev/null +++ b/css/hybrid.css @@ -0,0 +1,102 @@ +/* + +vim-hybrid theme by w0ng (https://github.com/w0ng/vim-hybrid) + +*/ + +/*background color*/ +.hljs { + display: block; + overflow-x: auto; + padding: 0.5em; + background: #1d1f21; +} + +/*selection color*/ +.hljs::selection, +.hljs span::selection { + background: #373b41; +} + +.hljs::-moz-selection, +.hljs span::-moz-selection { + background: #373b41; +} + +/*foreground color*/ +.hljs { + color: #c5c8c6; +} + +/*color: fg_yellow*/ +.hljs-title, +.hljs-name { + color: #f0c674; +} + +/*color: fg_comment*/ +.hljs-comment, +.hljs-meta, +.hljs-meta .hljs-keyword { + color: #707880; +} + +/*color: fg_red*/ +.hljs-number, +.hljs-symbol, +.hljs-literal, +.hljs-deletion, +.hljs-link { + color: #cc6666 +} + +/*color: fg_green*/ +.hljs-string, +.hljs-doctag, +.hljs-addition, +.hljs-regexp, +.hljs-selector-attr, +.hljs-selector-pseudo { + color: #b5bd68; +} + +/*color: fg_purple*/ +.hljs-attribute, +.hljs-code, +.hljs-selector-id { + color: #b294bb; +} + +/*color: fg_blue*/ +.hljs-keyword, +.hljs-selector-tag, +.hljs-bullet, +.hljs-tag { + color: #81a2be; +} + +/*color: fg_aqua*/ +.hljs-subst, +.hljs-variable, +.hljs-template-tag, +.hljs-template-variable { + color: #8abeb7; +} + +/*color: 
fg_orange*/ +.hljs-type, +.hljs-built_in, +.hljs-builtin-name, +.hljs-quote, +.hljs-section, +.hljs-selector-class { + color: #de935f; +} + +.hljs-emphasis { + font-style: italic; +} + +.hljs-strong { + font-weight: bold; +} diff --git a/css/nucleus.css b/css/nucleus.css new file mode 100644 index 0000000000..1897fc5d6d --- /dev/null +++ b/css/nucleus.css @@ -0,0 +1,615 @@ +*, *::before, *::after { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; } + +@-webkit-viewport { + width: device-width; } +@-moz-viewport { + width: device-width; } +@-ms-viewport { + width: device-width; } +@-o-viewport { + width: device-width; } +@viewport { + width: device-width; } +html { + font-size: 100%; + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust: 100%; } + +body { + margin: 0; } + +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +nav, +section, +summary { + display: block; } + +audio, +canvas, +progress, +video { + display: inline-block; + vertical-align: baseline; } + +audio:not([controls]) { + display: none; + height: 0; } + +[hidden], +template { + display: none; } + +a { + background: transparent; + text-decoration: none; } + +a:active, +a:hover { + outline: 0; } + +abbr[title] { + border-bottom: 1px dotted; } + +b, +strong { + font-weight: bold; } + +dfn { + font-style: italic; } + +mark { + background: #FFFF27; + color: #333; } + +sub, +sup { + font-size: 0.8rem; + line-height: 0; + position: relative; + vertical-align: baseline; } + +sup { + top: -0.5em; } + +sub { + bottom: -0.25em; } + +img { + border: 0; + max-width: 100%; } + +svg:not(:root) { + overflow: hidden; } + +figure { + margin: 1em 40px; } + +hr { + height: 0; } + +pre { + overflow: auto; } + +button, +input, +optgroup, +select, +textarea { + color: inherit; + font: inherit; + margin: 0; } + +button { + overflow: visible; } + +button, +select { + text-transform: none; } + +button, +html input[type="button"], 
+input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; + cursor: pointer; } + +button[disabled], +html input[disabled] { + cursor: default; } + +button::-moz-focus-inner, +input::-moz-focus-inner { + border: 0; + padding: 0; } + +input { + line-height: normal; } + +input[type="checkbox"], +input[type="radio"] { + padding: 0; } + +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; } + +input[type="search"] { + -webkit-appearance: textfield; } + +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; } + +legend { + border: 0; + padding: 0; } + +textarea { + overflow: auto; } + +optgroup { + font-weight: bold; } + +table { + border-collapse: collapse; + border-spacing: 0; + table-layout: fixed; + width: 100%; } + +tr, td, th { + vertical-align: middle; } + +th, td { + padding: 0.425rem 0; } + +th { + text-align: left; } + +.container { + width: 75em; + margin: 0 auto; + padding: 0; } + @media only all and (min-width: 60em) and (max-width: 74.938em) { + .container { + width: 60em; } } + @media only all and (min-width: 48em) and (max-width: 59.938em) { + .container { + width: 48em; } } + @media only all and (min-width: 30.063em) and (max-width: 47.938em) { + .container { + width: 30em; } } + @media only all and (max-width: 30em) { + .container { + width: 100%; } } + +.grid { + display: -webkit-box; + display: -moz-box; + display: box; + display: -webkit-flex; + display: -moz-flex; + display: -ms-flexbox; + display: flex; + -webkit-flex-flow: row; + -moz-flex-flow: row; + flex-flow: row; + list-style: none; + margin: 0; + padding: 0; } + @media only all and (max-width: 47.938em) { + .grid { + -webkit-flex-flow: row wrap; + -moz-flex-flow: row wrap; + flex-flow: row wrap; } } + +.block { + -webkit-box-flex: 1; + -moz-box-flex: 1; + box-flex: 1; + -webkit-flex: 1; + -moz-flex: 1; + -ms-flex: 1; + flex: 1; + 
min-width: 0; + min-height: 0; } + @media only all and (max-width: 47.938em) { + .block { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 100%; + -moz-flex: 0 100%; + -ms-flex: 0 100%; + flex: 0 100%; } } + +.content { + margin: 0.625rem; + padding: 0.938rem; } + +@media only all and (max-width: 47.938em) { + body [class*="size-"] { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 100%; + -moz-flex: 0 100%; + -ms-flex: 0 100%; + flex: 0 100%; } } + +.size-1-2 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 50%; + -moz-flex: 0 50%; + -ms-flex: 0 50%; + flex: 0 50%; } + +.size-1-3 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 33.33333%; + -moz-flex: 0 33.33333%; + -ms-flex: 0 33.33333%; + flex: 0 33.33333%; } + +.size-1-4 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 25%; + -moz-flex: 0 25%; + -ms-flex: 0 25%; + flex: 0 25%; } + +.size-1-5 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 20%; + -moz-flex: 0 20%; + -ms-flex: 0 20%; + flex: 0 20%; } + +.size-1-6 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 16.66667%; + -moz-flex: 0 16.66667%; + -ms-flex: 0 16.66667%; + flex: 0 16.66667%; } + +.size-1-7 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 14.28571%; + -moz-flex: 0 14.28571%; + -ms-flex: 0 14.28571%; + flex: 0 14.28571%; } + +.size-1-8 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 12.5%; + -moz-flex: 0 12.5%; + -ms-flex: 0 12.5%; + flex: 0 12.5%; } + +.size-1-9 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 11.11111%; + -moz-flex: 0 11.11111%; + -ms-flex: 0 11.11111%; + flex: 0 11.11111%; } + +.size-1-10 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 10%; + -moz-flex: 0 10%; + -ms-flex: 0 10%; + flex: 0 10%; } + +.size-1-11 { + 
-webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 9.09091%; + -moz-flex: 0 9.09091%; + -ms-flex: 0 9.09091%; + flex: 0 9.09091%; } + +.size-1-12 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 8.33333%; + -moz-flex: 0 8.33333%; + -ms-flex: 0 8.33333%; + flex: 0 8.33333%; } + +@media only all and (min-width: 48em) and (max-width: 59.938em) { + .size-tablet-1-2 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 50%; + -moz-flex: 0 50%; + -ms-flex: 0 50%; + flex: 0 50%; } + + .size-tablet-1-3 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 33.33333%; + -moz-flex: 0 33.33333%; + -ms-flex: 0 33.33333%; + flex: 0 33.33333%; } + + .size-tablet-1-4 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 25%; + -moz-flex: 0 25%; + -ms-flex: 0 25%; + flex: 0 25%; } + + .size-tablet-1-5 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 20%; + -moz-flex: 0 20%; + -ms-flex: 0 20%; + flex: 0 20%; } + + .size-tablet-1-6 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 16.66667%; + -moz-flex: 0 16.66667%; + -ms-flex: 0 16.66667%; + flex: 0 16.66667%; } + + .size-tablet-1-7 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 14.28571%; + -moz-flex: 0 14.28571%; + -ms-flex: 0 14.28571%; + flex: 0 14.28571%; } + + .size-tablet-1-8 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 12.5%; + -moz-flex: 0 12.5%; + -ms-flex: 0 12.5%; + flex: 0 12.5%; } + + .size-tablet-1-9 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 11.11111%; + -moz-flex: 0 11.11111%; + -ms-flex: 0 11.11111%; + flex: 0 11.11111%; } + + .size-tablet-1-10 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 10%; + -moz-flex: 0 10%; + -ms-flex: 0 10%; + flex: 0 10%; } + + .size-tablet-1-11 { + -webkit-box-flex: 0; + 
-moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 9.09091%; + -moz-flex: 0 9.09091%; + -ms-flex: 0 9.09091%; + flex: 0 9.09091%; } + + .size-tablet-1-12 { + -webkit-box-flex: 0; + -moz-box-flex: 0; + box-flex: 0; + -webkit-flex: 0 8.33333%; + -moz-flex: 0 8.33333%; + -ms-flex: 0 8.33333%; + flex: 0 8.33333%; } } +@media only all and (max-width: 47.938em) { + @supports not (flex-wrap: wrap) { + .grid { + display: block; + -webkit-box-lines: inherit; + -moz-box-lines: inherit; + box-lines: inherit; + -webkit-flex-wrap: inherit; + -moz-flex-wrap: inherit; + -ms-flex-wrap: inherit; + flex-wrap: inherit; } + + .block { + display: block; + -webkit-box-flex: inherit; + -moz-box-flex: inherit; + box-flex: inherit; + -webkit-flex: inherit; + -moz-flex: inherit; + -ms-flex: inherit; + flex: inherit; } } } +.first-block { + -webkit-box-ordinal-group: 0; + -webkit-order: -1; + -ms-flex-order: -1; + order: -1; } + +.last-block { + -webkit-box-ordinal-group: 2; + -webkit-order: 1; + -ms-flex-order: 1; + order: 1; } + +.fixed-blocks { + -webkit-flex-flow: row wrap; + -moz-flex-flow: row wrap; + flex-flow: row wrap; } + .fixed-blocks .block { + -webkit-box-flex: inherit; + -moz-box-flex: inherit; + box-flex: inherit; + -webkit-flex: inherit; + -moz-flex: inherit; + -ms-flex: inherit; + flex: inherit; + width: 25%; } + @media only all and (min-width: 60em) and (max-width: 74.938em) { + .fixed-blocks .block { + width: 33.33333%; } } + @media only all and (min-width: 48em) and (max-width: 59.938em) { + .fixed-blocks .block { + width: 50%; } } + @media only all and (max-width: 47.938em) { + .fixed-blocks .block { + width: 100%; } } + +body { + font-size: 1.05rem; + line-height: 1.7; } + +h1, h2, h3, h4, h5, h6 { + margin: 0.85rem 0 1.7rem 0; + text-rendering: optimizeLegibility; } + +h1 { + font-size: 3.25rem; } + +h2 { + font-size: 2.55rem; } + +h3 { + font-size: 2.15rem; } + +h4 { + font-size: 1.8rem; } + +h5 { + font-size: 1.4rem; } + +h6 { + font-size: 0.9rem; } + +p { + margin: 
1.7rem 0; } + +ul, ol { + margin-top: 1.7rem; + margin-bottom: 1.7rem; } + ul ul, ul ol, ol ul, ol ol { + margin-top: 0; + margin-bottom: 0; } + +blockquote { + margin: 1.7rem 0; + padding-left: 0.85rem; } + +cite { + display: block; + font-size: 0.925rem; } + cite:before { + content: "\2014 \0020"; } + +pre { + margin: 1.7rem 0; + padding: 0.938rem; } + +code { + vertical-align: bottom; } + +small { + font-size: 0.925rem; } + +hr { + border-left: none; + border-right: none; + border-top: none; + margin: 1.7rem 0; } + +fieldset { + border: 0; + padding: 0.938rem; + margin: 0 0 1.7rem 0; } + +input, +label, +select { + display: block; } + +label { + margin-bottom: 0.425rem; } + label.required:after { + content: "*"; } + label abbr { + display: none; } + +textarea, input[type="email"], input[type="number"], input[type="password"], input[type="search"], input[type="tel"], input[type="text"], input[type="url"], input[type="color"], input[type="date"], input[type="datetime"], input[type="datetime-local"], input[type="month"], input[type="time"], input[type="week"], select[multiple=multiple] { + -webkit-transition: border-color; + -moz-transition: border-color; + transition: border-color; + border-radius: 0.1875rem; + margin-bottom: 0.85rem; + padding: 0.425rem 0.425rem; + width: 100%; } + textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { + outline: none; } + +textarea { + resize: vertical; } + +input[type="checkbox"], input[type="radio"] { + display: inline; + margin-right: 0.425rem; } + +input[type="file"] { + width: 100%; } + +select { + width: auto; + max-width: 100%; + margin-bottom: 
1.7rem; } + +button, +input[type="submit"] { + cursor: pointer; + user-select: none; + vertical-align: middle; + white-space: nowrap; + border: inherit; } diff --git a/css/perfect-scrollbar.min.css b/css/perfect-scrollbar.min.css new file mode 100644 index 0000000000..ebd2cb43bc --- /dev/null +++ b/css/perfect-scrollbar.min.css @@ -0,0 +1,2 @@ +/* perfect-scrollbar v0.6.13 */ +.ps-container{-ms-touch-action:auto;touch-action:auto;overflow:hidden !important;-ms-overflow-style:none}@supports (-ms-overflow-style: none){.ps-container{overflow:auto !important}}@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none){.ps-container{overflow:auto !important}}.ps-container.ps-active-x>.ps-scrollbar-x-rail,.ps-container.ps-active-y>.ps-scrollbar-y-rail{display:block;background-color:transparent}.ps-container.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail{background-color:#eee;opacity:.9}.ps-container.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail>.ps-scrollbar-x{background-color:#999;height:11px}.ps-container.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail{background-color:#eee;opacity:.9}.ps-container.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail>.ps-scrollbar-y{background-color:#999;width:11px}.ps-container>.ps-scrollbar-x-rail{display:none;position:absolute;opacity:0;-webkit-transition:background-color .2s linear, opacity .2s linear;-o-transition:background-color .2s linear, opacity .2s linear;-moz-transition:background-color .2s linear, opacity .2s linear;transition:background-color .2s linear, opacity .2s linear;bottom:0px;height:15px}.ps-container>.ps-scrollbar-x-rail>.ps-scrollbar-x{position:absolute;background-color:#aaa;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;-o-transition:background-color .2s 
linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;-moz-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;bottom:2px;height:6px}.ps-container>.ps-scrollbar-x-rail:hover>.ps-scrollbar-x,.ps-container>.ps-scrollbar-x-rail:active>.ps-scrollbar-x{height:11px}.ps-container>.ps-scrollbar-y-rail{display:none;position:absolute;opacity:0;-webkit-transition:background-color .2s linear, opacity .2s linear;-o-transition:background-color .2s linear, opacity .2s linear;-moz-transition:background-color .2s linear, opacity .2s linear;transition:background-color .2s linear, opacity .2s linear;right:0;width:15px}.ps-container>.ps-scrollbar-y-rail>.ps-scrollbar-y{position:absolute;background-color:#aaa;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out;-o-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;-moz-transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out;transition:background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s 
ease-in-out;right:2px;width:6px}.ps-container>.ps-scrollbar-y-rail:hover>.ps-scrollbar-y,.ps-container>.ps-scrollbar-y-rail:active>.ps-scrollbar-y{width:11px}.ps-container:hover.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail{background-color:#eee;opacity:.9}.ps-container:hover.ps-in-scrolling.ps-x>.ps-scrollbar-x-rail>.ps-scrollbar-x{background-color:#999;height:11px}.ps-container:hover.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail{background-color:#eee;opacity:.9}.ps-container:hover.ps-in-scrolling.ps-y>.ps-scrollbar-y-rail>.ps-scrollbar-y{background-color:#999;width:11px}.ps-container:hover>.ps-scrollbar-x-rail,.ps-container:hover>.ps-scrollbar-y-rail{opacity:.6}.ps-container:hover>.ps-scrollbar-x-rail:hover{background-color:#eee;opacity:.9}.ps-container:hover>.ps-scrollbar-x-rail:hover>.ps-scrollbar-x{background-color:#999}.ps-container:hover>.ps-scrollbar-y-rail:hover{background-color:#eee;opacity:.9}.ps-container:hover>.ps-scrollbar-y-rail:hover>.ps-scrollbar-y{background-color:#999} diff --git a/css/tabs.css b/css/tabs.css new file mode 100644 index 0000000000..2ad2728772 --- /dev/null +++ b/css/tabs.css @@ -0,0 +1,43 @@ +#body .tab-nav-button { + border-width: 1px 1px 1px 1px !important; + border-color: #ccc !important; + border-radius: 4px 4px 0 0 !important; + background-color: #ddd !important; + float: left; + display: block; + position: relative; + margin-left: 4px; + bottom: -1px; +} +#body .tab-nav-button:first-child { + margin-left: 0px; +} +#body .tab-nav-button.active { + background-color: #fff !important; + border-bottom-color: #fff !important; +} + +#body .tab-panel { + margin-top: 32px; + margin-bottom: 32px; +} +#body .tab-content { + display: block; + clear: both; + padding: 8px; + border-width: 1px; + border-style: solid; + border-color: #ccc; +} +#body .tab-content .tab-item{ + display: none; +} + +#body .tab-content .tab-item.active{ + display: block; +} + +#body .tab-item pre{ + margin-bottom: 0; + margin-top: 0; +} diff --git a/css/tags.css 
b/css/tags.css new file mode 100644 index 0000000000..495d2f9f71 --- /dev/null +++ b/css/tags.css @@ -0,0 +1,49 @@ +/* Tags */ + +#head-tags{ + margin-left:1em; + margin-top:1em; +} + +#body .tags a.tag-link { + display: inline-block; + line-height: 2em; + font-size: 0.8em; + position: relative; + margin: 0 16px 8px 0; + padding: 0 10px 0 12px; + background: #8451a1; + + -webkit-border-bottom-right-radius: 3px; + border-bottom-right-radius: 3px; + -webkit-border-top-right-radius: 3px; + border-top-right-radius: 3px; + + -webkit-box-shadow: 0 1px 2px rgba(0,0,0,0.2); + box-shadow: 0 1px 2px rgba(0,0,0,0.2); + color: #fff; +} + +#body .tags a.tag-link:before { + content: ""; + position: absolute; + top:0; + left: -1em; + width: 0; + height: 0; + border-color: transparent #8451a1 transparent transparent; + border-style: solid; + border-width: 1em 1em 1em 0; +} + +#body .tags a.tag-link:after { + content: ""; + position: absolute; + top: 10px; + left: 1px; + width: 5px; + height: 5px; + -webkit-border-radius: 50%; + border-radius: 100%; + background: #fff; +} diff --git a/css/theme-blue.css b/css/theme-blue.css new file mode 100644 index 0000000000..91369947ca --- /dev/null +++ b/css/theme-blue.css @@ -0,0 +1,133 @@ + +:root{ + + --MAIN-TEXT-color:#323232; /* Color of text by default */ + --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ + --MAIN-LINK-color:#1C90F3; /* Color of links */ + --MAIN-LINK-HOVER-color:#167ad0; /* Color of hovered links */ + --MAIN-ANCHOR-color: #1C90F3; /* color of anchors on titles */ + + --MENU-HOME-LINK-color: #323232; /* Color of the home button text */ + --MENU-HOME-LINK-HOVER-color: #5e5e5e; /* Color of the hovered home button text */ + + --MENU-HEADER-BG-color:#1C90F3; /* Background color of menu header */ + --MENU-HEADER-BORDER-color:#33a1ff; /*Color of menu header border */ + + --MENU-SEARCH-BG-color:#167ad0; /* Search field background color (by default borders + icons) */ + --MENU-SEARCH-BOX-color: #33a1ff; /* 
Override search field border color */ + --MENU-SEARCH-BOX-ICONS-color: #a1d2fd; /* Override search field icons color */ + + --MENU-SECTIONS-ACTIVE-BG-color:#20272b; /* Background color of the active section and its childs */ + --MENU-SECTIONS-BG-color:#252c31; /* Background color of other sections */ + --MENU-SECTIONS-LINK-color: #ccc; /* Color of links in menu */ + --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ + --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category text */ + --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ + + --MENU-VISITED-color: #33a1ff; /* Color of 'page visited' icons in menu */ + --MENU-SECTION-HR-color: #20272b; /* Color of
The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.
+Grafana can be accessed via web browser at the following URL:
+https://sma-grafana.<system_name>.<system_domain>
For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.
+For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.
+There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.
+Dashboard Name | +Display Type | +
---|---|
Fabric Congestion | +Chart Panels | +
Fabric RFC3635 | +Chart Panels | +
Fabric Errors | +Tabular Format | +
Fabric Port State | +Tabular Format | +
The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.
+Shows the Interval and Locations Options for the available telemetry.
+ +The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.
+For additional information, refer to Grafana Templates and Variables.
+The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.
+The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.
+SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.
+This dashboard contains the variable Port Type, which is not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.
+The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.
+The port type of a link is reported as a port state “subtype” event when defined at port initialization.
+This dashboard reports error counters in a tabular format in three panels.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.
+The three columns named group, switch, and port are not port state events, but extra information included with +all port state events.
+For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.
+Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.
+ + + + + +Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.
+Kibana can be accessed via web browser at the following URL:
+https://sma-kibana.<system_name>.<system_domain>
For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.
+Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.
+Dashboard | +Short Description | +Long Description | +Kibana Visualization and Search Name | +
---|---|---|---|
sat-aer | +AER corrected | +Corrected Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-corrected Search: sat-aer-corrected | +
sat-aer | +AER fatal | +Fatal Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-fatal Search: sat-aer-fatal | +
sat-atom | +ATOM failures | +Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. | +sat-atom-failed | +
sat-atom | +ATOM admindown | +Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. | +sat-atom-admindown | +
sat-heartbeat | +Heartbeat loss events | +Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. | +sat-heartbeat | +
sat-kernel | +Kernel assertions | +The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. | +sat-kassertions | +
sat-kernel | +Kernel panics | +The kernel panics when something is seriously wrong. The node goes down. | +sat-kernel-panic | +
sat-kernel | +Lustre bugs (LBUGs) | +The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. | +sat-lbug | +
sat-kernel | +CPU stalls | +CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. | +sat-cpu-stall | +
sat-kernel | +Out of memory | +An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. | +sat-oom | +
sat-mce | +MCE | +Machine Check Exceptions (MCE) are errors detected at the processor level. | +sat-mce | +
sat-rasdaemon | +rasdaemon errors | +Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. |
+sat-rasdaemon-error | +
sat-rasdaemon | +rasdaemon messages | +All messages from the rasdaemon service on nodes. |
+sat-rasdaemon | +
By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.
+The Kibana Dashboard should be open on your system.
+Navigate to Management
+Navigate to Advanced Settings in the Kibana section, below the Elastic search section
+Scroll down to the Discover section
+Change Highlight results from on to off
+Click Save to save changes
+The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.
+Go to the dashboard section.
+Select sat-aer dashboard.
+Choose the time range of interest.
+View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.
+The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.
+HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.
+Go to the dashboard section.
+Select sat-atom dashboard.
+Choose the time range of interest.
+View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.
+Go to the dashboard section.
+Select sat-heartbeat dashboard.
+Choose the time range of interest.
+View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.
+The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.
+Go to the dashboard section.
+Select sat-kernel dashboard.
+Choose the time range of interest.
+View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.
+The MCE Dashboard displays CPU detected processor-level hardware errors.
+Go to the dashboard section.
+Select sat-mce dashboard.
+Choose the time range of interest.
+View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon
+service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including
+PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages
+presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one
+for only messages of severity “emerg” or “err” and another for all messages from rasdaemon.
Go to the dashboard section.
+Select sat-rasdaemon dashboard.
+Choose the time range of interest.
+View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.
+Describes how to install the System Admin Toolkit (SAT) product stream.
+...
) in shell output indicate omitted lines. Replace 2.1.x
with the version of the SAT product stream
+being installed. Start a typescript.
+The typescript will record the commands and the output from this installation.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Copy the release distribution gzipped tar file to ncn-m001
.
Unzip and extract the release distribution, 2.1.x
.
ncn-m001# tar -xvzf sat-2.1.x.tar.gz
+
Change directory to the extracted release distribution directory.
+ncn-m001# cd sat-2.1.x
+
Run the installer: install.sh.
+The script produces a lot of output. The last several lines are included +below for reference.
+ncn-m001# ./install.sh
+...
+ConfigMap data updates exist; Exiting.
++ clean-install-deps
++ for image in "${vendor_images[@]}"
++ podman rmi -f docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d
+Untagged: docker.io/library/cray-nexus-setup:sat-2.1.x-20210804163905-8dbb87d
+Deleted: 2c196c0c6364d9a1699d83dc98550880dc491cc3433a015d35f6cab1987dd6da
++ for image in "${vendor_images[@]}"
++ podman rmi -f docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d
+Untagged: docker.io/library/skopeo:sat-2.1.x-20210804163905-8dbb87d
+Deleted: 1b38b7600f146503e246e753cd9df801e18409a176b3dbb07b0564e6bc27144c
+
Check the return code of the installer. Zero indicates a successful installation.
+ncn-m001# echo $?
+0
+
Check the progress of the SAT configuration import Kubernetes job, which is
+initiated by install.sh
.
If the “Pods Statuses” appear as “Succeeded”, the job has completed +successfully. The job usually takes between 30 seconds and 2 minutes.
+ncn-m001# kubectl describe job sat-config-import-2.1.x -n services
+...
+Pods Statuses: 0 Running / 1 Succeeded / 0 Failed
+...
+
The job’s progress may be monitored using kubectl logs
. The example below includes
+the final log lines from a successful configuration import Kubernetes job.
ncn-m001# kubectl logs -f -n services --selector \
+ job-name=sat-config-import-2.1.x --all-containers
+...
+ConfigMap update attempt=1
+Resting 1s before reading ConfigMap
+ConfigMap data updates exist; Exiting.
+2021-08-04T21:50:10.275886Z info Agent has successfully terminated
+2021-08-04T21:50:10.276118Z warning envoy main caught SIGTERM
+# Completed on Wed Aug 4 21:49:44 2021
+
The following error may appear in this log, but it can be ignored.
+error accept tcp [::]:15020: use of closed network connection
+
Optional: Remove the SAT release distribution tar file and extracted directory.
+ncn-m001# rm sat-2.1.x.tar.gz
+ncn-m001# rm -rf sat-2.1.x/
+
Upgrade only: Ensure that the environment variable SAT_TAG
is not set
+in the ~/.bashrc
file on any of the management NCNs.
NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.
+The following example assumes three manager NCNs: ncn-m001
, ncn-m002
, and ncn-m003
,
+and shows output from a system in which no further action is needed.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m003: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+
The following example shows that SAT_TAG
is set in ~/.bashrc
on ncn-m002
.
+Remove that line from the ~/.bashrc
file on ncn-m002
.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+ncn-m002: export SAT_TAG=3.5.0
+ncn-m003: source <(kubectl completion bash)
+
Stop the typescript.
+NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.
+ncn-m001# exit
+
SAT version 2.1.x
is now installed/upgraded, meaning the SAT 2.1.x
release
+has been loaded into the system software repository.
sat
command won’t be available until the NCN Personalization
+procedure has been executed. If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.
+NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.
+Execute the NCN Personalization procedure:
+ +If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the upgrade procedures:
+ +Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.
+...
) in shell output indicate omitted lines. Replace 2.1.x
with the version of the SAT product stream
+being installed. Start a typescript if not already using one.
+The typescript will capture the commands and the output from this installation procedure.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Get the git commit ID for the branch with a version number matching the version of SAT.
+This represents a revision of Ansible configuration content stored in VCS.
+Get and store the VCS password (required to access the remote VCS repo).
+ncn-m001# VCS_PASS=$(kubectl get secret -n services vcs-user-credentials \
+ --template={{.data.vcs_password}} | base64 --decode)
+
In this example, the git commit ID is 82537e59c24dd5607d5f5d6f92cdff971bd9c615
,
+and the version number is 2.1.x
.
ncn-m001# git ls-remote \
+ https://crayvcs:$VCS_PASS@api-gw-service-nmn.local/vcs/cray/sat-config-management.git \
+ refs/heads/cray/sat/*
+...
+82537e59c24dd5607d5f5d6f92cdff971bd9c615 refs/heads/cray/sat/2.1.x
+
Add a sat
layer to the CFS configuration(s) associated with the manager NCNs.
Get the name(s) of the CFS configuration(s).
+NOTE: Each manager NCN uses a single CFS configuration. An individual CFS configuration +may be used by any number of manager NCNs, i.e., three manager NCNs might use one, +two, or three CFS configurations.
+In the following example, all three manager NCNs use the same CFS configuration – ncn-personalization
.
ncn-m001:~ # for component in $(cray hsm state components list \
+ --role Management --subrole Master --format json | jq -r \
+ '.Components | .[].ID'); do cray cfs components describe $component \
+ --format json | jq -r '.desiredConfig'; done
+ncn-personalization
+ncn-personalization
+ncn-personalization
+
In the following example, the three manager NCNs all use different configurations, +each with a unique name.
+ncn-personalization-m001
+ncn-personalization-m002
+ncn-personalization-m003
+
Execute the following sub-steps (3.2 through 3.5) once for each unique CFS +configuration name.
+NOTE: Examples in the following sub-steps assume that all manager NCNs use the
+CFS configuration ncn-personalization
.
Get the current configuration layers for each CFS configuration, and save the +data to a local JSON file.
+The JSON file created in this sub-step will serve as a template for updating +an existing CFS configuration, or creating a new one.
+ncn-m001# cray cfs configurations describe ncn-personalization --format \
+ json | jq '{ layers }' > ncn-personalization.json
+
If the configuration does not exist yet, you may see the following error.
+In this case, create a new JSON file for that CFS configuration, e.g., ncn-personalization.json
.
Error: Configuration could not found.: Configuration ncn-personalization could not be found
+
NOTE: For more on CFS configuration management, refer to “Manage a Configuration +with CFS” in the CSM product documentation.
+Append a sat
layer to the end of the JSON file’s list of layers.
If the file already contains a sat
layer entry, update it.
If the configuration data could not be found in the previous sub-step, the JSON file
+will be empty. In this case, copy the ncn-personalization.json
example below,
+paste it into the JSON file, delete the ellipsis, and make appropriate changes to
+the sat
layer entry.
Use the git commit ID from step 8, e.g. 82537e59c24dd5607d5f5d6f92cdff971bd9c615
.
NOTE: The name
value in the example below may be changed, but the installation
+procedure uses the example value, sat-ncn
. If an alternate value is used, some
+of the following examples must be updated accordingly before they are executed.
ncn-m001# vim ncn-personalization.json
+...
+ncn-m001# cat ncn-personalization.json
+{
+ "layers": [
+ ...
+ {
+ "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
+ "commit": "82537e59c24dd5607d5f5d6f92cdff971bd9c615",
+ "name": "sat-ncn",
+ "playbook": "sat-ncn.yml"
+ }
+ ]
+}
+
Update the existing CFS configuration, or create a new one.
+The command should output a JSON-formatted representation of the CFS configuration,
+which will look like the JSON file, but with lastUpdated
and name
fields.
ncn-m001# cray cfs configurations update ncn-personalization --file \
+ ncn-personalization.json --format json
+{
+ "lastUpdated": "2021-08-05T16:38:53Z",
+ "layers": {
+ ...
+ },
+ "name": "ncn-personalization"
+}
+
Optional: Delete the JSON file.
+NOTE: There is no reason to keep the file. If you keep it, verify that +it is up-to-date with the actual CFS configuration before using it again.
+ncn-m001# rm ncn-personalization.json
+
Invoke the CFS configurations that you created or updated in the previous step.
+This step will create a CFS session based on the given configuration and install +SAT on the associated manager NCNs.
+The --configuration-limit
option causes only the sat-ncn
layer of the configuration,
+ncn-personalization
, to run.
CAUTION: In this example, the session --name
is sat-session
. That value
+is only an example. Declare a unique name for each configuration session.
You should see a representation of the CFS session in the output.
+ncn-m001# cray cfs sessions create --name sat-session --configuration-name \
+ ncn-personalization --configuration-limit sat-ncn
+name="sat-session"
+
+[ansible]
+...
+
Execute this step once for each unique CFS configuration that you created or +updated in the previous step.
+Monitor the progress of each CFS session.
+First, list all containers associated with the CFS session:
+ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session \
+ -o json | jq '.items[0].spec.containers[] | .name'
+"inventory"
+"ansible-1"
+"istio-proxy"
+
Next, get the logs for the ansible-1
container.
NOTE: the trailing digit might differ from “1”. It is the zero-based
+index of the sat-ncn
layer within the configuration’s layers.
ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
+ --selector=cfsession=sat-session
+
Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Execute this step for each unique CFS configuration.
+NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify that SAT was successfully configured.
+If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
NOTE: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
ncn-m001# sat --version
+sat 3.7.0
+
NOTE: Upon first running sat
, you may see additional output while the sat
+container image is downloaded. This will occur the first time sat
is run on
+each manager NCN. For example, if you run sat
for the first time on ncn-m001
+and then for the first time on ncn-m002
, you will see this additional output
+both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
Stop the typescript.
+ncn-m001# exit
+
SAT version 2.1.x
is now configured:
If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.
+If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the SAT Post-Upgrade procedures:
+ +Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in
+later steps of the install process. The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see
+Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation.
+For additional information on SAT authentication, see System Security and Authentication in the CSM
+documentation.
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to
+the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat
commands require S3 to
+be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket,
+the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be
+done on every Kubernetes manager node where SAT commands are run.
Below is a table describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bootsys |
+Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat k8s |
+Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. | +sat-k8s |
+Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). | +
sat linkhealth |
++ | + | This command has been deprecated. | +
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node xnames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC xnames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, you must run the sat auth
command. This command will prompt for a password
+on the command line. The username value is obtained from the following locations, in order of higher precedence to lower
+precedence:
--username
global command-line option.username
option in the api_gateway
section of the config file at ~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file will be obtained and saved to
+~/.config/sat/tokens
. Subsequent sat commands will determine the username the same way as sat auth
described above,
+and will use the token for that username if it has been obtained and saved by sat auth
.
sat
CLI has been installed following Install The System Admin Toolkit Product Stream. The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:
+Generate a default SAT configuration file, if one does not exist.
+ncn-m001# sat init
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the config file already exists, it will print out an error:
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
section of the config file. E.g.:
username = "crayadmin"
+
Run sat auth
. Enter your password when prompted. E.g.:
ncn-m001# sat auth
+Password for crayadmin:
+Succeeded!
+
Other sat
commands are now authenticated to make requests to the API gateway. E.g.:
ncn-m001# sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev
+(see: Run Sat Setrev to Set System Information).
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+sat
CLI has been installed following Install The System Admin Toolkit Product Stream.sat
configuration file has been created (See SAT Authentication).Ensure the files are readable only by root
.
ncn-m001# touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+Get the SAT configuration file’s endpoint value.
+NOTE: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
ncn-m001# grep endpoint ~/.config/sat/sat.toml
+# endpoint = "https://rgw-vip.nmn"
+
Get the sat-s3-credentials
secret’s endpoint value.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, modify the SAT configuration file’s endpoint value to match the secret’s.
+Copy SAT configurations to every manager node on the system.
+ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.
+NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Run sat setrev
to set System Revision Information. Follow the on-screen prompts.
ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Run sat showrev
to verify System Revision Information. The following tables contain example information.
ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | Shasta |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
After upgrading from a previous version of SAT, the old version of the cray/cray-sat
+container image will remain in the registry on the system. It is not removed
+automatically, but it will not be the default version.
The admin can remove the older version of the cray/cray-sat
container image.
The cray-product-catalog
Kubernetes configuration map will also show all versions
+of SAT that are installed. The command sat showrev --products
will display these
+versions. See the example:
ncn-m001# sat showrev --products
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------------------+-----------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+--------------------+-----------------------+
+...
+| sat | 2.1.3 | - | - |
+| sat | 2.0.4 | - | - |
+...
++--------------+-----------------+--------------------+-----------------------+
+
After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
+In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+ + + + + +The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.
+Six Kibana Dashboards are included with SAT. They provide organized output for system health information.
+Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.
+SAT is installed as a separate product as part of the HPE Cray EX System base installation.
+Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.
+The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.
+The sat
command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes
+manager nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container.There are two ways to run sat.
+sat bash
, followed by a sat
command.sat
command directly on a Kubernetes manager node.In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using interactive and non-interactive modes.
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+
ncn-m001# sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, then use sat bash to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes manager node, use sat-man
on the manager node as shown in the following
+example.
ncn-m001# sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
ncn-m001# man sat
+
ncn-m001# man sat-podman
+
The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.
+root
or super-user account always has the #
character at the end of the prompt and has the host name of the
+host in the prompt.root
account is indicated with account@hostname>. A user account that is neither root
nor crayadm
is
+referred to as user
.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run on one of the Kubernetes Manager servers. (Non-interactive) | +
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
Examples of the sat status
command used by an administrator:
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+
Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX (Shasta) software stack. The following list shows these dependencies
+for each subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bootsys
sat diag
sat firmware
sat hwinv
sat hwmatch
sat init
None
+sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptIt also added the following new component:
+sat-cfs-install
docker image and helm chartThe following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named “ncn-personalization”.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+executables on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and config file options +have been removed. For further instructions, see Remove Obsolete Configuration +File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes masters and workers.
sat firmware
.cray-sat
container image.sat firmware
command.We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes management cluster (i.e., workers
+and masters). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Config File Location ChangeThe default location of the SAT config file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own config files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment variable.
Additionally, if a config file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
option.--list-node-accels
option. The count of
+node accelerators is also included for each node.--list-node-accel-risers
+option. The count of node accelerator risers is also included for each node.--list-node-hsn-nics
+option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For detailed +instructions, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding config-file
+option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding config file options were
+deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootsys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
See the System Admin Toolkit Command Overview +and the table of commands in the SAT Authentication section +of this document for more details on each of these commands.
+ + + + + +The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.
+Grafana can be accessed via web browser at the following URL:
+https://sma-grafana.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Grafana being https://sma-grafana.EXAMPLE_DOMAIN.com
For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.
+For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.
+There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.
+Dashboard Name | +Display Type | +
---|---|
Fabric Congestion | +Chart Panels | +
Fabric RFC3635 | +Chart Panels | +
Fabric Errors | +Tabular Format | +
Fabric Port State | +Tabular Format | +
The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.
+Shows the Interval and Locations Options for the available telemetry.
+ +The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.
+For additional information, refer to Grafana Templates and Variables.
+The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.
+The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.
+SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.
+This dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.
+The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.
+The port type of a link is reported as a port state “subtype” event when defined at port initialization.
+This dashboard reports error counters in a tabular format in three panels.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.
+The three columns, named group, switch, and port, are not port state events, but extra information included with
+For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.
+Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.
+ + + + + +Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.
+Kibana can be accessed via web browser at the following URL:
+https://sma-kibana.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Kibana being https://sma-kibana.EXAMPLE_DOMAIN.com
For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.
+Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.
+Dashboard | +Short Description | +Long Description | +Kibana Visualization and Search Name | +
---|---|---|---|
sat-aer | +AER corrected | +Corrected Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-corrected Search: sat-aer-corrected | +
sat-aer | +AER fatal | +Fatal Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-fatal Search: sat-aer-fatal | +
sat-atom | +ATOM failures | +Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. | +sat-atom-failed | +
sat-atom | +ATOM admindown | +Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. | +sat-atom-admindown | +
sat-heartbeat | +Heartbeat loss events | +Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. | +sat-heartbeat | +
sat-kernel | +Kernel assertions | +The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. | +sat-kassertions | +
sat-kernel | +Kernel panics | +The kernel panics when something is seriously wrong. The node goes down. | +sat-kernel-panic | +
sat-kernel | +Lustre bugs (LBUGs) | +The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. | +sat-lbug | +
sat-kernel | +CPU stalls | +CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. | +sat-cpu-stall | +
sat-kernel | +Out of memory | +An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. | +sat-oom | +
sat-mce | +MCE | +Machine Check Exceptions (MCE) are errors detected at the processor level. | +sat-mce | +
sat-rasdaemon | +rasdaemon errors | +Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. |
+sat-rasdaemon-error | +
sat-rasdaemon | +rasdaemon messages | +All messages from the rasdaemon service on nodes. |
+sat-rasdaemon | +
By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.
+The Kibana Dashboard should be open on your system.
+Navigate to Management
+Navigate to Advanced Settings in the Kibana section, below the Elastic search section
+Scroll down to the Discover section
+Change Highlight results from on to off
+Click Save to save changes
+The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.
+Go to the dashboard section.
+Select sat-aer dashboard.
+Choose the time range of interest.
+View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.
+The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.
+HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.
+Go to the dashboard section.
+Select sat-atom dashboard.
+Choose the time range of interest.
+View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.
+Go to the dashboard section.
+Select sat-heartbeat dashboard.
+Choose the time range of interest.
+View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.
+The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.
+Go to the dashboard section.
+Select sat-kernel dashboard.
+Choose the time range of interest.
+View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.
+The MCE Dashboard displays CPU detected processor-level hardware errors.
+Go to the dashboard section.
+Select sat-mce dashboard.
+Choose the time range of interest.
+View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon
+service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including
+PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages
+presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one
+for only messages of severity “emerg” or “err” and another for all messages from rasdaemon
.
Go to the dashboard section.
+Select sat-rasdaemon dashboard.
+Choose the time range of interest.
+View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.
+Describes how to install the System Admin Toolkit (SAT) product stream.
+...
) in shell output indicate omitted lines.2.2.x
with the version of the SAT product stream
+being installed.Start a typescript.
+The typescript will record the commands and the output from this installation.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Copy the release distribution gzipped tar file to ncn-m001
.
Unzip and extract the release distribution, 2.2.x
.
ncn-m001# tar -xvzf sat-2.2.x.tar.gz
+
Change directory to the extracted release distribution directory.
+ncn-m001# cd sat-2.2.x
+
Run the installer: install.sh.
+The script produces a lot of output. A successful install ends with “SAT +version 2.2.x has been installed”.
+ncn-m001# ./install.sh
+...
+====> Updating active CFS configurations
+...
+====> SAT version 2.2.x has been installed.
+
Upgrade only: Record the names of the CFS configuration or
+configurations modified by install.sh
.
The install.sh
script attempts to modify any CFS configurations that apply
+to the master management NCNs. During an upgrade, install.sh
will log
+messages indicating the CFS configuration or configurations that were
+modified. For example, if there are three master nodes all using the same
+CFS configuration named “ncn-personalization”, the output would look like
+this:
====> Updating active CFS configurations
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s3b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s5b0n0
+INFO: Updating CFS configuration "ncn-personalization"
+INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration "ncn-personalization".
+INFO: Key "name" in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16
+INFO: Successfully updated layers in configuration "ncn-personalization"
+
Save the name of each CFS configuration updated by the installer. In the +previous example, a single configuration named “ncn-personalization” was +updated, so that name is saved to a temporary file.
+ncn-m001# echo ncn-personalization >> /tmp/sat-ncn-cfs-configurations.txt
+
Repeat the previous command for each CFS configuration that was updated.
+Upgrade only: Save the new name of the SAT CFS configuration layer.
+In the example install.sh
output above, the new layer name is
+sat-2.2.16
. Save this value to a file to be used later.
ncn-m001# echo sat-2.2.16 > /tmp/sat-layer-name.txt
+
Fresh install only: Save the CFS configuration layer for SAT to a file +for later use.
+The install.sh
script attempts to modify any CFS configurations that apply
+to the master management NCNs. During a fresh install, no such CFS
+configurations will be found, and it will instead log the SAT configuration
+layer that must be added to the CFS configuration that will be created. Here
+is an example of the output in that case:
====> Updating active CFS configurations
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
+WARNING: No CFS configurations found that apply to components with role Management and subrole Master.
+INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master.
+{
+ "name": "sat-2.2.15",
+ "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
+ "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
+ "playbook": "sat-ncn.yml"
+}
+
Save the JSON output to a file for later use. For example:
+ncn-m001# cat > /tmp/sat-layer.json <<EOF
+> {
+> "name": "sat-2.2.15",
+> "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
+> "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
+> "playbook": "sat-ncn.yml"
+> }
+> EOF
+
Do not copy the previous command verbatim. Use the JSON output from the
+install.sh
script.
Optional: Remove the SAT release distribution tar file and extracted directory.
+ncn-m001# rm sat-2.2.x.tar.gz
+ncn-m001# rm -rf sat-2.2.x/
+
Upgrade only: Ensure that the environment variable SAT_TAG
is not set
+in the ~/.bashrc
file on any of the management NCNs.
NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.
+The following example assumes three manager NCNs: ncn-m001
, ncn-m002
, and ncn-m003
,
+and shows output from a system in which no further action is needed.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m003: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+
The following example shows that SAT_TAG
is set in ~/.bashrc
on ncn-m002
.
+Remove that line from the ~/.bashrc
file on ncn-m002
.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+ncn-m002: export SAT_TAG=3.5.0
+ncn-m003: source <(kubectl completion bash)
+
Stop the typescript.
+NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.
+ncn-m001# exit
+
SAT version 2.2.x
is now installed/upgraded, meaning the SAT 2.2.x
release
+has been loaded into the system software repository.
sat
command won’t be available until the NCN Personalization
+procedure has been executed.If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.
+NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.
+Execute the NCN Personalization procedure:
+ +If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the upgrade procedures:
+ +Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.
+/tmp/sat-ncn-cfs-configurations.txt
./tmp/sat-layer-name.txt
./tmp/sat-layer.json
....
) in shell output indicate omitted lines.2.2.x
with the version of the SAT product stream
+being installed.Start a typescript if not already using one.
+The typescript will capture the commands and the output from this installation procedure.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Fresh install only: Add the SAT layer to the NCN personalization JSON file.
+If the SAT install script, install.sh
, did not identify and modify the CFS
+configurations that apply to each master management NCN, it will have printed
+the SAT CFS configuration layer in JSON format. This layer must be added to
+the JSON file being used to construct the CFS configuration. For example,
+if the file being used is named ncn-personalization.json
, and the SAT
+layer was saved to the file /tmp/sat-layer.json
as described in the
+install instructions, the following jq
command will append the SAT layer
+and save the result in a new file named ncn-personalization.json
.
ncn-m001# jq -s '{layers: (.[0].layers + [.[1]])}' ncn-personalization.json \
+ /tmp/sat-layer.json > ncn-personalization.new.json
+
For instructions on how to create a CFS configuration from the previous +file and how to apply it to the management NCNs, refer to “Perform NCN +Personalization” in the HPE Cray System Management Documentation. After +the CFS configuration has been created and applied, return to this +procedure.
+Upgrade only: Invoke each CFS configuration that was updated during the +upgrade.
+If the SAT install script, install.sh
, identified CFS configurations that
+apply to the master management NCNs and modified them in place, invoke each
+CFS configuration that was created or updated during installation.
This step will create a CFS session for each given configuration and install +SAT on the associated manager NCNs.
+The --configuration-limit
option limits the configuration session to run
+only the SAT layer of the configuration.
You should see a representation of the CFS session in the output.
+ncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt);
+do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
+ "${cfs_configuration}" --configuration-limit $(cat /tmp/sat-layer-name.txt);
+done
+
+name="sat-session-ncn-personalization"
+
+[ansible]
+...
+
Upgrade only: Monitor the progress of each CFS session.
+This step assumes a single session named sat-session-ncn-personalization
was created in the previous step.
First, list all containers associated with the CFS session:
+ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
+ -o json | jq '.items[0].spec.containers[] | .name'
+"inventory"
+"ansible-1"
+"istio-proxy"
+
Next, get the logs for the ansible-1
container.
NOTE: the trailing digit might differ from “1”. It is the zero-based
+index of the sat-ncn
layer within the configuration’s layers.
ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
+ --selector=cfsession=sat-session-ncn-personalization
+
Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Execute this step for each unique CFS configuration.
+NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify that SAT was successfully configured.
+If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
NOTE: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
ncn-m001# sat --version
+sat 3.7.0
+
NOTE: Upon first running sat
, you may see additional output while the sat
+container image is downloaded. This will occur the first time sat
is run on
+each manager NCN. For example, if you run sat
for the first time on ncn-m001
+and then for the first time on ncn-m002
, you will see this additional output
+both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
Stop the typescript.
+ncn-m001# exit
+
SAT version 2.2.x
is now configured:
If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.
+If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the SAT Post-Upgrade procedures:
+ +Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in
+later steps of the install process. The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see
+Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation.
+For additional information on SAT authentication, see System Security and Authentication in the CSM
+documentation.
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to
+the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat
commands require S3 to
+be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket,
+the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be
+done on every Kubernetes manager node where SAT commands are run.
Below is a table describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bmccreds |
+Requires authentication to the API gateway. | +sat-bmccreds |
+Set BMC passwords. | +
sat bootprep |
+Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install. | +sat-bootprep |
+Prepare to boot nodes with images and configurations. | +
sat bootsys |
+Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwhist |
+Requires authentication to the API gateway. | +sat-hwhist |
+Report hardware component history. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat k8s |
+Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. | +sat-k8s |
+Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). | +
sat linkhealth |
++ | + | This command has been deprecated. | +
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node xnames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat slscheck |
+Requires authentication to the API gateway. | +sat-slscheck |
+Perform a cross-check between SLS and HSM. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC xnames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, you must run the sat auth
command. This command will prompt for a password
+on the command line. The username value is obtained from the following locations, in order from highest precedence to lowest
+precedence:
--username
global command-line option.username
option in the api_gateway
section of the config file at ~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file will be obtained and saved to
+~/.config/sat/tokens
. Subsequent sat commands will determine the username the same way as sat auth
described above,
+and will use the token for that username if it has been obtained and saved by sat auth
.
sat
CLI has been installed following Install The System Admin Toolkit Product Stream.The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:
+Generate a default SAT configuration file, if one does not exist.
+ncn-m001# sat init
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the config file already exists, it will print out an error:
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
section of the config file. E.g.:
username = "crayadmin"
+
Run sat auth
. Enter your password when prompted. E.g.:
ncn-m001# sat auth
+Password for crayadmin:
+Succeeded!
+
Other sat
commands are now authenticated to make requests to the API gateway. E.g.:
ncn-m001# sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev
+(see: Run Sat Setrev to Set System Information).
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Ensure the files are readable only by root
.
ncn-m001# touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+Get the SAT configuration file’s endpoint value.
+NOTE: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
ncn-m001# grep endpoint ~/.config/sat/sat.toml
+# endpoint = "https://rgw-vip.nmn"
+
Get the sat-s3-credentials
secret’s endpoint value.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.
+Copy SAT configurations to each manager node on the system.
+ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.
+NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Run sat setrev
to set System Revision Information. Follow the on-screen prompts to set
+the following site-specific values:
TIP: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. I.e., “System type” is EX-1C.
+ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Run sat showrev
to verify System Revision Information. The following tables contain example information.
ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
+In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO level
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
In the following example, the stderr log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
+sat sensors
+
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
+sat xname2nid
+sat swap
+
prodmgr
. Older versions must be uninstalled manually.prodmgr
command is available.Use sat showrev
to list versions of SAT.
NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+-------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+-------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+-------------------+-----------------------+
+
Use prodmgr
to uninstall a version of SAT.
This command will do three things:
+cray-product-catalog
Kubernetes ConfigMap, so that it will no longer show up
+in the output of sat showrev
.ncn-m001# prodmgr uninstall sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 has been removed.
+Removed Docker image cray/cray-sat:3.9.0
+Removed Docker image cray/sat-cfs-install:1.0.2
+Removed Docker image cray/sat-install-utility:1.4.0
+Deleted sat-2.2.10 from product catalog.
+
This procedure can be used to downgrade the active version of SAT.
+prodmgr
command is available.Use sat showrev
to list versions of SAT.
ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Use prodmgr
to activate a different version of SAT.
This command will do three things:
+2.2.10
+sets the repository sat-2.2.10-sle-15sp2
as the only member of the sat-sle-15sp2
group.2.2.10
as active within the product catalog, so that it appears active in the output of
+sat showrev
.ncn-personalization
). Specifically, it will ensure that the layer refers to the version of SAT CFS
+configuration content associated with the version of SAT being activated.ncn-m001# prodmgr activate sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
+Set sat-2.2.10 as active in product catalog.
+Updated CFS configurations: [ncn-personalization]
+
Verify that the chosen version is marked as active.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | False | - | - |
+| sat | 2.2.10 | True | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Run NCN Personalization.
+At this point, the command has modified Nexus package repositories to set a particular package repository
+as active, but no packages on the NCNs have been changed. In order to complete the activation process,
+NCN Personalization must be executed to change the cray-sat-podman
package version on the manager NCNs.
NOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated,
+which may not necessarily be just ncn-personalization
. If multiple configurations were updated in step 2, then
+a cray cfs sessions create
command should be run for each of them. This example assumes a single configuration
+named ncn-personalization
was updated. If multiple were updated, set cfs_configurations
to a space-separated
+list below.
ncn-m001# cfs_configurations="ncn-personalization"
+ncn-m001# for cfs_configuration in ${cfs_configurations}
+do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
+ "${cfs_configuration}" --configuration-limit sat-ncn;
+done
+
Monitor the progress of each CFS session.
+This step assumes a single session named sat-session-ncn-personalization
was created in the previous step.
First, list all containers associated with the CFS session:
+ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
+ -o json | jq '.items[0].spec.containers[] | .name'
+"inventory"
+"ansible-1"
+"istio-proxy"
+
Next, get the logs for the ansible-1
container.
NOTE: the trailing digit might differ from “1”. It is the zero-based
+index of the sat-ncn
layer within the configuration’s layers.
ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
+ --selector=cfsession=sat-session-ncn-personalization
+
Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Execute this step for each unique CFS configuration.
+NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify the new version of the SAT CLI.
+NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the SAT Python package, +which is different from the version number of the overall SAT release distribution.
+ncn-m001# sat --version
+3.9.0
+
The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.
+Six Kibana Dashboards are included with SAT. They provide organized output for system health information.
+Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.
+SAT is installed as a separate product as part of the HPE Cray EX System base installation.
+Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.
+The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.
+The sat
command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes
+manager nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container.There are two ways to run sat.
+sat bash
, followed by a sat
command.sat
command directly on a Kubernetes manager node.In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using interactive and non-interactive modes.
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+
ncn-m001# sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, then use sat bash to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes manager node, use sat-man
on the manager node as shown in the following
+example.
ncn-m001# sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
ncn-m001# man sat
+
ncn-m001# man sat-podman
+
The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.
+root
or super-user account always has the #
character at the end of the prompt and has the host name of the
+host in the prompt.Any non-root
account is indicated with account@hostname>. A user account that is neither root
nor crayadm
is
+referred to as user
.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run on one of the Kubernetes Manager servers. (Non-interactive) | +
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
Examples of the sat status
command used by an administrator:
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+
Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX (Shasta) software stack. The following list shows these dependencies
+for each subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bmccreds
sat bootprep
sat bootsys
sat diag
sat firmware
sat hwhist
sat hwinv
sat hwmatch
sat init
None
+sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat slscheck
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
SAT 2.2.16 was released on February 25th, 2022.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container image and Helm chartIt also added the following new components:
+sat-install-utility
container imagecfs-config-util
container imageThe following sections detail the changes in this release.
+sat
command unavailable in sat bash
shellAfter launching a shell within the SAT container with sat bash
, the sat
command will not
+be found. For example:
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+
This can be resolved temporarily in one of two ways. /sat/venv/bin/
may be prepended to the
+$PATH
environment variable:
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+
Or, the file /sat/venv/bin/activate
may be sourced:
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+
sat bash
shellAfter launching a shell within the SAT container with sat bash
, tab completion for sat
+commands does not work.
This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash
:
source /etc/bash_completion.d/sat-completion.bash
+
sat
in root directorysat
commands will not work if the current directory is /
. For example:
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+
To resolve, run sat
in another directory.
sat
in config directorysat
commands will not work if the current directory is ~/.config/sat
. For example:
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+
To resolve, run sat
in another directory.
sat
commandssat bootprep
automates the creation of CFS configurations, the build and
+customization of IMS images, and the creation of BOS session templates. See
+SAT Bootprep for details.sat slscheck
performs a check for consistency between the System Layout
+Service (SLS) and the Hardware State Manager (HSM).sat bmccreds
provides a simple interface for interacting with the System
+Configuration Service (SCSD) to set BMC Redfish credentials.sat hwhist
displays hardware component history by xname (location) or by
+its Field-Replaceable Unit ID (FRUID). This command queries the Hardware
+State Manager (HSM) API to obtain this information. Since the sat hwhist
+command supports querying for the history of a component by its FRUID, the
+FRUID of components has been added to the output of sat hwinv
.The following automation has been added to the install script, install.sh
:
sat-config-import
Kubernetes job, which is
+started when the sat-cfs-install Helm chart is deployed.The SAT product uploads additional information to the cray-product-catalog
+Kubernetes ConfigMap detailing the components it provides, including container
+(Docker) images, Helm charts, RPMs, and package repositories.
This information is used to support uninstall and activation of SAT product +versions moving forward.
+Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.
+See Uninstall: Removing a Version of SAT +and Activate: Switching Between Versions +for details.
+sat status
A Subrole
column has been added to the output of sat status
. This allows you
+to easily differentiate between master, worker, and storage nodes in the
+management role, for example.
Hostname information from SLS has been added to sat status
output.
Support for JSON-formatted output has been added to commands which currently
+support the --format
option, such as hwinv
, status
, and showrev
.
Many usability improvements have been made to multiple sat
commands,
+mostly related to filtering command output. The following are some highlights:
--fields
option to display only specific fields for subcommands which
+display tabular reports.--filter
queries
+so that the first match is used, similar to --sort-by
.--filter
, --fields
, and --reverse
for summaries
+displayed by sat hwinv
.sat hwinv
.The default log level for stderr has been changed from “WARNING” to “INFO”. For +details, see SAT Logging.
+With the command-line options --loglevel-stderr
and --loglevel-file
, the log level
+can now be configured separately for stderr and the log file.
The existing --loglevel
option is now an alias for the --loglevel-stderr
option.
The Podman wrapper script is the script installed at /usr/bin/sat
on the
+master management NCNs by the cray-sat-podman
RPM that runs the cray-sat
+container in podman
. The following subsections detail improvements that were
+made to the wrapper script in this release.
The Podman wrapper script that launches the cray-sat
container with podman
+has been modified to mount the user’s current directory and home directory into
+the cray-sat
container to provide access to local files in the container.
The man page for the Podman wrapper script, which is accessed by typing man sat
on a master management NCN, has been improved to document the following:
Fixed issues with redirecting stdout and stderr, and piping output to commands,
+such as awk
, less
, and more
.
A new sat
option has been added to configure the HTTP timeout length for
+requests to the API gateway. See sat-man sat
for details.
sat bootsys
ImprovementsMany improvements and fixes have been made to sat bootsys
. The following are some
+highlights:
--excluded-ncns
option, which can be used to omit NCNs
+from the platform-services
and ncn-power
stages in case they are
+inaccessible.sat bootsys shutdown
now prompt the user to
+continue before proceeding. A new option, --disruptive
, will bypass this.platform-services
+stage of sat bootsys boot
.sat xname2nid
Improvementssat xname2nid
can now recursively expand slot, chassis, and cabinet xnames to
+a list of nids in those locations.
A new --format
option has been added to sat xname2nid
. It sets the output format to
+either “range” (the default) or “nid”. The “range” format displays nids in a
+compressed range format suitable for use with a workload manager like Slurm.
The commands which interact with HSM (e.g., sat status
and sat hwinv
) now
+use the v2 HSM API.
sat diag
Limited to HSN Switchessat diag
will now only operate against HSN switches by default. These are the
+only controllers that support running diagnostics with HMJTD.
sat showrev
EnhancementsA column has been added to the output of sat showrev
that indicates whether a
+product version is “active”. The definition of “active” varies across products,
+and not all products may set an “active” version.
For SAT, the active version is the one with its hosted-type package repository in
+Nexus set as the member of the group-type package repository in Nexus,
+meaning that it will be used when installing the cray-sat-podman
RPM.
cray-sat
Container Image Size ReductionThe size of the cray-sat
container image has been approximately cut in half by
+leveraging multi-stage builds. This also improved the repeatability of the unit
+tests by running them in the container.
Minor bug fixes were made in cray-sat
and in cray-sat-podman
. For full change lists,
+see each repository’s CHANGELOG.md
file.
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptIt also added the following new component:
+sat-cfs-install
docker image and helm chartThe following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named “ncn-personalization”.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This Helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+executables on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and config file options have been removed. For further instructions, see Remove Obsolete Configuration File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes masters and workers.
sat firmware
.cray-sat
container image.sat firmware
command.We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to designate a version number for the entire SAT product. We released version 2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes management cluster (i.e., workers
+and masters). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Config File Location ChangeThe default location of the SAT config file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own config files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment variable.
Additionally, if a config file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
option.--list-node-accels
option. The count of
+node accelerators is also included for each node.--list-node-accel-risers
+option. The count of node accelerator risers is also included for each node.--list-node-hsn-nics
+option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For detailed instructions, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes (NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding config-file
+option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding config file options were
+deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootsys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
See the System Admin Toolkit Command Overview and the table of commands in the SAT Authentication section of this document for more details on each of these commands.
+ + + + + +SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.
+This automated process centers around the sat bootprep
command. Man page
+documentation for sat bootprep
can be viewed similarly to other SAT commands.
ncn-m001# sat-man sat-bootprep
+
sat bootprep
is used to create CFS configurations, build and
+rename IMS images, and create BOS session templates which tie the
+configurations and images together during a BOS session.
sat bootsys
automates several portions of the boot and shutdown processes,
+including (but not limited to) performing BOS operations (such as creating BOS
+sessions), powering on and off cabinets, and checking the state of the system
+prior to shutdown.
The input file provided to sat bootprep
is a YAML-formatted file containing
+information which CFS, IMS, and BOS use to create configurations, images, and
+BOS session templates respectively. Writing and modifying these input files is
+the main task associated with using sat bootprep
. An input file is composed of
+three main sections, one each for configurations, images, and session templates.
+These sections may be specified in any order, and any of the sections may be
+omitted if desired.
The configurations
section begins with a configurations:
key.
---
+configurations:
+
Under this key, the user can list one or more configurations to create. For +each configuration, a name should be given, in addition to the list of layers +which comprise the configuration. Each layer can be defined by a product name +and optionally a version number, or commit hash or branch in the product’s +configuration repository. Alternatively, a layer can be defined by a Git +repository URL directly, along with an associated branch or commit hash.
+When a configuration layer is specified in terms of a product name, the layer +is created in CFS by looking up relevant configuration information (including +the configuration repository and commit information) from the +cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied, but if it is absent, the version is assumed to be the latest version +found in the cray-product-catalog.
+---
+configurations:
+- name: example-configuration
+ layers:
+ - name: example product
+ playbook: example.yml
+ product:
+ name: example
+ version: 1.2.3
+
Alternatively, a configuration layer may be specified by explicitly referencing
+the desired configuration repository, along with the branch containing the
+intended version of the Ansible playbooks. A commit hash may be specified by replacing
+branch
with commit
.
...
+ - name: another example product
+ playbook: another-example.yml
+ git:
+ url: "https://vcs.local/vcs/another-example-config-management.git"
+ branch: main
+ ...
+
When sat bootprep
is run against an input file, a CFS configuration will be
+created corresponding to each configuration in the configurations
section. For
+example, the configuration created from an input file with the layers listed
+above might look something like the following:
{
+ "lastUpdated": "2022-02-07T21:47:49Z",
+ "layers": [
+ {
+ "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "example product",
+ "playbook": "example.yml"
+ },
+ {
+ "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "another example product",
+ "playbook": "another-example.yml"
+ }
+ ],
+ "name": "example-configuration"
+}
+
After specifying configurations, the user may add images to the input file
+which are to be built by IMS. To add an images
section, the user should add
+an images
key.
---
+configurations:
+ ... (omitted for brevity)
+images:
+
Under the images
key, the user may define one or more images to be created in
+a list. Each element of the list defines a separate IMS image to be built and/or
+configured. Images must contain a name, as well as an ims
section containing a
+definition of the image to be built and/or configured. Images may be defined by
+an image recipe, or by a pre-built image. Recipes and pre-built images are
+referred to by their names or IDs in IMS. The ims
section should also contain
+an is_recipe
property, which indicates whether the name or ID refers to an
+image recipe or a pre-built image. Images may also optionally provide a text
+description of the image. This description is not stored or used by sat bootprep
or any CSM services, but is useful for documenting images in the input
+file.
---
+configurations:
+ ... (omitted for brevity)
+images:
+- name: example-compute-image
+ description: >
+ An example compute node image for illustrative purposes.
+ ims:
+ name: example-compute-image-recipe
+ is_recipe: true
+- name: another-example-compute-image
+ description: >
+ Another example compute node image.
+ ims:
+ id: <IMS image UUID>
+ is_recipe: false
+
Images may also contain a configuration
property in their definition, which
+specifies a configuration with which to customize the built image prior to
+booting. If a configuration is specified, then configuration groups must also
+be specified using the configuration_group_names
property.
---
+configurations:
+ ... (omitted for brevity)
+images:
+- name: example-compute-image
+ description: >
+ An example compute node image for illustrative purposes.
+ ims:
+ name: example-compute-image-recipe
+ is_recipe: true
+ configuration: example configuration
+ configuration_group_names:
+ - Compute
+
BOS session templates are the final section of the input file, and are defined
+under the session_templates
key.
---
+configurations:
+ ... (omitted for brevity)
+images:
+ ... (omitted for brevity)
+session_templates:
+
Each session template is defined in terms of its name, an image, a
+configuration, and a set of parameters which can be used to configure the
+session. The name, image, and configuration are specified with their respective
+name
, image
, and configuration
keys. bos_parameters
may also be
+specified; currently, the only setting under bos_parameters
that is supported
+is boot_sets
, which can be used to define boot sets in the BOS session
+template. Each boot set is defined under its own property under boot_sets
, and
+the value of each boot set can contain the following properties, all of
+which are optional:
kernel_parameters
: the parameters passed to the kernel on the command linenetwork
: the network over which the nodes will bootnode_list
: nodes to add to the boot setnode_roles_groups
: HSM roles to add to the boot setnode_groups
: HSM groups to add to the boot setrootfs_provider
: the root file system providerrootfs_provider_passthrough
: parameters to add to the rootfs=
kernel
+parameterThe properties listed previously are the same as the parameters that can be specified directly through BOS boot sets. More information can be found in the CSM documentation on session templates. Additional properties not listed are passed through to the BOS session template as written.
+An example session template might look like the following:
+configurations:
+ ... (omitted for brevity)
+images:
+ ... (omitted for brevity)
+session_templates:
+- name: example-session-template
+ image: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_list: []
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
Putting together all of the previous input file sections, an example bootprep input +file might look something like the following.
+---
+configurations:
+- name: cos-config
+ layers:
+ - name: cos-integration-2.2.87
+ playbook: site.yml
+ product:
+ name: cos
+ version: 2.2.87
+ branch: integration
+ - name: cpe-integration-21.12.3
+ playbook: pe_deploy.yml
+ product:
+ name: cpe
+ version: 21.12.3
+ branch: integration
+ - name: slurm-master-1.1.1
+ playbook: site.yml
+ product:
+ name: slurm
+ version: 1.1.1
+ branch: master
+images:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ ims:
+ is_recipe: true
+ name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ configuration: cos-config
+ configuration_group_names:
+ - Compute
+session_templates:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ image: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ configuration: cos-config
+ bos_parameters:
+ boot_sets:
+ compute:
+ kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+ node_roles_groups:
+ - Compute
+
It is possible to create an example bootprep input file using values from the
+system’s product catalog using the sat bootprep generate-example
command.
ncn-m001# sat bootprep generate-example
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.1.5) of product uan
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (2.1.5) of product uan
+INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml.
+
This file should be reviewed and edited to match the desired parameters of the +configurations, images, and session templates.
+The contents of the YAML input files described above must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named “JSON Schema”, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.
+To view the exact schema specification, run sat bootprep view-schema
.
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft-07/schema"
+title: Bootprep Input File
+description: >
+ A description of the set of CFS configurations to create, the set of IMS
+ images to create and optionally customize with the defined CFS configurations,
+ and the set of BOS session templates to create that reference the defined
+ images and configurations.
+type: object
+additionalProperties: false
+properties:
+ ...
+
The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature was included +which can generate user-friendly HTML documentation for the input file schema +which can be browsed with the user’s preferred web browser.
+Create a documentation tarball using sat bootprep
.
ncn-m001# sat bootprep generate-docs
+INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
+
An alternate output directory can be specified with the --output-dir
+option. The generated tarball is always named bootprep-schema-docs.tar.gz
.
ncn-m001# sat bootprep generate-docs --output-dir /tmp
+INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
+
From another machine, copy the tarball to a local directory.
+another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
+
Extract the contents of the tarball and open the contained index.html
.
another-machine$ tar xzvf bootprep-schema-docs.tar.gz
+x bootprep-schema-docs/
+x bootprep-schema-docs/index.html
+x bootprep-schema-docs/schema_doc.css
+x bootprep-schema-docs/schema_doc.min.js
+another-machine$ open bootprep-schema-docs/index.html
+
The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.
+Grafana can be accessed via web browser at the following URL:
+https://sma-grafana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com
For additional details about how to access the Grafana Dashboards refer to Access the Grafana Monitoring UI in the +SMA product documentation.
+For more information about the interpretation of metrics for the SAT Grafana Dashboards refer to Fabric Telemetry +Kafka Topics in the SMA product documentation.
+There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.
+Dashboard Name | +Display Type | +
---|---|
Fabric Congestion | +Chart Panels | +
Fabric RFC3635 | +Chart Panels | +
Fabric Errors | +Tabular Format | +
Fabric Port State | +Tabular Format | +
The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry is not numerical or because it changes infrequently. The value shown is the most recently reported value for that location during the time range selected, if any. The interval setting is not used for tabular dashboards.
+Shows the Interval and Locations Options for the available telemetry.
+ +The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.
+For additional information, refer to Grafana Templates and Variables.
+The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.
+The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.
+SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.
+This dashboard contains a variable, Port Type, not found in the other dashboards. The possible values are edge, local, and global and correspond to the link’s relationship to the network topology. The locations presented in the panels are restricted to the values (any combination, defaults to “all”) selected.
+The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.
+The port type of a link is reported as a port state “subtype” event when defined at port initialization.
+This dashboard reports error counters in a tabular format in three panels.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.
+The three columns named group, switch, and port are not port state events, but extra information included with all port state events.
+For additional information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.
+Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.
+ + + + + +Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.
+Kibana can be accessed via web browser at the following URL:
+https://sma-kibana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com
For additional details about how to access the Kibana Dashboards refer to View Logs Via Kibana in the SMA product +documentation.
+Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and Rasdaemon Kibana Dashboards are included in this +table.
+Dashboard | +Short Description | +Long Description | +Kibana Visualization and Search Name | +
---|---|---|---|
sat-aer | +AER corrected | +Corrected Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-corrected Search: sat-aer-corrected | +
sat-aer | +AER fatal | +Fatal Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-fatal Search: sat-aer-fatal | +
sat-atom | +ATOM failures | +Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. | +sat-atom-failed | +
sat-atom | +ATOM admindown | +Application Task Orchestration and Management test failures can result in nodes being marked admindown. An admindown node is not available for job launch. | +sat-atom-admindown | +
sat-heartbeat | +Heartbeat loss events | +Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. | +sat-heartbeat | +
sat-kernel | +Kernel assertions | +The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. | +sat-kassertions | +
sat-kernel | +Kernel panics | +The kernel panics when something is seriously wrong. The node goes down. | +sat-kernel-panic | +
sat-kernel | +Lustre bugs (LBUGs) | +The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. | +sat-lbug | +
sat-kernel | +CPU stalls | +CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. | +sat-cpu-stall | +
sat-kernel | +Out of memory | +An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. | +sat-oom | +
sat-mce | +MCE | +Machine Check Exceptions (MCE) are errors detected at the processor level. | +sat-mce | +
sat-rasdaemon | +rasdaemon errors | +Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. |
+sat-rasdaemon-error | +
sat-rasdaemon | +rasdaemon messages | +All messages from the rasdaemon service on nodes. |
+sat-rasdaemon | +
By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.
+The Kibana Dashboard should be open on your system.
+Navigate to Management
+Navigate to Advanced Settings in the Kibana section, below the Elastic search section
+Scroll down to the Discover section
+Change Highlight results from on to off
+Click Save to save changes
+The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.
+Go to the dashboard section.
+Select sat-aer dashboard.
+Choose the time range of interest.
+View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.
+The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health +checks and application test failures. Some test failures are of possible interest even though a node is not marked +admindown or otherwise fails. They are of clear interest if a node is marked admindown, and might provide +clues if a node otherwise fails. They might also show application problems.
+HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.
+Go to the dashboard section.
+Select sat-atom dashboard.
+Choose the time range of interest.
+View any nodes marked admindown and any ATOM test failures. These failures occur during health checks and +application test failures. Test failures marked admindown are important to note. View the matching log messages +in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired, +results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods +are responsible for monitoring nodes in the system for heartbeat loss.
+Go to the dashboard section.
+Select sat-heartbeat dashboard.
+Choose the time range of interest.
+View the heartbeat loss messages that are logged by the hbtd pods in the system. The hbtd pods are responsible for +monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.
+The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.
+Go to the dashboard section.
+Select sat-kernel dashboard.
+Choose the time range of interest.
+View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.
+The MCE Dashboard displays CPU detected processor-level hardware errors.
+Go to the dashboard section.
+Select sat-mce dashboard.
+Choose the time range of interest.
+View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.
+The Rasdaemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon
+service on nodes in the system. This service collects all hardware error events reported by the linux kernel, including
+PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages
+presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one
+for only messages of severity “emerg” or “err” and another for all messages from rasdaemon.
Go to the dashboard section.
+Select sat-rasdaemon dashboard.
+Choose the time range of interest.
+View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.
+Describes how to install the System Admin Toolkit (SAT) product stream.
+...
) in shell output indicate omitted lines. Replace 2.2.x
with the version of the SAT product stream
+being installed. Start a typescript.
+The typescript will record the commands and the output from this installation.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Copy the release distribution gzipped tar file to ncn-m001
.
Unzip and extract the release distribution, 2.2.x
.
ncn-m001# tar -xvzf sat-2.2.x.tar.gz
+
Change directory to the extracted release distribution directory.
+ncn-m001# cd sat-2.2.x
+
Run the installer: install.sh.
+The script produces a lot of output. A successful install ends with “SAT +version 2.2.x has been installed”.
+ncn-m001# ./install.sh
+...
+====> Updating active CFS configurations
+...
+====> SAT version 2.2.x has been installed.
+
Upgrade only: Record the names of the CFS configuration or
+configurations modified by install.sh
.
The install.sh
script attempts to modify any CFS configurations that apply
+to the master management NCNs. During an upgrade, install.sh
will log
+messages indicating the CFS configuration or configurations that were
+modified. For example, if there are three master nodes all using the same
+CFS configuration named “ncn-personalization”, the output would look like
+this:
====> Updating active CFS configurations
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s3b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s5b0n0
+INFO: Updating CFS configuration "ncn-personalization"
+INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml in configuration "ncn-personalization".
+INFO: Key "name" in layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-ncn to sat-2.2.16
+INFO: Successfully updated layers in configuration "ncn-personalization"
+
Save the name of each CFS configuration updated by the installer. In the +previous example, a single configuration named “ncn-personalization” was +updated, so that name is saved to a temporary file.
+ncn-m001# echo ncn-personalization >> /tmp/sat-ncn-cfs-configurations.txt
+
Repeat the previous command for each CFS configuration that was updated.
+Upgrade only: Save the new name of the SAT CFS configuration layer.
+In the example install.sh
output above, the new layer name is
+sat-2.2.16
. Save this value to a file to be used later.
ncn-m001# echo sat-2.2.16 > /tmp/sat-layer-name.txt
+
Fresh install only: Save the CFS configuration layer for SAT to a file +for later use.
+The install.sh
script attempts to modify any CFS configurations that apply
+to the master management NCNs. During a fresh install, no such CFS
+configurations will be found, and it will instead log the SAT configuration
+layer that must be added to the CFS configuration that will be created. Here
+is an example of the output in that case:
====> Updating active CFS configurations
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, x3000c0s3b0n0, x3000c0s5b0n0
+WARNING: No CFS configurations found that apply to components with role Management and subrole Master.
+INFO: The following sat layer should be used in the CFS configuration that will be applied to NCNs with role Management and subrole Master.
+{
+ "name": "sat-2.2.15",
+ "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
+ "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
+ "playbook": "sat-ncn.yml"
+}
+
Save the JSON output to a file for later use. For example:
+ncn-m001# cat > /tmp/sat-layer.json <<EOF
+> {
+> "name": "sat-2.2.15",
+> "commit": "9a74b8f5ba499af6fbcecfd2518a40e081312933",
+> "cloneUrl": "https://api-gw-service-nmn.local/vcs/cray/sat-config-management.git",
+> "playbook": "sat-ncn.yml"
+> }
+> EOF
+
Do not copy the previous command verbatim. Use the JSON output from the
+install.sh
script.
Optional: Remove the SAT release distribution tar file and extracted directory.
+ncn-m001# rm sat-2.2.x.tar.gz
+ncn-m001# rm -rf sat-2.2.x/
+
Upgrade only: Ensure that the environment variable SAT_TAG
is not set
+in the ~/.bashrc
file on any of the management NCNs.
NOTE: This step should only be required when updating from +Shasta 1.4.1 or Shasta 1.4.2.
+The following example assumes three manager NCNs: ncn-m001
, ncn-m002
, and ncn-m003
,
+and shows output from a system in which no further action is needed.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m003: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+
The following example shows that SAT_TAG
is set in ~/.bashrc
on ncn-m002
.
+Remove that line from the ~/.bashrc
file on ncn-m002
.
ncn-m001# pdsh -w ncn-m00[1-3] cat ~/.bashrc
+ncn-m001: source <(kubectl completion bash)
+ncn-m002: source <(kubectl completion bash)
+ncn-m002: export SAT_TAG=3.5.0
+ncn-m003: source <(kubectl completion bash)
+
Stop the typescript.
+NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install. See Next Steps.
+ncn-m001# exit
+
SAT version 2.2.x
is now installed/upgraded, meaning the SAT 2.2.x
release
+has been loaded into the system software repository.
sat
command won’t be available until the NCN Personalization
+procedure has been executed. If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.
+NOTE: The NCN Personalization procedure is required when +upgrading SAT. The setup procedures in SAT Setup, however, are +not required when upgrading SAT. They should have been executed +during the first installation of SAT.
+Execute the NCN Personalization procedure:
+ +If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the upgrade procedures:
+ +Describes how to perform NCN personalization using CFS. This personalization process +will configure the System Admin Toolkit (SAT) product stream.
+/tmp/sat-ncn-cfs-configurations.txt
./tmp/sat-layer-name.txt
./tmp/sat-layer.json
....
) in shell output indicate omitted lines. Replace 2.2.x
with the version of the SAT product stream
+being installed. Start a typescript if not already using one.
+The typescript will capture the commands and the output from this installation procedure.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Fresh install only: Add the SAT layer to the NCN personalization JSON file.
+If the SAT install script, install.sh
, did not identify and modify the CFS
+configurations that apply to each master management NCN, it will have printed
+the SAT CFS configuration layer in JSON format. This layer must be added to
+the JSON file being used to construct the CFS configuration. For example,
+if the file being used is named ncn-personalization.json
, and the SAT
+layer was saved to the file /tmp/sat-layer.json
as described in the
+install instructions, the following jq
command will append the SAT layer
+and save the result in a new file named ncn-personalization.json
.
ncn-m001# jq -s '{layers: (.[0].layers + [.[1]])}' ncn-personalization.json \
+ /tmp/sat-layer.json > ncn-personalization.new.json
+
For instructions on how to create a CFS configuration from the previous +file and how to apply it to the management NCNs, refer to “Perform NCN +Personalization” in the HPE Cray System Management Documentation. After +the CFS configuration has been created and applied, return to this +procedure.
+Upgrade only: Invoke each CFS configuration that was updated during the +upgrade.
+If the SAT install script, install.sh
, identified CFS configurations that
+apply to the master management NCNs and modified them in place, invoke each
+CFS configuration that was created or updated during installation.
This step will create a CFS session for each given configuration and install +SAT on the associated manager NCNs.
+The --configuration-limit
option limits the configuration session to run
+only the SAT layer of the configuration.
You should see a representation of the CFS session in the output.
+ncn-m001# for cfs_configuration in $(cat /tmp/sat-ncn-cfs-configurations.txt);
+do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
+ "${cfs_configuration}" --configuration-limit $(cat /tmp/sat-layer-name.txt);
+done
+
+name="sat-session-ncn-personalization"
+
+[ansible]
+...
+
Upgrade only: Monitor the progress of each CFS session.
+This step assumes a single session named sat-session-ncn-personalization
was created in the previous step.
First, list all containers associated with the CFS session:
+ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
+ -o json | jq '.items[0].spec.containers[] | .name'
+"inventory"
+"ansible-1"
+"istio-proxy"
+
Next, get the logs for the ansible-1
container.
NOTE: the trailing digit might differ from “1”. It is the zero-based
+index of the sat-ncn
layer within the configuration’s layers.
ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
+ --selector=cfsession=sat-session-ncn-personalization
+
Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Execute this step for each unique CFS configuration.
+NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify that SAT was successfully configured.
+If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
NOTE: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
ncn-m001# sat --version
+sat 3.7.0
+
NOTE: Upon first running sat
, you may see additional output while the sat
+container image is downloaded. This will occur the first time sat
is run on
+each manager NCN. For example, if you run sat
for the first time on ncn-m001
+and then for the first time on ncn-m002
, you will see this additional output
+both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
Stop the typescript.
+ncn-m001# exit
+
SAT version 2.2.x
is now configured:
If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +to determine which step to execute next.
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the remaining SAT Setup or SAT Post-Upgrade procedures.
+If performing a fresh install, execute the SAT Setup procedures:
+ +If performing an upgrade, execute the SAT Post-Upgrade procedures:
+ +Initially, as part of the installation and configuration, SAT authentication is set up so sat commands can be used in
+later steps of the install process. The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see
+Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation.
+For additional information on SAT authentication, see System Security and Authentication in the CSM
+documentation.
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to
+the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat
commands require S3 to
+be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket,
+the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be
+done on every Kubernetes manager node where SAT commands are run.
Below is a table describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bmccreds |
+Requires authentication to the API gateway. | +sat-bmccreds |
+Set BMC passwords. | +
sat bootprep |
+Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is done on ncn-m001 during the install. | +sat-bootprep |
+Prepare to boot nodes with images and configurations. | +
sat bootsys |
+Requires authentication to the API gateway. Requires kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwhist |
+Requires authentication to the API gateway. | +sat-hwhist |
+Report hardware component history. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat k8s |
+Requires kubernetes configuration and authentication, which is automatically configured on ncn-w001 during the install. | +sat-k8s |
+Report on kubernetes replicasets that have co-located replicas (i.e. replicas on the same node). | +
sat linkhealth |
++ | + | This command has been deprecated. | +
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node xnames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat slscheck |
+Requires authentication to the API gateway. | +sat-slscheck |
+Perform a cross-check between SLS and HSM. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC xnames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, you must run the sat auth
command. This command will prompt for a password
+on the command line. The username value is obtained from the following locations, in order of higher precedence to lower
+precedence:
--username
global command-line option.username
option in the api_gateway
section of the config file at ~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file will be obtained and saved to
+~/.config/sat/tokens
. Subsequent sat commands will determine the username the same way as sat auth
described above,
+and will use the token for that username if it has been obtained and saved by sat auth
.
sat
CLI has been installed following Install The System Admin Toolkit Product Stream.The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:
+Generate a default SAT configuration file, if one does not exist.
+ncn-m001# sat init
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the config file already exists, it will print out an error:
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
section of the config file. E.g.:
username = "crayadmin"
+
Run sat auth
. Enter your password when prompted. E.g.:
ncn-m001# sat auth
+Password for crayadmin:
+Succeeded!
+
Other sat
commands are now authenticated to make requests to the API gateway. E.g.:
ncn-m001# sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT +S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. +This must be done on every Kubernetes master node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev
+(see: Run Sat Setrev to Set System Information).
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Ensure the files are readable only by root
.
ncn-m001# touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+Get the SAT configuration file’s endpoint value.
+NOTE: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
ncn-m001# grep endpoint ~/.config/sat/sat.toml
+# endpoint = "https://rgw-vip.nmn"
+
Get the sat-s3-credentials
secret’s endpoint value.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.
+Copy SAT configurations to each manager node on the system.
+ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may +be different. This example assumes three manager nodes, where the configuration files must be +copied from ncn-m001 to ncn-m002 and ncn-m003. Therefore, the list of hosts above is ncn-m002 +and ncn-m003.
+NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Run sat setrev
to set System Revision Information. Follow the on-screen prompts to set
+the following site-specific values:
TIP: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. I.e., “System type” is EX-1C.
+ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Run sat showrev
to verify System Revision Information. The following tables contain example information.
ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
+In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
In the following example, the stderr log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
+sat sensors
+
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
+sat xname2nid
+sat swap
+
prodmgr
.prodmgr
command is available.Use sat showrev
to list versions of SAT.
NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+-------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+-------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+-------------------+-----------------------+
+
Use prodmgr
to uninstall a version of SAT.
This command will do three things:
+cray-product-catalog
Kubernetes ConfigMap, so that it will no longer show up
+in the output of sat showrev
.ncn-m001# prodmgr uninstall sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 has been removed.
+Removed Docker image cray/cray-sat:3.9.0
+Removed Docker image cray/sat-cfs-install:1.0.2
+Removed Docker image cray/sat-install-utility:1.4.0
+Deleted sat-2.2.10 from product catalog.
+
This procedure can be used to downgrade the active version of SAT.
+prodmgr
command is available.Use sat showrev
to list versions of SAT.
ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Use prodmgr
to activate a different version of SAT.
This command will do three things:
+2.2.10
+sets the repository sat-2.2.10-sle-15sp2
as the only member of the sat-sle-15sp2
group.2.2.10
as active within the product catalog, so that it appears active in the output of
+sat showrev
.ncn-personalization
). Specifically, it will ensure that the layer refers to the version of SAT CFS
+configuration content associated with the version of SAT being activated.ncn-m001# prodmgr activate sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
+Set sat-2.2.10 as active in product catalog.
+Updated CFS configurations: [ncn-personalization]
+
Verify that the chosen version is marked as active.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | False | - | - |
+| sat | 2.2.10 | True | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Run NCN Personalization.
+At this point, the command has modified Nexus package repositories to set a particular package repository
+as active, but no packages on the NCNs have been changed. In order to complete the activation process,
+NCN Personalization must be executed to change the cray-sat-podman
package version on the manager NCNs.
NOTE: Refer to the command output from step 2 for the names of all CFS configurations that were updated,
+which may not necessarily be just ncn-personalization
. If multiple configurations were updated in step 2, then
+a cray cfs sessions create
command should be run for each of them. This example assumes a single configuration
+named ncn-personalization
was updated. If multiple were updated, set cfs_configurations
to a space-separated
+list below.
ncn-m001# cfs_configurations="ncn-personalization"
+ncn-m001# for cfs_configuration in ${cfs_configurations}
+do cray cfs sessions create --name "sat-session-${cfs_configuration}" --configuration-name \
+ "${cfs_configuration}" --configuration-limit sat-ncn;
+done
+
Monitor the progress of each CFS session.
+This step assumes a single session named sat-session-ncn-personalization
was created in the previous step.
First, list all containers associated with the CFS session:
+ncn-m001# kubectl get pod -n services --selector=cfsession=sat-session-ncn-personalization \
+ -o json | jq '.items[0].spec.containers[] | .name'
+"inventory"
+"ansible-1"
+"istio-proxy"
+
Next, get the logs for the ansible-1
container.
NOTE: The trailing digit might differ from “1”. It is the zero-based
+index of the sat-ncn
layer within the configuration’s layers.
ncn-m001# kubectl logs -c ansible-1 --tail 100 -f -n services \
+ --selector=cfsession=sat-session-ncn-personalization
+
Ansible plays, which are run by the CFS session, will install SAT on all the +manager NCNs on the system. Successful results for all of the manager NCN xnames +can be found at the end of the container log. For example:
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Execute this step for each unique CFS configuration.
+NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify the new version of the SAT CLI.
+NOTE: This version number will differ from the version number of the SAT +release distribution. This is the semantic version of the SAT Python package, +which is different from the version number of the overall SAT release distribution.
+ncn-m001# sat --version
+3.9.0
+
The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt commands +used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.
+Six Kibana Dashboards are included with SAT. They provide organized output for system health information.
+Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.
+SAT is installed as a separate product as part of the HPE Cray EX System base installation.
+Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the +HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are +similarities between SAT commands and xt commands used on the Cray XC platform.
+The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.
+The sat
command-line utility runs in a container using podman, a daemonless container runtime. SAT runs on Kubernetes
+manager nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container. There are two ways to run sat.
+sat bash
, followed by a sat
command.sat
command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using interactive and non-interactive modes.
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+
ncn-m001# sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, then use sat bash to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes manager node, use sat-man
on the manager node as shown in the following
+example.
ncn-m001# sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
ncn-m001# man sat
+
ncn-m001# man sat-podman
+
The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.
+root
or super-user account always has the #
character at the end of the prompt and has the host name of the
+host in the prompt. A non-root
account is indicated with account@hostname>. A user account that is neither root
nor crayadm
is
+referred to as user
.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run on one of the Kubernetes Manager servers. (Non-interactive) | +
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
Examples of the sat status
command used by an administrator:
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+
Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX (Shasta) software stack. The following list shows these dependencies
+for each subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bmccreds
sat bootprep
sat bootsys
sat diag
sat firmware
sat hwhist
sat hwinv
sat hwmatch
sat init
None
+sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat slscheck
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
The 2.3.4 version of the SAT product includes:
+sat
Commands: None.
+When running sat
commands, the current working directory is now mounted in the
+container as /sat/share
, and the current working directory within the container
+is also /sat/share
.
Files in the current working directory must be specified using relative paths to
+that directory, because the current working directory is always mounted on /sat/share
.
+Absolute paths should be avoided, and paths that are outside of $HOME
or $PWD
+are never accessible to the container environment.
The home directory is still mounted on the same path inside the container as it +is on the host.
+sat bootsys
The following options were added to sat bootsys
.
--bos-limit
--recursive
The --bos-limit
option passes a given limit string to a BOS session. The --recursive
+option specifies a slot or other higher-level component in the limit string
sat bootprep
The --delete-ims-jobs
option was added to sat bootprep run
. It deletes IMS
+jobs after sat bootprep
is run. Jobs are no longer deleted by default.
sat status
sat status
now includes information about nodes’ CFS configuration statuses, such
+as desired configuration, configuration status, and error count.
The output of sat status
now splits different component types into different report tables.
The following options were added to sat status
.
--hsm-fields
, --sls-fields
, --cfs-fields
--bos-template
The --hsm-fields
, --sls-fields
, --cfs-fields
options limit the output columns
+according to specified CSM services.
The --bos-template
option filters the status report according to the specified
+session template’s boot sets.
The following components were modified to be compatible with CSM 1.2.
+sat-cfs-install
container image and Helm chart; sat-install-utility
container image. The sat-ncn
ansible role provided by sat-cfs-install
was modified to enable
+GPG checks on packages while leaving GPG checks disabled on repository metadata.
Updated urllib3 dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed +Python dependency versions.
+Minor bug fixes were made in each of the repositories. For full change lists, see each +repository’s CHANGELOG.md file.
+The known issues listed under the SAT 2.2 release were fixed.
+SAT 2.2.16 was released on February 25th, 2022.
+This version of the SAT product included:
+sat
python package and CLI; sat-podman
wrapper script; sat-cfs-install
container image and Helm chart. It also added the following new components:
+sat-install-utility
container image; cfs-config-util
container image. The following sections detail the changes in this release.
+sat
command unavailable in sat bash
shellAfter launching a shell within the SAT container with sat bash
, the sat
command will not
+be found. For example:
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+
This can be resolved temporarily in one of two ways. /sat/venv/bin/
may be prepended to the
+$PATH
environment variable:
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+
Or, the file /sat/venv/bin/activate
may be sourced:
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+
sat bash
shellAfter launching a shell within the SAT container with sat bash
, tab completion for sat
+commands does not work.
This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash
:
source /etc/bash_completion.d/sat-completion.bash
+
sat
in root directorysat
commands will not work if the current directory is /
. For example:
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+
To resolve, run sat
in another directory.
sat
in config directorysat
commands will not work if the current directory is ~/.config/sat
. For example:
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+
To resolve, run sat
in another directory.
sat
commandssat bootprep
automates the creation of CFS configurations, the build and
+customization of IMS images, and the creation of BOS session templates. See
+SAT Bootprep for details. sat slscheck
performs a check for consistency between the System Layout
+Service (SLS) and the Hardware State Manager (HSM). sat bmccreds
provides a simple interface for interacting with the System
+Configuration Service (SCSD) to set BMC Redfish credentials. sat hwhist
displays hardware component history by xname (location) or by
+its Field-Replaceable Unit ID (FRUID). This command queries the Hardware
+State Manager (HSM) API to obtain this information. Since the sat hwhist
+command supports querying for the history of a component by its FRUID, the
+FRUID of components has been added to the output of sat hwinv
. The following automation has been added to the install script, install.sh
:
sat-config-import
Kubernetes job, which is
+started when the sat-cfs-install Helm chart is deployed. The SAT product uploads additional information to the cray-product-catalog
+Kubernetes ConfigMap detailing the components it provides, including container
+(Docker) images, Helm charts, RPMs, and package repositories.
This information is used to support uninstall and activation of SAT product +versions moving forward.
+Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.
+See Uninstall: Removing a Version of SAT +and Activate: Switching Between Versions +for details.
+sat status
A Subrole
column has been added to the output of sat status
. This allows you
+to easily differentiate between master, worker, and storage nodes in the
+management role, for example.
Hostname information from SLS has been added to sat status
output.
Support for JSON-formatted output has been added to commands which currently
+support the --format
option, such as hwinv
, status
, and showrev
.
Many usability improvements have been made to multiple sat
commands,
+mostly related to filtering command output. The following are some highlights:
--fields
option to display only specific fields for subcommands which
+display tabular reports.--filter
queries
+so that the first match is used, similar to --sort-by
.--filter
, --fields
, and --reverse
for summaries
+displayed by sat hwinv
.sat hwinv
.The default log level for stderr has been changed from “WARNING” to “INFO”. For +details, see SAT Logging.
+With the command-line options --loglevel-stderr
and --loglevel-file
, the log level
+can now be configured separately for stderr and the log file.
The existing --loglevel
option is now an alias for the --loglevel-stderr
option.
The Podman wrapper script is the script installed at /usr/bin/sat
on the
+master management NCNs by the cray-sat-podman
RPM that runs the cray-sat
+container in podman
. The following subsections detail improvements that were
+made to the wrapper script in this release.
The Podman wrapper script that launches the cray-sat
container with podman
+has been modified to mount the user’s current directory and home directory into
+the cray-sat
container to provide access to local files in the container.
The man page for the Podman wrapper script, which is accessed by typing man sat
on a master management NCN, has been improved to document the following:
Fixed issues with redirecting stdout and stderr, and piping output to commands,
+such as awk
, less
, and more
.
A new sat
option has been added to configure the HTTP timeout length for
+requests to the API gateway. See sat-man sat
for details.
sat bootsys
ImprovementsMany improvements and fixes have been made to sat bootsys
. The following are some
+highlights:
--excluded-ncns
option, which can be used to omit NCNs
+from the platform-services
and ncn-power
stages in case they are
+inaccessible.sat bootsys shutdown
now prompt the user to
+continue before proceeding. A new option, --disruptive
, will bypass this.platform-services
+stage of sat bootsys boot
.sat xname2nid
Improvementssat xname2nid
can now recursively expand slot, chassis, and cabinet xnames to
+a list of nids in those locations.
A new --format
option has been added to sat xname2nid
. It sets the output format to
+either “range” (the default) or “nid”. The “range” format displays nids in a
+compressed range format suitable for use with a workload manager like Slurm.
The commands which interact with HSM (e.g., sat status
and sat hwinv
) now
+use the v2 HSM API.
sat diag
Limited to HSN Switchessat diag
will now only operate against HSN switches by default. These are the
+only controllers that support running diagnostics with HMJTD.
sat showrev
EnhancementsA column has been added to the output of sat showrev
that indicates whether a
+product version is “active”. The definition of “active” varies across products,
+and not all products may set an “active” version.
For SAT, the active version is the one with its hosted-type package repository in
+Nexus set as the member of the group-type package repository in Nexus,
+meaning that it will be used when installing the cray-sat-podman
RPM.
cray-sat
Container Image Size ReductionThe size of the cray-sat
container image has been approximately cut in half by
+leveraging multi-stage builds. This also improved the repeatability of the unit
+tests by running them in the container.
Minor bug fixes were made in cray-sat
and in cray-sat-podman
. For full change lists,
+see each repository’s CHANGELOG.md
file.
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLI; sat-podman
wrapper script. It also added the following new component:
+sat-cfs-install
Docker image and Helm chart. The following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named “ncn-personalization”.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+executables on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and config file options +have been removed. For further instructions, see Remove Obsolete Configuration +File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes masters and workers.
sat firmware
.cray-sat
container image.sat firmware
command.We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes management cluster (i.e., workers
+and masters). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Config File Location ChangeThe default location of the SAT config file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own config files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment variable.
Additionally, if a config file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
option.--list-node-accels
option. The count of
+node accelerators is also included for each node.--list-node-accel-risers
+option. The count of node accelerator risers is also included for each node.--list-node-hsn-nics
+option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For detailed +instructions, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding config-file
+option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding config file options were
+deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
See the System Admin Toolkit Command Overview +and the table of commands in the SAT Authentication section +of this document for more details on each of these commands.
+ + + + + +SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.
+This automated process centers around the sat bootprep
command. Man page
+documentation for sat bootprep
can be viewed similarly to other SAT commands.
ncn-m001# sat-man sat-bootprep
+
sat bootprep
is used to create CFS configurations, build and
+rename IMS images, and create BOS session templates which tie the
+configurations and images together during a BOS session.
sat bootsys
automates several portions of the boot and shutdown processes,
+including (but not limited to) performing BOS operations (such as creating BOS
+sessions), powering on and off cabinets, and checking the state of the system
+prior to shutdown.
The input file provided to sat bootprep
is a YAML-formatted file containing
+information which CFS, IMS, and BOS use to create configurations, images, and
+BOS session templates respectively. Writing and modifying these input files is
+the main task associated with using sat bootprep
. An input file is composed of
+three main sections, one each for configurations, images, and session templates.
+These sections may be specified in any order, and any of the sections may be
+omitted if desired.
The configurations
section begins with a configurations:
key.
---
+configurations:
+
Under this key, the user can list one or more configurations to create. For +each configuration, a name should be given, in addition to the list of layers +which comprise the configuration. Each layer can be defined by a product name +and optionally a version number, or commit hash or branch in the product’s +configuration repository. Alternatively, a layer can be defined by a Git +repository URL directly, along with an associated branch or commit hash.
+When a configuration layer is specified in terms of a product name, the layer +is created in CFS by looking up relevant configuration information (including +the configuration repository and commit information) from the +cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied, but if it is absent, the version is assumed to be the latest version +found in the cray-product-catalog.
+---
+configurations:
+- name: example-configuration
+ layers:
+ - name: example product
+ playbook: example.yml
+ product:
+ name: example
+ version: 1.2.3
+
Alternatively, a configuration layer may be specified by explicitly referencing
+the desired configuration repository, along with the branch containing the
+intended version of the Ansible playbooks. A commit hash may be specified by replacing
+branch
with commit
.
...
+ - name: another example product
+ playbook: another-example.yml
+ git:
+ url: "https://vcs.local/vcs/another-example-config-management.git"
+ branch: main
+ ...
+
When sat bootprep
is run against an input file, a CFS configuration will be
+created corresponding to each configuration in the configurations
section. For
+example, the configuration created from an input file with the layers listed
+above might look something like the following:
{
+ "lastUpdated": "2022-02-07T21:47:49Z",
+ "layers": [
+ {
+ "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "example product",
+ "playbook": "example.yml"
+ },
+ {
+ "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "another example product",
+ "playbook": "another-example.yml"
+ }
+ ],
+ "name": "example-configuration"
+}
+
After specifying configurations, the user may add images to the input file
+which are to be built by IMS. To add an images
section, the user should add
+an images
key.
---
+configurations:
+ ... (omitted for brevity)
+images:
+
Under the images
key, the user may define one or more images to be created in
+a list. Each element of the list defines a separate IMS image to be built and/or
+configured. Images must contain a name, as well as an ims
section containing a
+definition of the image to be built and/or configured. Images may be defined by
+an image recipe, or by a pre-built image. Recipes and pre-built images are
+referred to by their names or IDs in IMS. The ims
section should also contain
+an is_recipe
property, which indicates whether the name or ID refers to an
+image recipe or a pre-built image. Images may also optionally provide a text
+description of the image. This description is not stored or used by sat bootprep
or any CSM services, but is useful for documenting images in the input
+file.
---
+configurations:
+ ... (omitted for brevity)
+images:
+- name: example-compute-image
+ description: >
+ An example compute node image for illustrative purposes.
+ ims:
+ name: example-compute-image-recipe
+ is_recipe: true
+- name: another-example-compute-image
+ description: >
+ Another example compute node image.
+ ims:
+ id: <IMS image UUID>
+ is_recipe: false
+
Images may also contain a configuration
property in their definition, which
+specifies a configuration with which to customize the built image prior to
+booting. If a configuration is specified, then configuration groups must also
+be specified using the configuration_group_names
property.
---
+configurations:
+ ... (omitted for brevity)
+images:
+- name: example-compute-image
+ description: >
+ An example compute node image for illustrative purposes.
+ ims:
+ name: example-compute-image-recipe
+ is_recipe: true
+ configuration: example configuration
+ configuration_group_names:
+ - Compute
+
BOS session templates are the final section of the input file, and are defined
+under the session_templates
key.
---
+configurations:
+ ... (omitted for brevity)
+images:
+ ... (omitted for brevity)
+session_templates:
+
Each session template is defined in terms of its name, an image, a
+configuration, and a set of parameters which can be used to configure the
+session. The name, image, and configuration are specified with their respective
+name
, image
, and configuration
keys. bos_parameters
may also be
+specified; currently, the only setting under bos_parameters
that is supported
+is boot_sets
, which can be used to define boot sets in the BOS session
+template. Each boot set is defined under its own property under boot_sets
, and
+the value of each boot set can contain the following properties, all of
+which are optional:
- kernel_parameters: the parameters passed to the kernel on the command line
- network: the network over which the nodes will boot
- node_list: nodes to add to the boot set
- node_roles_groups: HSM roles to add to the boot set
- node_groups: HSM groups to add to the boot set
- rootfs_provider: the root file system provider
- rootfs_provider_passthrough: parameters to add to the rootfs= kernel parameter

The properties listed previously are the same as the parameters that can be
specified directly through BOS boot sets. More information can be found in the
CSM documentation on session templates. Additional properties not listed are
passed through to the BOS session template as written.
+An example session template might look like the following:
+configurations:
+ ... (omitted for brevity)
+images:
+ ... (omitted for brevity)
+session_templates:
+- name: example-session-template
+ image: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_list: []
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
Putting together all of the previous input file sections, an example bootprep input +file might look something like the following.
+---
+configurations:
+- name: cos-config
+ layers:
+ - name: cos-integration-2.2.87
+ playbook: site.yml
+ product:
+ name: cos
+ version: 2.2.87
+ branch: integration
+ - name: cpe-integration-21.12.3
+ playbook: pe_deploy.yml
+ product:
+ name: cpe
+ version: 21.12.3
+ branch: integration
+ - name: slurm-master-1.1.1
+ playbook: site.yml
+ product:
+ name: slurm
+ version: 1.1.1
+ branch: master
+images:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ ims:
+ is_recipe: true
+ name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ configuration: cos-config
+ configuration_group_names:
+ - Compute
+session_templates:
+- name: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ image: cray-shasta-compute-sles15sp3.x86_64-2.2.35
+ configuration: cos-config
+ bos_parameters:
+ boot_sets:
+ compute:
+ kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+ node_roles_groups:
+ - Compute
+
It is possible to create an example bootprep input file using values from the
+system’s product catalog using the sat bootprep generate-example
command.
ncn-m001# sat bootprep generate-example
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.1.5) of product uan
+INFO: Using latest version (21.11.4) of product cpe
+INFO: Using latest version (1.0.7) of product slurm
+INFO: Using latest version (1.1.24) of product analytics
+INFO: Using latest version (2.3.24-20220113160653) of product cos
+INFO: Using latest version (2.1.5) of product uan
+INFO: Wrote example bootprep input file to ./example-bootprep-input.yaml.
+
This file should be reviewed and edited to match the desired parameters of the +configurations, images, and session templates.
+The contents of the YAML input files described above must conform to a schema +which defines the structure of the data. The schema definition is written using +the JSON Schema format. (Although the format is named “JSON Schema”, the schema +itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.
+To view the exact schema specification, run sat bootprep view-schema
.
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft-07/schema"
+title: Bootprep Input File
+description: >
+ A description of the set of CFS configurations to create, the set of IMS
+ images to create and optionally customize with the defined CFS configurations,
+ and the set of BOS session templates to create that reference the defined
+ images and configurations.
+type: object
+additionalProperties: false
+properties:
+ ...
+
The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature was included +which can generate user-friendly HTML documentation for the input file schema +which can be browsed with the user’s preferred web browser.
+Create a documentation tarball using sat bootprep
.
ncn-m001# sat bootprep generate-docs
+INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
+
An alternate output directory can be specified with the --output-dir
+option. The generated tarball is always named bootprep-schema-docs.tar.gz
.
ncn-m001# sat bootprep generate-docs --output-dir /tmp
+INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
+
From another machine, copy the tarball to a local directory.
+another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
+
Extract the contents of the tarball and open the contained index.html
.
another-machine$ tar xzvf bootprep-schema-docs.tar.gz
+x bootprep-schema-docs/
+x bootprep-schema-docs/index.html
+x bootprep-schema-docs/schema_doc.css
+x bootprep-schema-docs/schema_doc.min.js
+another-machine$ open bootprep-schema-docs/index.html
+
Describes how to upgrade the System Admin Toolkit (SAT) product
+stream by using the Compute Node Environment (CNE) installer (cne-install
).
+The CNE installer can be used only for upgrades and not for fresh installations.
+For installation instructions, see Install the System Admin Toolkit Product
+Stream.
Upgrading SAT with cne-install
is recommended because the process is both
+automated and logged to help you save time. The CNE installer can be used to
+upgrade SAT alone or with other supported products. For more information
+on cne-install
and its options, refer to the HPE Cray EX System Software
+Getting Started Guide (S-8000).
...
) in shell output indicate omitted lines.x.y.z
with the version of the SAT product stream
+being upgraded. Start a typescript and set the shell prompt.
+The typescript will record the commands and the output from this upgrade. +The prompt is set to include the date and time.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Copy the release distribution gzipped tar file to ncn-m001
.
The cne-install
command installs all files in the media directory
+by default. If you are upgrading SAT alone, ensure only the SAT tarball is in
+the media directory.
Run the CNE installer.
+If you are upgrading SAT along with other supported products, run the +following command.
+ncn-m001# cne-install -m MEDIA_DIR install -B WORKING_BRANCH -bpc BOOTPREP_CONFIG_CN \
+ -bpn BOOTPREP_CONFIG_NCN
+
The cne-install
command will use the provided BOOTPREP_CONFIG_CN
and
+BOOTPREP_CONFIG_NCN
files for the run.
If you are upgrading SAT alone, run the following commands.
+ncn-m001# cne-install -m MEDIA_DIR install -B '{{product_type}}-{{version_x_y_z}}' \
+ -bpn BOOTPREP_CONFIG_NCN -e update_working_branches
+ncn-m001# cne-install -m MEDIA_DIR install -B '{{product_type}}-{{version_x_y_z}}' \
+ -bpn BOOTPREP_CONFIG_NCN -b sat_bootprep_ncn -e ncn_personalization
+
Optional: Stop the typescript.
+NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT upgrade (see Next Steps).
+ncn-m001# exit
+
SAT version x.y.z
is now upgraded, meaning the SAT x.y.z
release
+has been loaded into the system software repository.
sat
command is available. At this point, the release distribution files can be removed from the system as described in Post-Upgrade Cleanup Procedure.
+If other HPE Cray EX software products are being upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +(S-8000) to determine which step +to execute next.
+If no other HPE Cray EX software products are being upgraded at this time, +execute the SAT Post-Upgrade procedures:
+ +Optional: Remove the SAT release distribution tar file and extracted directory.
+ncn-m001# rm sat-x.y.z.tar.gz
+ncn-m001# rm -rf sat-x.y.z/
+
After upgrading SAT, if using the configuration file from a previous version,
+there may be configuration file sections no longer used in the new version.
+For example, when upgrading from Shasta 1.4 to Shasta 1.5, the [redfish]
+configuration file section is no longer used. In that case, the following
+warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve
+the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are +“unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
In the following example, the stderr
log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
sat sensors
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
sat xname2nid
sat swap
The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.
+Grafana can be accessed via web browser at the following URL:
+https://sma-grafana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com
For more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the +SMA product documentation.
+For more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to “Fabric Telemetry +Kafka Topics” in the SMA product documentation.
+There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.
+Dashboard Name | +Display Type | +
---|---|
Fabric Congestion | +Chart Panels | +
Fabric RFC3635 | +Chart Panels | +
Fabric Errors | +Tabular Format | +
Fabric Port State | +Tabular Format | +
The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.
+Shows the Interval and Locations Options for the available telemetry.
+ +The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.
+For more information, refer to Grafana Templates and Variables.
+The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.
+The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.
+SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.
+This dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.
+The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.
+The port type of a link is reported as a port state “subtype” event when defined at port initialization.
+This dashboard reports error counters in a tabular format in three panels.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.
+The three columns named group, switch, and port are not port state events, but extra information included with
+For more information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.
+Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.
+ + + + + +Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.
+Kibana can be accessed via web browser at the following URL:
+https://sma-kibana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com
For more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product +documentation.
+Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this +table.
+Dashboard | +Short Description | +Long Description | +Kibana Visualization and Search Name | +
---|---|---|---|
sat-aer |
+AER corrected | +Corrected Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-corrected Search: sat-aer-corrected |
+
sat-aer |
+AER fatal | +Fatal Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-fatal Search: sat-aer-fatal |
+
sat-atom |
+ATOM failures | +Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. | +sat-atom-failed |
+
sat-atom |
+ATOM admindown |
+Application Task Orchestration and Management test failures can result in nodes being marked admindown . An admindown node is not available for job launch. |
+sat-atom-admindown |
+
sat-heartbeat |
+Heartbeat loss events | +Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. |
+sat-heartbeat |
+
sat-kernel |
+Kernel assertions | +The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. | +sat-kassertions |
+
sat-kernel |
+Kernel panics | +The kernel panics when something is seriously wrong. The node goes down. | +sat-kernel-panic |
+
sat-kernel |
+Lustre bugs (LBUGs) | +The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. | +sat-lbug |
+
sat-kernel |
+CPU stalls | +CPU stalls are serous conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. | +sat-cpu-stall |
+
sat-kernel |
+Out of memory | +An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. | +sat-oom |
+
sat-mce |
+MCE | +Machine Check Exceptions (MCE) are errors detected at the processor level. | +sat-mce |
+
sat-rasdaemon |
+rasdaemon errors |
+Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. |
+sat-rasdaemon-error |
+
sat-rasdaemon |
+rasdaemon messages |
+All messages from the rasdaemon service on nodes. |
+sat-rasdaemon |
+
By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.
+The Kibana Dashboard should be open on your system.
+Navigate to Management
+Navigate to Advanced Settings in the Kibana section, below the Elastic search section
+Scroll down to the Discover section
+Change Highlight results from on to off
+Click Save to save changes
+The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.
+Go to the dashboard section.
+Select sat-aer
dashboard.
Choose the time range of interest.
+View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.
+The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health
+checks and application test failures. Some test failures are of possible interest even though a node is not marked
+admindown
or otherwise fails. They are of clear interest if a node is marked admindown
, and might provide
+clues if a node otherwise fails. They might also show application problems.
HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.
+Go to the dashboard section.
+Select sat-atom
dashboard.
Choose the time range of interest.
+View any nodes marked admindown
and any ATOM test failures. These failures occur during health checks and
+application test failures. Test failures marked admindown
are important to note. View the matching log messages
+in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired,
+results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.
The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd
pods in the system. The hbtd
+pods are responsible for monitoring nodes in the system for heartbeat loss.
Go to the dashboard section.
+Select sat-heartbeat
dashboard.
Choose the time range of interest.
+View the heartbeat loss messages that are logged by the hbtd
pods in the system. The hbtd
pods are responsible
+for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.
The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.
+Go to the dashboard section.
+Select sat-kernel
dashboard.
Choose the time range of interest.
+View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.
+The MCE Dashboard displays CPU detected processor-level hardware errors.
+Go to the dashboard section.
+Select sat-mce
dashboard.
Choose the time range of interest.
+View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.
+The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon
+service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including
+PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages
+presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one
+for only messages of severity emerg
or err
and another for all messages from rasdaemon
.
Go to the dashboard section.
+Select sat-rasdaemon
dashboard.
Choose the time range of interest.
+View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.
+Describes how to install or upgrade the System Admin Toolkit (SAT) product +stream.
+Ellipses (...
) in shell output indicate omitted lines.
In the examples below, replace x.y.z
with the version of the SAT product stream
+being installed.
‘manager’ and ‘master’ are used interchangeably in the steps below.
+To upgrade SAT, execute the pre-installation, installation, and post-installation +procedures for a newer distribution. The newly installed version will become +the default.
+In SAT 2.4, you can instead upgrade the product stream by using the +Compute Node Environment (CNE) installer. It is recommended that you upgrade +SAT with the CNE installer because the process is both automated and logged +to help you save time. For more information, see +SAT Upgrade with CNE Installer.
+Start a typescript and set the shell prompt.
+The typescript will record the commands and the output from this installation. +The prompt is set to include the date and time.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
Copy the release distribution gzipped tar file to ncn-m001
.
Unzip and extract the release distribution.
+ncn-m001# tar -xvzf sat-x.y.z.tar.gz
+
Change directory to the extracted release distribution directory.
+ncn-m001# cd sat-x.y.z
+
Run the installer: install.sh
.
The script produces a lot of output. A successful install ends with “SAT
+version x.y.z
has been installed”, where x.y.z
is the SAT product version.
ncn-m001# ./install.sh
+====> Installing System Admin Toolkit version x.y.z
+...
+====> Waiting 300 seconds for sat-config-import-x.y.z to complete
+...
+====> SAT version x.y.z has been installed.
+
Optional: Stop the typescript.
+NOTE: This step can be skipped if you wish to use the same typescript +for the remainder of the SAT install (see Next Steps).
+ncn-m001# exit
+
SAT version x.y.z
is now installed/upgraded, meaning the SAT x.y.z
release
+has been loaded into the system software repository.
sat
command won’t be available until the NCN Personalization
+procedure has been executed. If other HPE Cray EX software products are being installed or upgraded in conjunction
+If no other HPE Cray EX software products are being installed or upgraded at this time, +proceed to the sections listed below.
+NOTE: The procedures in Configure SAT are only required during the +first installation of SAT. However, the NCN Personalization procedure +is required both when installing and upgrading SAT.
+If performing a fresh install, execute the Configure SAT procedures:
+ +Execute the NCN Personalization procedure:
+ +If performing an upgrade, execute the SAT Post-Upgrade procedures:
+ +NOTE: The Set System Revision Information procedure is not required +after upgrading from SAT 2.1 or later.
+Initially, as part of the installation and configuration, SAT authentication is set up so SAT commands can be used in
+later steps of the install process. The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For instructions on editing Role Mappings see
+Create Internal User Accounts in the Keycloak Shasta Realm in the CSM product documentation.
+For additional information on SAT authentication, see System Security and Authentication in the CSM
+documentation.
NOTE: This procedure is only required after initially installing SAT. It is not +required after upgrading SAT.
+Some SAT subcommands make requests to the Shasta services through the API gateway and thus require authentication to
+the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat
commands require S3 to
+be configured (see: Generate SAT S3 Credentials). In order to use the SAT S3 bucket,
+the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be
+done on every Kubernetes manager node where SAT commands are run.
Below is a table describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bmccreds |
+Requires authentication to the API gateway. | +sat-bmccreds |
+Set BMC passwords. | +
sat bootprep |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. |
+sat-bootprep |
+Prepare to boot nodes with images and configurations. | +
sat bootsys |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwhist |
+Requires authentication to the API gateway. | +sat-hwhist |
+Report hardware component history. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat k8s |
+Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. |
+sat-k8s |
+Report on Kubernetes replica sets that have co-located (on the same node) replicas. | +
sat linkhealth |
++ | + | This command has been deprecated. | +
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node XNames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat slscheck |
+Requires authentication to the API gateway. | +sat-slscheck |
+Perform a cross-check between SLS and HSM. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC XNames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, you must run the sat auth
command. This command will prompt for a password
+on the command line. The username value is obtained from the following locations, in order of higher precedence to lower
+precedence:
--username
global command-line option.username
option in the api_gateway
section of the config file at ~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file will be obtained and saved to
+~/.config/sat/tokens
. Subsequent sat commands will determine the username the same way as sat auth
described above,
+and will use the token for that username if it has been obtained and saved by sat auth
.
sat
CLI has been installed following Install The System Admin Toolkit Product Stream.The following is the procedure to globally configure the username used by SAT and authenticate to the API gateway:
+Generate a default SAT configuration file, if one does not exist.
+ncn-m001# sat init
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the config file already exists, it will print out an error:
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
section of the config file. For
+example:
username = "crayadmin"
+
Run sat auth
. Enter your password when prompted. For example:
ncn-m001# sat auth
+Password for crayadmin:
+Succeeded!
+
Other sat
commands are now authenticated to make requests to the API gateway. For example:
ncn-m001# sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes manager node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the site-specific information set with sat setrev
+(see: Set System Revision Information).
NOTE: This procedure is only required after initially installing SAT. It is not required after upgrading SAT.
+Ensure the files are readable only by root
.
ncn-m001# touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+Get the SAT configuration file’s endpoint value.
+NOTE: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
ncn-m001# grep endpoint ~/.config/sat/sat.toml
+# endpoint = "https://rgw-vip.nmn"
+
Get the sat-s3-credentials
secret’s endpoint value.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, change the SAT configuration file’s endpoint value to match the secret’s.
+Copy SAT configurations to each manager node on the system.
+ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
NOTE: Depending on how many manager nodes are on the system, the list of manager nodes may
+be different. This example assumes three manager nodes, where the configuration files must be
+copied from ncn-m001
to ncn-m002
and ncn-m003
. Therefore, the list of hosts above is
+ncn-m002
and ncn-m003
.
HPE service representatives use system revision information data to identify +systems in support cases.
+Set System Revision Information.
+Run sat setrev
and follow the prompts to set the following site-specific values:
TIP: For “System type”, a system with any liquid-cooled components should be considered a liquid-cooled system. In other words, “System type” is EX-1C.
+ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Verify System Revision Information.
+Run sat showrev
and verify the output shown in the “System Revision Information table.”
The following example shows sample table output.
+ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
A new CFS configuration layer must be added to the CFS configuration used on +management NCNs. It is required following SAT installation and configuration. +This procedure describes how to add that layer.
+...
) in shell output indicate omitted lines. Replace x.y.z
with the version of the SAT product stream
being installed. Start a typescript if not already using one, and set the shell prompt.
+The typescript will record the commands and the output from this installation. +The prompt is set to include the date and time.
+ncn-m001# script -af product-sat.$(date +%Y-%m-%d).txt
+ncn-m001# export PS1='\u@\H \D{%Y-%m-%d} \t \w # '
+
The SAT release distribution includes a script, update-mgmt-ncn-cfs-config.sh
,
+that updates a CFS configuration to include the SAT layer required to
+install and configure SAT on the management NCNs.
The script supports modifying a named CFS configuration in CFS, a CFS +configuration defined in a JSON file, or the CFS configuration +currently applied to particular components in CFS.
+The script also includes options for specifying:
+This procedure is split into three alternatives, which cover common use cases:
+If none of these alternatives fit your use case, see Advanced Options for +Updating CFS Configurations.
+Use this alternative if there is already a CFS configuration assigned to the +management NCNs and you would like to update it in place for the new version of +SAT.
+Run the script with the following options:
+ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-query role=Management,type=Node --save
+
Examine the output to ensure the CFS configuration was updated.
+For example, if there is a single CFS configuration that applies to NCNs, and if +that configuration does not have a layer yet for any version of SAT, the +output will look like this:
+====> Updating CFS configuration(s)
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
+...
+INFO: Found configuration "ncn-personalization" for component x3000c0s9b0n0
+...
+INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
+INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
+INFO: Successfully saved CFS configuration "ncn-personalization"
+INFO: Successfully saved 1 changed CFS configurations.
+====> Completed CFS configuration(s)
+====> Cleaning up install dependencies
+
Alternatively, if the CFS configuration already contains a layer for +SAT that just needs to be updated, the output will look like this:
+====> Updating CFS configuration(s)
+INFO: Querying CFS configurations for the following NCNs: x3000c0s1b0n0, ..., x3000c0s9b0n0
+INFO: Found configuration "ncn-personalization" for component x3000c0s1b0n0
+...
+INFO: Found configuration "ncn-personalization" for component x3000c0s9b0n0
+...
+INFO: Updating existing layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml
+INFO: Property "commit" of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from 01ae28c92b9b4740e9e0e01ae01216c6c2d89a65 to bcbd6db0803cc4137c7558df9546b0faab303cbd
+INFO: Property "name" of layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml updated from sat-2.2.16 to sat-sat-ncn-bcbd6db-20220608T170152
+INFO: Successfully saved CFS configuration "ncn-personalization"
+INFO: Successfully saved 1 changed CFS configurations.
+====> Completed CFS configuration(s)
+====> Cleaning up install dependencies
+
Use this alternative if you are constructing a new CFS configuration for +management NCNs in a JSON file.
+Run the script with the following options, where JSON_FILE
is an
+environment variable set to the path of the JSON file to modify:
ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-file $JSON_FILE --save
+
Examine the output to ensure the JSON file was updated.
+For example, if the configuration defined in the JSON file does not have a layer yet for any +version of SAT, the output will look like this:
+====> Updating CFS configuration(s)
+INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
+INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
+INFO: Successfully saved 1 changed CFS configurations.
+====> Completed CFS configuration(s)
+====> Cleaning up install dependencies
+
Use this alternative if you are updating a specific named CFS configuration. +This may be the case if you are constructing a new CFS configuration during an +install or upgrade of multiple products.
+Run the script with the following options, where CFS_CONFIG_NAME
is an
+environment variable set to the name of the CFS configuration to update.
ncn-m001# ./update-mgmt-ncn-cfs-config.sh --base-config $CFS_CONFIG_NAME --save
+
Examine the output to ensure the CFS configuration was updated.
+For example, if the CFS configuration does not have a layer yet for any version of SAT, +the output will look like this:
+====> Updating CFS configuration(s)
+INFO: No layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml found.
+INFO: Adding a layer with repo path /vcs/cray/sat-config-management.git and playbook sat-ncn.yml to the end.
+INFO: Successfully saved CFS configuration "CFS_CONFIG_NAME"
+INFO: Successfully saved 1 changed CFS configurations.
+====> Completed CFS configuration(s)
+====> Cleaning up install dependencies
+
If none of the alternatives described in the previous sections apply, view the
+full description of the options accepted by the update-mgmt-ncn-cfs-config.sh
+script by invoking it with the --help
option.
ncn-m001# ./update-mgmt-ncn-cfs-config.sh --help
+
After the CFS configuration that applies to management NCNs has been updated as +described in the Procedure to Update CFS Configuration, +execute the following steps to ensure the modified CFS configuration is re-applied to the management NCNs.
+Set an environment variable that refers to the name of the CFS configuration +to be applied to the management NCNs.
+ncn-m001# export CFS_CONFIG_NAME="ncn-personalization"
+
Note: If the Update Active CFS Configuration +section was followed above, the name of the updated CFS configuration will +have been logged in the following format. If multiple CFS configurations +were modified, any one of them can be used in this procedure.
+INFO: Successfully saved CFS configuration "ncn-personalization"
+
Obtain the name of the CFS configuration layer for SAT and save it in an +environment variable:
+ncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
+ | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
+
Create a CFS session that executes only the SAT layer of the given CFS +configuration.
+The --configuration-limit
option limits the configuration session to run
+only the SAT layer of the configuration.
ncn-m001# cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
+ "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
+
Monitor the progress of the CFS session.
+Set an environment variable to the name of the Ansible container within the pod for the CFS session:
+ncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \
+ -o json | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
+
Next, get the logs for the Ansible container.
+ncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
+
Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output. The following example shows a successful session.
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
NOTE: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify that SAT was successfully configured.
+If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
NOTE: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
ncn-m001# sat --version
+sat 3.7.0
+
NOTE: Upon first running sat
, you may see additional output while the sat
+container image is downloaded. This will occur the first time sat
is run on
+each manager NCN. For example, if you run sat
for the first time on ncn-m001
+and then for the first time on ncn-m002
, you will see this additional output
+both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
Stop the typescript.
+ncn-m001# exit
+
SAT version x.y.z
is now installed and configured:
The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.
+The main scenario in which the CFS batcher will not automatically re-apply the +SAT layer is when the commit hash of the sat-config-management git repository +has not changed between SAT versions. The previous procedure ensures the +configuration is re-applied in all cases, and it is harmless if the batcher has +already applied an updated configuration.
+At this point, the release distribution files can be removed from the system as +described in Post-Installation Cleanup Procedure.
+If other HPE Cray EX software products are being installed or upgraded in conjunction +with SAT, refer to the HPE Cray EX System Software Getting Started Guide +(S-8000) to determine which step +to execute next.
+If no other HPE Cray EX software products are being installed at this time, +the installation process is complete. If no other HPE Cray EX software products +are being upgraded at this time, proceed to the remaining SAT Post-Upgrade +procedures:
+ +NOTE: The Set System Revision Information procedure is not required after upgrading from SAT 2.1 or later.
+Optional: Remove the SAT release distribution tar file and extracted directory.
+ncn-m001# rm sat-x.y.z.tar.gz
+ncn-m001# rm -rf sat-x.y.z/
+
After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
+In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
In the following example, the stderr
log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
sat sensors
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
sat xname2nid
sat swap
This procedure can be used to uninstall a version of SAT.
+prodmgr
.prodmgr
command is available.Use sat showrev
to list versions of SAT.
NOTE: It is not recommended to uninstall a version designated as “active”. +If the active version is uninstalled, then the activate procedure must be executed +on a remaining version.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+-------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+-------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+-------------------+-----------------------+
+
Use prodmgr
to uninstall a version of SAT.
This command will do three things:
+cray-product-catalog
Kubernetes ConfigMap, so that it will no longer show up
+in the output of sat showrev
.ncn-m001# prodmgr uninstall sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 has been removed.
+Removed Docker image cray/cray-sat:3.9.0
+Removed Docker image cray/sat-cfs-install:1.0.2
+Removed Docker image cray/sat-install-utility:1.4.0
+Deleted sat-2.2.10 from product catalog.
+
This procedure can be used to downgrade the active version of SAT.
+prodmgr
command is available.Use sat showrev
to list versions of SAT.
ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | True | - | - |
+| sat | 2.2.10 | False | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Use prodmgr
to activate a different version of SAT.
This command will do three things:
+2.2.10
+sets the repository sat-2.2.10-sle-15sp2
as the only member of the sat-sle-15sp2
group.2.2.10
as active within the product catalog, so that it appears active in the output of
+sat showrev
.ncn-personalization
). Specifically, it will ensure that the layer refers to the version of SAT CFS
+configuration content associated with the version of SAT being activated.ncn-m001# prodmgr activate sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 is now the default in sat-sle-15sp2.
+Set sat-2.2.10 as active in product catalog.
+Updated CFS configurations: [ncn-personalization]
+
Verify that the chosen version is marked as active.
+ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------+--------------------+-----------------------+
+| product_name | product_version | active | images | image_recipes |
++--------------+-----------------+--------+--------------------+-----------------------+
+| sat | 2.3.3 | False | - | - |
+| sat | 2.2.10 | True | - | - |
++--------------+-----------------+--------+--------------------+-----------------------+
+
Apply the modified CFS configuration to the management NCNs.
+At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.
+To ensure that management NCNs have been updated to use the active SAT
+version, follow the Procedure to Apply CFS Configuration.
+Refer to the output from the prodmgr activate
command to find the name of
+the modified CFS configuration. If more than one CFS configuration was
+modified, use the first one.
SAT can optionally be installed and configured on an external system to interact with CSM over the CAN.
+Most SAT subcommands work by accessing APIs which are reachable via the CAN. However, certain SAT commands depend on +host-based functionality on the management NCNs and will not work from an external system. This includes the following:
+platform-services
and ncn-power
stages of sat bootsys
--local
option of sat showrev
Installing SAT on an external system is not an officially supported configuration. These instructions are provided “as-is” with the hope that they can be useful for users who desire additional flexibility.
+Certain additional steps may need to be taken to install and configure SAT depending on the configuration of the +external system in use. These additional steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this documentation. This section covers only the +steps needed to configure SAT to use externally-accessible API endpoints exposed by CSM.
+kubectl
, openssh
, git
, and curl
are installed on the external system.Create a Python virtual environment.
+$ SAT_VENV_PATH="$(pwd)/venv"
+$ python3 -m venv ${SAT_VENV_PATH}
+$ . ${SAT_VENV_PATH}/bin/activate
+
Clone the SAT source code.
+Note: To use SAT version 3.19, this example clones the release/3.19
branch of
+Cray-HPE/sat
. However, for better clarity, these instructions include steps that apply only to
+versions newer than 3.19. Specifically, the instructions include references to the
+csm-api-client
package, which was not a dependency of SAT in version 3.19.
(venv) $ git clone --branch=release/3.19 https://github.com/Cray-HPE/sat.git
+
Set up the SAT CSM Python dependencies to be installed from their source code.
+SAT CSM Python dependency packages are not currently distributed publicly as
+source packages or binary distributions. They must be installed from
+their source code hosted on GitHub. Also, to install the cray-product-catalog
+Python package, you must first clone it locally. Use the following steps to
+modify the SAT CSM Python dependencies so they can be installed from their source code.
Clone the source code for cray-product-catalog
.
(venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
+
In the cray-product-catalog
directory, create a file named .version
+that contains the version of cray-product-catalog
.
(venv) $ echo 1.6.0 > cray-product-catalog/.version
+
Open the “locked” requirements file in a text editor.
+(venv) $ vim sat/requirements.lock.txt
+
Update the line containing cray-product-catalog
so that it reflects the local path
+to cray-product-catalog
.
It should read as follows:
+./cray-product-catalog
+
For versions of SAT newer than 3.19, change the line containing csm-api-client
to
+read as follows:
csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
(Optional) Confirm that requirements.lock.txt
is modified as expected.
Note: For versions newer than 3.19, you will see both cray-product-catalog
and csm-api-client
.
+For version 3.19 and older, you will only see cray-product-catalog
.
(venv) $ grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
+./cray-product-catalog
+csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
Install the modified SAT dependencies.
+(venv) $ pip install -r sat/requirements.lock.txt
+...
+
Install the SAT Python package.
+(venv) $ pip install ./sat
+...
+
Optional: Add the sat
virtual environment to the user’s PATH
environment variable.
If a shell other than bash
is in use, replace ~/.bash_profile
with the appropriate profile path.
If the virtual environment is not added to the user’s PATH
environment variable, then
+source ${SAT_VENV_PATH}/bin/activate
will need to be run before running any SAT commands.
(venv) $ deactivate
+$ echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
+$ source ~/.bash_profile
+
Copy the file /etc/kubernetes/admin.conf
from ncn-m001
to ~/.kube/config
on the external system.
Note that this file contains credentials to authenticate against the Kubernetes API as the administrative user, so it should be treated as sensitive.
+$ mkdir -p ~/.kube
+$ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
+admin.conf 100% 5566 3.0MB/s 00:00
+
Add a new entry for the hostname kubernetes
to the external system’s /etc/hosts
file.
The kubernetes
hostname should correspond to the CAN IP address on ncn-m001
. On CSM 1.2, this can be determined
+by querying the IP address of the bond0.cmn0
interface.
$ ssh ncn-m001 ip addr show bond0.cmn0
+13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+$ IP_ADDRESS=10.102.1.11
+
On CSM versions prior to 1.2, the CAN IP can be determined by querying the IP address of the vlan007
interface.
$ ssh ncn-m001 ip addr show vlan007
+13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+$ IP_ADDRESS=10.102.1.10
+
Once the IP address is determined, add an entry to /etc/hosts
mapping the IP address to the hostname kubernetes
.
$ echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
+10.102.1.11 kubernetes
+
Modify ~/.kube/config
to set the cluster server address.
The value of the server
key for the kubernetes
cluster under the clusters
section should be set to
+https://kubernetes:6443
.
---
+clusters:
+- cluster:
+ certificate-authority-data: REDACTED
+ server: https://kubernetes:6443
+ name: kubernetes
+...
+
Confirm that kubectl
can access the CSM Kubernetes cluster.
$ kubectl get nodes
+NAME STATUS ROLES AGE VERSION
+ncn-m001 Ready master 135d v1.19.9
+ncn-m002 Ready master 136d v1.19.9
+ncn-m003 Ready master 136d v1.19.9
+ncn-w001 Ready <none> 136d v1.19.9
+ncn-w002 Ready <none> 136d v1.19.9
+ncn-w003 Ready <none> 136d v1.19.9
+
Use sat init
to create a configuration file for SAT.
$ sat init
+INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
+
Copy the platform CA certificates from the management NCN and configure the certificates for use with SAT.
+If a shell other than bash
is in use, replace ~/.bash_profile
with the appropriate profile path.
$ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
+$ echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
+$ source ~/.bash_profile
+
Edit the SAT configuration file to set the API and S3 hostnames.
+Externally available API endpoints are given domain names in PowerDNS, so the endpoints in the configuration file
+should each be set to subdomain.system-name.site-domain
, where system-name
and site-domain
are replaced with
+the values specified during csi config init
, and subdomain
is the DNS name for the externally available service.
+For more information, refer to Externally Exposed Services in the Cray System Management Documentation.
The API gateway has the subdomain api
, and S3 has the subdomain s3
. The S3 endpoint runs on port 8080. The
+following options should be set in the SAT configuration file:
[api_gateway]
+host = "api.system-name.site-domain"
+
+[s3]
+endpoint = "http://s3.system-name.site-domain:8080"
+
Edit the SAT configuration file to specify the Keycloak user which will be accessing the REST API.
+[api_gateway]
+username = "user"
+
Authenticate against the API gateway with sat auth
.
For more information, see SAT Authentication.
+Generate S3 credentials.
+For more information, see Generate SAT S3 Credentials.
+The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt
commands
+used on the Cray XC platform. For more information on SAT commands, see System Admin Toolkit Command Overview.
Six Kibana Dashboards are included with SAT. They provide organized output for system health information.
+Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and are reported through Redfish.
+In CSM 1.3 and newer, the sat
command is automatically available on all the
+Kubernetes NCNs. For more information, see SAT in CSM. Older
+versions of CSM do not have the sat
command automatically available, and SAT
+must be installed as a separate product.
Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the
+HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are
+similarities between SAT commands and xt
commands used on the Cray XC platform.
The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each have their own set of options.
+The sat
command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on
+Kubernetes manager nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container. There are two ways to run sat.
+sat bash
, followed by a sat
command. sat
command directly on a Kubernetes manager node. In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using interactive and non-interactive modes.
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+
ncn-m001# sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, then use sat bash to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes manager node, use sat-man
on the manager node as shown in the following
+example.
ncn-m001# sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
ncn-m001# man sat
+
ncn-m001# man sat-podman
+
The host name in a command prompt indicates where the command must be run. The account that must run the command is also indicated in the prompt.
+root
or super-user account always has the #
character at the end of the prompt and has the host name of the
+host in the prompt. A non-root
account is indicated with account@hostname>. A user account that is neither root
nor crayadm
is
+referred to as user
.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run on one of the Kubernetes Manager servers. (Non-interactive) | +
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
Examples of the sat status
command used by an administrator:
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+
In CSM 1.3 and newer, the sat
command is automatically available on all the Kubernetes NCNs, but it is still possible
+to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the
+sat
command available in CSM. Installing the SAT product stream allows additional supporting components to be added:
An entry for SAT in the cray-product-catalog
Kubernetes ConfigMap is only created by installing the SAT product
+stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev
.
The sat-install-utility
container image is only available with the full SAT product stream. This container image
+provides uninstall and activate functionality when used with the prodmgr
command. (In SAT 2.3 and older, SAT was
+only available to install as a separate product stream. Because these versions were packaged with
+sat-install-utility
, it is still possible to uninstall these versions of SAT.)
The docs-sat
RPM package is only available with the full SAT product stream.
The sat-config-management
git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is
+only available with the full SAT product stream.
If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS
+configurations that apply to NCNs (for example, ncn-personalization
) should not include a SAT layer.
The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the
+Keycloak username that authenticates to the API gateway cannot be read by users other than root
. Specifically, it
+does the following:
Modifies the sat.toml
configuration file which contains the username so that it is only readable by root
.
Modifies the /root/.config/sat/tokens
directory so that the directory is only readable by root
. This is needed
+because the names of the files within the tokens
directory contain the username.
Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other users. These permission changes only apply to files created by previous installations of SAT. In the current version of SAT all files and directories are created with the appropriate permissions.
+Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX (Shasta) software stack. The following list shows these dependencies
+for each subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bmccreds
sat bootprep
sat bootsys
sat diag
sat firmware
sat hwhist
sat hwinv
sat hwmatch
sat init
None
+sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat slscheck
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
The 2.4.13 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.cfs-config-util
container image.Because of installation refactoring efforts, the following two components are no longer delivered with SAT:
+sat-cfs-install
container imagesat-cfs-install
Helm chartA version of the cray-sat
container image is now included in CSM. For more
+information, see SAT in CSM.
The SAT install.sh
script no longer uses a sat-cfs-install
Helm chart and
+container image to upload its Ansible content to the sat-config-management
+repository in VCS. Instead, it uses Podman to run the cf-gitea-import
container
+directly. Some of the benefits of this change include the following:
cray-sat
container image and cray-sat-podman
packagecray-sat
Container Image and cray-sat-podman
PackageIn older SAT releases, the sat
wrapper script that was provided by the
+cray-sat-podman
package installed on Kubernetes master NCNs included a
+hard-coded version of the cray-sat
container image. As a result, every new
+version of the cray-sat
image required a corresponding new version of the
+cray-sat-podman
package.
In this release, this tight coupling of the cray-sat-podman
package and the
+cray-sat
container image was removed. The sat
wrapper script provided
+by the cray-sat-podman
package now looks for the version of the cray-sat
+container image in the /opt/cray/etc/sat/version
file. This file is populated
+with the correct version of the cray-sat
container image by the SAT layer of
+the CFS configuration that is applied to management NCNs. If the version
file
+does not exist, the wrapper script defaults to the version of the cray-sat
+container image delivered with the latest version of CSM installed on the system.
The steps for performing NCN personalization as part of the SAT installation
+were moved out of the install.sh
script and into a new
+update-mgmt-ncn-cfs-config.sh
script that is provided in the SAT release
+distribution. The new script provides additional flexibility in how it modifies
+the NCN personalization CFS configuration for SAT. It can modify an existing CFS
+configuration by name, a CFS configuration being built in a JSON file, or an
+existing CFS configuration that applies to certain components. For more information,
+see Perform NCN Personalization.
sat bootprep
FeaturesThe following new features were added to the sat bootprep
command:
Variable substitutions using Jinja2 templates in certain fields of the
+sat bootprep
input file
For more information, see +HPC CSM Software Recipe Variable Substitutions +and Dynamic Variable Substitutions.
+Schema version validation in the sat bootprep
input files
For more information, see Providing a Schema Version.
+Ability to look up images and recipes provided by products
+For more information, see Defining IMS Images.
+The schema of the sat bootprep
input files was also changed to support these
+new features:
base
key instead of under an ims
key. The old ims
+key is deprecated.base.image_ref
.
+You should no longer use the IMS name of the image on which it depends.image.ims.name
, image.ims.id
, or image.image_ref
. Specifying a string
+value directly under the image
key is deprecated.For more information on defining IMS images and BOS session templates in the
+sat bootprep
input file, see Defining IMS Images
+and Defining BOS Session Templates.
sat swap
The sat swap
command was updated to support swapping compute and UAN blades
+with sat swap blade
. This functionality is described in the following processes
+of the Cray System Management Documentation:
v2
A new v2
version of the Boot Orchestration Service (BOS) is available in CSM
+1.3.0. SAT has added support for BOS v2
. This impacts the following commands
+that interact with BOS:
sat bootprep
sat bootsys
sat status
By default, SAT uses BOS v1
. However, you can choose the BOS version you want
+to use. For more information, see Change the BOS Version.
sat status
When using BOS v2
, sat status
outputs additional fields. These fields show
+the most recent BOS session, session template, booted image, and boot status for
+each node. An additional --bos-fields
option was added to limit the output of
+sat status
to these fields. The fields are not displayed when using BOS v1
.
This is the first release of SAT built from open source code repositories. As a result, build infrastructure was changed to use an external Jenkins instance, and artifacts are now published to an external Artifactory instance. These changes should not impact the functionality of the SAT product in any way.
+paramiko
Python package version was updated from 2.9.2 to 2.10.1 to
+mitigate CVE-2022-24302.oauthlib
Python package version was updated from 3.2.0 to 3.2.1 to
+mitigate CVE-2022-36087.SAT stores information used to authenticate to the API gateway with Keycloak.
+Token files are stored in the ~/.config/sat/tokens/
directory. Those files
+have always had permissions appropriately set to restrict them to be readable
+only by the user.
Keycloak usernames used to authenticate to the API gateway are stored in the
+SAT config file at ~/.config/sat/sat.toml
. Keycloak usernames are also used in
+the file names of tokens stored in ~/.config/sat/tokens
. As an additional
+security measure, SAT now restricts the permissions of the SAT config file
+to be readable and writable only by the user. It also restricts the tokens
+directory and the entire SAT config directory ~/.config/sat
to be accessible
+only by the user. This prevents other users on the system from viewing
+Keycloak usernames used to authenticate to the API gateway.
sat init
did not print a message confirming a new
+configuration file was created.sat showrev
exited with a traceback if the file
+/opt/cray/etc/site_info.yaml
existed but was empty. This could occur if the
+user exited sat setrev
with Ctrl-C
.sat bootsys
man page, and added a
+description of the command stages.The 2.3.4 version of the SAT product includes:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container imagesat-cfs-install
Helm chartsat-install-utility
container imagecfs-config-util
container imagesat
CommandsNone.
+When running sat
commands, the current working directory is now mounted in the
+container as /sat/share
, and the current working directory within the container
+is also /sat/share
.
Files in the current working directory must be specified using relative paths to
+that directory, because the current working directory is always mounted on /sat/share
.
+Absolute paths should be avoided, and paths that are outside of $HOME
or $PWD
+are never accessible to the container environment.
The home directory is still mounted on the same path inside the container as it +is on the host.
+sat bootsys
The following options were added to sat bootsys
.
--bos-limit
--recursive
The --bos-limit
option passes a given limit string to a BOS session. The --recursive
+option specifies a slot or other higher-level component in the limit string.
sat bootprep
The --delete-ims-jobs
option was added to sat bootprep run
. It deletes IMS
+jobs after sat bootprep
is run. Jobs are no longer deleted by default.
sat status
sat status
now includes information about nodes’ CFS configuration statuses, such
+as desired configuration, configuration status, and error count.
The output of sat status
now splits different component types into different report tables.
The following options were added to sat status
.
--hsm-fields
, --sls-fields
, --cfs-fields
--bos-template
The --hsm-fields
, --sls-fields
, --cfs-fields
options limit the output columns
+according to specified CSM services.
The --bos-template
option filters the status report according to the specified
+session template’s boot sets.
The following components were modified to be compatible with CSM 1.2.
+sat-cfs-install
container image and Helm chartsat-install-utility
container imageThe sat-ncn
Ansible role provided by sat-cfs-install
was modified to enable
+GPG checks on packages while leaving GPG checks disabled on repository metadata.
Updated urllib3
dependency to version 1.26.5 to mitigate CVE-2021-33503 and refreshed
+Python dependency versions.
Minor bug fixes were made in each of the repositories. For full change lists,
+refer to each repository’s CHANGELOG.md
file.
The known issues listed under the SAT 2.2 release were fixed.
+SAT 2.2.16 was released on February 25th, 2022.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container image and Helm chartIt also added the following new components:
+sat-install-utility
container imagecfs-config-util
container imageThe following sections detail the changes in this release.
+sat
Command Unavailable in sat bash
ShellAfter launching a shell within the SAT container with sat bash
, the sat
command will not
+be found. For example:
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+
This can be resolved temporarily in one of two ways. /sat/venv/bin/
may be prepended to the
+$PATH
environment variable:
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+
Or, the file /sat/venv/bin/activate
may be sourced:
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+
sat bash
ShellAfter launching a shell within the SAT container with sat bash
, tab completion for sat
+commands does not work.
This can be resolved temporarily by sourcing the file /etc/bash_completion.d/sat-completion.bash
:
source /etc/bash_completion.d/sat-completion.bash
+
sat
in Root Directorysat
commands will not work if the current directory is /
. For example:
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+
To resolve, run sat
in another directory.
sat
in Config Directorysat
commands will not work if the current directory is ~/.config/sat
. For example:
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+
To resolve, run sat
in another directory.
sat
Commandssat bootprep
automates the creation of CFS configurations, the build and
+customization of IMS images, and the creation of BOS session templates. For
+more information, see SAT Bootprep.sat slscheck
performs a check for consistency between the System Layout
+Service (SLS) and the Hardware State Manager (HSM).sat bmccreds
provides a simple interface for interacting with the System
+Configuration Service (SCSD) to set BMC Redfish credentials.sat hwhist
displays hardware component history by XName (location) or by
+its Field-Replaceable Unit ID (FRUID). This command queries the Hardware
+State Manager (HSM) API to obtain this information. Since the sat hwhist
+command supports querying for the history of a component by its FRUID, the
+FRUID of components has been added to the output of sat hwinv
.The following automation has been added to the install script, install.sh
:
sat-config-import
Kubernetes job, which is
+started when the sat-cfs-install
Helm chart is deployed.ncn-personalization
).The SAT product uploads additional information to the cray-product-catalog
+Kubernetes ConfigMap detailing the components it provides, including container
+(Docker) images, Helm charts, RPMs, and package repositories.
This information is used to support uninstall and activation of SAT product +versions moving forward.
+Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and activation of the SAT product stream.
+For more information, see Uninstall: Removing a Version of +SAT and Activate: Switching +Between Versions.
+sat status
A Subrole
column has been added to the output of sat status
. This allows you
+to easily differentiate between master, worker, and storage nodes in the
+management role, for example.
Hostname information from SLS has been added to sat status
output.
Support for JSON-formatted output has been added to commands which currently
+support the --format
option, such as hwinv
, status
, and showrev
.
Many usability improvements have been made to multiple sat
commands,
+mostly related to filtering command output. The following are some highlights:
--fields
option to display only specific fields for subcommands which
+display tabular reports.--filter
queries
+so that the first match is used, similar to --sort-by
.--filter
, --fields
, and --reverse
for summaries
+displayed by sat hwinv
.sat hwinv
.The default log level for stderr
has been changed from “WARNING” to “INFO”. For
+more information, see SAT Logging.
With the command-line options --loglevel-stderr
and --loglevel-file
, the log level
+can now be configured separately for stderr
and the log file.
The existing --loglevel
option is now an alias for the --loglevel-stderr
option.
The Podman wrapper script is the script installed at /usr/bin/sat
on the
+master management NCNs by the cray-sat-podman
RPM that runs the cray-sat
+container in podman
. The following subsections detail improvements that were
+made to the wrapper script in this release.
cray-sat
ContainerThe Podman wrapper script that launches the cray-sat
container with podman
+has been modified to mount the user’s current directory and home directory into
+the cray-sat
container to provide access to local files in the container.
The man page for the Podman wrapper script, which is accessed by typing man sat
on a master management NCN, has been improved to document the following:
Fixed issues with redirecting stdout
and stderr
, and piping output to commands,
+such as awk
, less
, and more
.
A new sat
option has been added to configure the HTTP timeout length for
+requests to the API gateway. For more information, refer to sat-man sat
.
sat bootsys
ImprovementsMany improvements and fixes have been made to sat bootsys
. The following are some
+highlights:
--excluded-ncns
option, which can be used to omit NCNs
+from the platform-services
and ncn-power
stages in case they are
+inaccessible.sat bootsys shutdown
now prompt the user to
+continue before proceeding. A new option, --disruptive
, will bypass this.platform-services
+stage of sat bootsys boot
.sat xname2nid
Improvementssat xname2nid
can now recursively expand slot, chassis, and cabinet XNames to
+a list of NIDs in those locations.
A new --format
option has been added to sat xname2nid
. It sets the output format to
+either “range” (the default) or “NID”. The “range” format displays NIDs in a
+compressed range format suitable for use with a workload manager like Slurm.
v2
HSM APIThe commands which interact with HSM (for example, sat status
and sat hwinv
) now
+use the v2
HSM API.
sat diag
Limited to HSN Switchessat diag
will now only operate against HSN switches by default. These are the
+only controllers that support running diagnostics with HMJTD.
sat showrev
EnhancementsA column has been added to the output of sat showrev
that indicates whether a
+product version is “active”. The definition of “active” varies across products,
+and not all products may set an “active” version.
For SAT, the active version is the one with its hosted-type package repository in
+Nexus set as the member of the group-type package repository in Nexus,
+meaning that it will be used when installing the cray-sat-podman
RPM.
cray-sat
Container Image Size ReductionThe size of the cray-sat
container image has been approximately cut in half by
+leveraging multi-stage builds. This also improved the repeatability of the unit
+tests by running them in the container.
Minor bug fixes were made in cray-sat
and in cray-sat-podman
. For full change lists,
+refer to each repository’s CHANGELOG.md
file.
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptIt also added the following new component:
+sat-cfs-install
docker image and helm chartThe following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named ncn-personalization
.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This Helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-Diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+programs on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and config file options +have been removed. For more information, see Remove Obsolete Configuration +File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes masters and workers.
sat firmware
.cray-sat
container image.sat firmware
command.We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat
container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes management cluster (workers and
+masters). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Config File Location ChangeThe default location of the SAT config file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own config files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment variable.
Additionally, if a config file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
option.--list-node-accels
option. The
+count of node accelerators is also included for each node.--list-node-accel-risers
+option. The count of node accelerator risers is also included for each node.--list-node-hsn-nics
+option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use Python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding config-file
+option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding config file options were
+deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootsys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
For more information on each of these commands, see the System Admin Toolkit Command +Overview and the table +of commands in the SAT Authentication section +of this document.
+ + + + + +By default, SAT uses Boot Orchestration Service (BOS) version one. You can
+select the BOS version to use for individual commands with the --bos-version
+option. For more information on this option, refer to the man page for a specific
+command.
You can also configure the BOS version to use in the SAT config file. Do this
+under the api_version
setting in the bos
section of the config file. If
+the system is using an existing SAT config file from an older version of SAT,
+the bos
section might not exist. In that case, add the bos
section with the
+BOS version desired in the api_version
setting.
Find the SAT config file at ~/.config/sat/sat.toml
, and look for a section
+like this:
[bos]
+api_version = "v1"
+
In this example, SAT is using BOS version "v1"
.
Change the line specifying the api_version
to the BOS version desired (for
+example, "v2"
).
[bos]
+api_version = "v2"
+
If applicable, uncomment the api_version
line.
If the system is using an existing SAT config file from a recent version of
+SAT, the api_version
line might be commented out like this:
[bos]
+# api_version = "v2"
+
If the line is commented out, SAT will still use the default BOS
+version. To ensure a different BOS version is used, uncomment the
+api_version
line by removing #
at the beginning of the line.
SAT provides an automated solution for creating CFS configurations, building +and configuring images in IMS, and creating BOS session templates based on a +given input file which defines how those configurations, images, and session +templates should be created.
+This automated process centers around the sat bootprep
command. Man page
+documentation for sat bootprep
can be viewed similarly to other SAT commands.
ncn-m001# sat-man sat-bootprep
+
sat bootprep
is used to create CFS configurations, build and
+rename IMS images, and create BOS session templates which tie the
+configurations and images together during a BOS session.
sat bootsys
automates several portions of the boot and shutdown processes,
+including (but not limited to) performing BOS operations (such as creating BOS
+sessions), powering on and off cabinets, and checking the state of the system
+prior to shutdown.
The input file provided to sat bootprep
is a YAML-formatted file containing
+information which CFS, IMS, and BOS use to create configurations, images, and
+BOS session templates respectively. Writing and modifying these input files is
+the main task associated with using sat bootprep
. An input file is composed of
+three main sections, one each for configurations, images, and session templates.
+These sections may be specified in any order, and any of the sections may be
+omitted if desired.
The sat bootprep
input file is validated against a versioned schema
+definition. The input file should specify the version of the schema with which
+it is compatible under a schema_version
key. For example:
---
+schema_version: 1.0.2
+
The current sat bootprep
input file schema version can be viewed with the
+following command:
ncn-m001# sat bootprep view-schema | grep '^version:'
+version: '1.0.2'
+
The sat bootprep run
command validates the schema version specified
+in the input file. The command also makes sure that the schema version
+of the input file is compatible with the schema version understood by the
+current version of sat bootprep
. For more information on schema version
+validation, refer to the schema_version
property description in the bootprep
+input file schema. For more information on viewing the bootprep input file
+schema in either raw form or user-friendly HTML form, see Viewing the Exact
+Schema Specification or
+Generating User-Friendly Documentation.
The default sat bootprep
input files provided by the hpc-csm-software-recipe
+release distribution already contain the correct schema version.
The CFS configurations are defined under a configurations
key. Under this
+key, you can list one or more configurations to create. For each
+configuration, give a name in addition to the list of layers that
+comprise the configuration.
Each layer can be defined by a product name and optionally a version number, +commit hash, or branch in the product’s configuration repository. If this +method is used, the layer is created in CFS by looking up relevant configuration +information (including the configuration repository and commit information) from +the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be +supplied. However, if it is absent, the version is assumed to be the latest +version found in the cray-product-catalog.
+Alternatively, a configuration layer can be defined by explicitly referencing
+the desired configuration repository. You must then specify the intended version
+of the Ansible playbooks by providing a branch name or commit hash with branch
+or commit
.
The following example shows a CFS configuration with two layers. The first +layer is defined in terms of a product name and version, and the second layer +is defined in terms of a Git clone URL and branch:
+---
+configurations:
+- name: example-configuration
+ layers:
+ - name: example-product
+ playbook: example.yml
+ product:
+ name: example
+ version: 1.2.3
+ - name: another-example-product
+ playbook: another-example.yml
+ git:
+ url: "https://vcs.local/vcs/another-example-config-management.git"
+ branch: main
+
When sat bootprep
is run against an input file, a CFS configuration is created
+corresponding to each configuration in the configurations
section. For
+example, the configuration created from an input file with the layers listed
+above might look something like the following:
{
+ "lastUpdated": "2022-02-07T21:47:49Z",
+ "layers": [
+ {
+ "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "example product",
+ "playbook": "example.yml"
+ },
+ {
+ "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "another example product",
+ "playbook": "another-example.yml"
+ }
+ ],
+ "name": "example-configuration"
+}
+
The IMS images are defined under an images
key. Under the images
key, the
+user may define one or more images to be created in a list. Each element of the
+list defines a separate IMS image to be built and/or configured. Images must
+contain a name
key and a base
key.
The name
key defines the name of the resulting IMS image. The base
key
+defines the base image to be configured or the base recipe to be built and
+optionally configured. One of the following keys must be present under the
+base
key:
ims
key to specify an existing image or recipe in IMS.product
key to specify an image or recipe provided by a
+particular version of a product. Note that this is only possible if the
+product provides a single image or recipe.image_ref
key to specify another image from the input file
+using its ref_name
.Images may also contain the following keys:
+configuration
key to specify a CFS configuration with which to
+customize the built image. If a configuration is specified, then configuration
+groups must also be specified using the configuration_group_names
key.ref_name
key to specify a unique name that can refer to this image
+within the input file in other images or in session templates. The ref_name
+key allows references to images from the input file that have dynamically
+generated names as described in
+Dynamic Variable Substitutions.description
key to describe the image in the bootprep input file.
+Note that this key is not currently used.Here is an example of an image using an existing IMS recipe as its base. This
+example builds an IMS image from that recipe. It then configures it with
+a CFS configuration named example-compute-config
. The example-compute-config
+CFS configuration can be defined under the configurations
key in the same
+input file, or it can be an existing CFS configuration. Running sat bootprep
+against this input file results in an image named example-compute-image
.
images:
+- name: example-compute-image
+ description: >
+ An example compute node image built from an existing IMS recipe.
+ base:
+ ims:
+ name: example-compute-image-recipe
+ type: recipe
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
Here is an example showing the definition of two images. The first image is
+built from a recipe provided by the cos
product. The second image uses the
+first image as a base and configures it with a configuration named
+example-compute-config
. The value of the first image’s ref_name
key is used
+in the second image’s base.image_ref
key to specify it as a dependency.
+Running sat bootprep
against this input file results in two images, the
+first named example-cos-image
and the second named example-compute-image
.
images:
+- name: example-cos-image
+ ref_name: example-cos-image
+ description: >
+ An example image built from a recipe provided by the COS product.
+ base:
+ product:
+ name: cos
+ version: 2.3.101
+ type: recipe
+- name: example-compute-image
+ description: >
+    An example image built from the first image and customized with a CFS configuration.
+ base:
+ image_ref: example-cos-image
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
The BOS session templates are defined under the session_templates
key. Each
+session template must provide values for the name
, image
, configuration
,
+and bos_parameters
keys. The name
key defines the name of the resulting BOS
+session template. The image
key defines the image to use in the BOS session
+template. One of the following keys must be present under the image
key:
ims
key to specify an existing image or recipe in IMS.image_ref
key to specify another image from the input file
+using its ref_name
.The configuration
key defines the CFS configuration specified
+in the BOS session template.
The bos_parameters
key defines parameters that are passed through directly to
+the BOS session template. The bos_parameters
key should contain a boot_sets
+key, and each boot set in the session template should be specified under
+boot_sets
. Each boot set can contain the following keys, all of
+which are optional:
kernel_parameters
key to specify the parameters passed to the kernel on the command line.network
key to specify the network over which the nodes boot.node_list
key to specify the nodes to add to the boot set.node_roles_groups
key to specify the HSM roles to add to the boot set.node_groups
key to specify the HSM groups to add to the boot set.rootfs_provider
key to specify the root file system provider.rootfs_provider_passthrough
key to specify the parameters to add to the rootfs=
+kernel parameter.As mentioned above, the parameters under bos_parameters
are passed through
+directly to BOS. For more information on the properties of a BOS boot set,
+refer to BOS Session Templates in the Cray
+System Management Documentation.
Here is an example of a BOS session template that refers to an existing IMS +image by name:
+session_templates:
+- name: example-session-template
+ image:
+ ims:
+ name: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
Here is an example of a BOS session template that refers to an image from the
+input file by its ref_name
. This requires that an image defined in the input
+file specifies example-image
as the value of its ref_name
key.
session_templates:
+- name: example-session-template
+ image:
+ image_ref: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
The HPC CSM Software Recipe provides a manifest defining the versions of each
+HPC software product included in the recipe. These product versions can be used
+in the sat bootprep
input file with Jinja2 template syntax.
By default, the sat bootprep
command uses the product versions from the
+latest installed version of the HPC CSM Software Recipe. However, you can
+override this with the --recipe-version
command line argument to sat bootprep run
.
For example, to explicitly select the 22.11.0
version of the HPC CSM Software
+Recipe, specify --recipe-version 22.11.0
:
ncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+
The entire sat bootprep
input file is not rendered by the Jinja2 template
+engine. Jinja2 template rendering of the input file is performed individually
+for each supported value. The values of the following keys support rendering as
+a Jinja2 template:
name
key of each configuration under the configurations
key.layers
key in a
+configuration:
+name
git.branch
product.version
product.branch
images
key:
+name
base.product.version
configuration
session_templates
key:
+name
configuration
You can use Jinja2 built-in filters in values of any of the keys listed above. +In addition, Python string methods can be called on the string variables.
+HPC CSM Software Recipe variables are available, and you can use them in the values
+of the keys listed above. View these variables by cloning the hpc-csm-software-recipe
+repository from VCS and accessing the product_vars.yaml
file on the branch that
+corresponds to the targeted version of the HPC CSM Software Recipe.
Set up a shell script to access the password for the crayvcs
user:
ncn-m001# cat > vcs-creds-helper.sh <<EOF
+#!/bin/bash
+kubectl get secret -n services vcs-user-credentials -o jsonpath={.data.vcs_password} | base64 -d
+EOF
+
Ensure vcs-creds-helper.sh
is executable:
ncn-m001# chmod u+x vcs-creds-helper.sh
+
Set the GIT_ASKPASS
environment variable to the path to the
+vcs-creds-helper.sh
script:
ncn-m001# export GIT_ASKPASS="$PWD/vcs-creds-helper.sh"
+
Clone the hpc-csm-software-recipe
repository:
ncn-m001# git clone https://crayvcs@api-gw-service-nmn.local/vcs/cray/hpc-csm-software-recipe.git
+
Change the directory to the hpc-csm-software-recipe
repository:
ncn-m001# cd hpc-csm-software-recipe
+
View the versions of the HPC CSM Software Recipe on the system:
+ncn-m001# git branch -r
+
Check out the branch of the hpc-csm-software-recipe
repository that corresponds to
+the targeted HPC CSM Software Recipe version. For example, for recipe version
+22.11.0:
ncn-m001# git checkout cray/hpc-csm-software-recipe/22.11.0
+
View the contents of the file product_vars.yaml
in the clone of the
+repository:
ncn-m001# cat product_vars.yaml
+
The variables defined in the product_vars.yaml
file can be used in the values
+that support Jinja2 templates. A variable is specified by a dot-separated path,
+with each component of the path representing a key in the YAML file. For
+example, a version of the COS product appears as follows in the
+product_vars.yaml
file:
cos:
+ version: 2.4.76
+
This COS version can be used by specifying cos.version
within a value in the
+input file.
The following example bootprep input file shows how a COS version can be +used in a bootprep input file that creates a CFS configuration for computes. +Only one layer is shown for brevity.
+---
+configurations:
+- name: compute-{{recipe.version}}
+ layers:
+ - name: cos-compute-integration-{{cos.version}}
+ playbook: cos-compute.yaml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration-{{cos.version}}
+
Note: When the value of a key in the bootprep input file is a Jinja2 +expression, it must be quoted to pass YAML syntax checking.
+Jinja2 expressions can also use filters and Python’s built-in string methods to
+manipulate the variable values. For example, suppose only the major and minor
+components of a COS version are to be used in the branch name for the COS
+layer of the CFS configuration. You can use the split
string method to
+achieve this as follows:
---
+configurations:
+- name: compute-{{recipe.version}}
+ layers:
+ - name: cos-compute-integration-{{cos.version}}
+ playbook: cos-compute.yaml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration-{{cos.version.split('.')[0]}}-{{cos.version.split('.')[1]}}
+
Additional variables are available besides the product version variables +provided by the HPC CSM Software Recipe. (For more information, see HPC +CSM Software Recipe Variable Substitutions.) +These additional variables are dynamic because their values are +determined at run-time based on the context in which they appear. Available +dynamic variables include the following:
+base.name
can be used in the name
of an image under the
+images
key. The value of this variable is the name of the IMS image or
+recipe used as the base of this image.image.name
can be used in the name
of a session template
+under the session_templates
key. The value of this variable is the name of
+the IMS image used in this session template.These variables reduce the need to duplicate values throughout the sat bootprep
input file and make the following use cases possible:
This section provides an example bootprep input file. It also gives +instructions for obtaining the default bootprep input files delivered +with a release of the HPC CSM Software Recipe.
+The following bootprep input file provides an example of using most of the +features described in previous sections. It is not intended to be a complete +bootprep file for the entire CSM product.
+---
+configurations:
+- name: compute-{{recipe.version}}
+ layers:
+ - name: cos-compute-integration-{{cos.version}}
+ playbook: site.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration-{{cos.version}}
+ - name: cpe-pe_deploy-integration-{{cpe.version}}
+ playbook: pe_deploy.yml
+ product:
+ name: cpe
+ version: "{{cpe.version}}"
+ branch: integration-{{cpe.version}}
+
+images:
+- name: "{{base.name}}"
+ ref_name: base_cos_image
+ base:
+ product:
+ name: cos
+ type: recipe
+ version: "{{cos.version}}"
+
+- name: compute-{{base.name}}
+ ref_name: compute_image
+ base:
+ image_ref: base_cos_image
+ configuration: compute-{{recipe.version}}
+ configuration_group_names:
+ - Compute
+
+session_templates:
+- name: compute-{{recipe.version}}
+ image:
+ image_ref: compute_image
+ configuration: compute-{{recipe.version}}
+ bos_parameters:
+ boot_sets:
+ compute:
+ kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+ node_roles_groups:
+ - Compute
+ rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+
Default bootprep input files are delivered by the HPC CSM Software Recipe
+product. You can access these files by cloning the hpc-csm-software-recipe
+repository.
To do this, follow steps 1-7 of the procedure in Viewing HPC CSM Software Recipe
+Variables. Then, access the files in the
+bootprep
directory of that repository:
ncn-m001# ls bootprep/
+
The sat bootprep generate-example
command was not updated for
+recent bootprep schema changes. It is recommended that you instead use the
+default bootprep input files described in Accessing Default Bootprep Input
+Files. The sat bootprep generate-example
command will be updated in a future release of SAT.
You might need to edit the default bootprep input files delivered by the HPC +CSM Software Recipe for your system. Here are some examples of how to edit +the files.
+Before running sat bootprep
, HPE recommends reading the bootprep input files
+and paying specific attention to the branch
parameters. Some HPE Cray EX
+products require system-specific changes on a working branch of VCS. For these
+products, the default bootprep input files assume certain naming conventions for
+the VCS branches. The files refer to a particular branch of a product’s
+configuration management repository.
Thus, it is important to confirm that the bootprep input files delivered by the
+HPC CSM Software Recipe match the actual system branch names. For example, the
+COS product’s CFS configuration layer is defined as follows in the default
+management-bootprep.yaml
bootprep input file.
- name: cos-ncn-integration-{{cos.version}}
+ playbook: ncn.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration-{{cos.version}}
+
The default file assumes that system-specific Ansible configuration changes
+for the COS product in VCS are stored in a branch named
+integration-{{cos.version}}
. If the version being installed is COS 2.4.99,
+sat bootprep
looks for a branch named integration-2.4.99
from which to
+create CFS configuration layers.
You can create VCS working branches that are not the default bootprep input file
+branch names. A simple example of this is using cne-install
to update working
+VCS branches. If you use cne-install
to update working VCS branches, (namely in
+the update_working_branches
stage), you create or update the branches specified
+by the -B WORKING_BRANCH
command line option. For example, consider the
+following cne-install
command.
ncn-m001# ./cne-install install \
+ -B integration \
+ -s deploy_products \
+ -e update_working_branches
+
Products installed with this cne-install
example use the working branch
+integration
for system-specific changes to VCS. The branch specified by the
+-B
option must match the branch specified in the bootprep input file.
In another example, to use the branch integration
for COS instead of
+integration-{{cos.version}}
, edit the bootprep input file so it reads as
+follows.
- name: cos-ncn-integration-{{cos.version}}
+ playbook: ncn.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration
+
The default bootprep input file for management CFS configurations
+(management-bootprep.yaml
) creates configurations that have names specified
+within the input file. For example, in the bootprep input files included in the
+22.11
HPC CSM Software Recipe, the following configurations are named:
ncn-personalization
ncn-image-customization
These default management CFS configuration names might be acceptable for your
+system. However, it is possible to create other names. sat bootprep
creates
+whatever configurations are specified in the input file. For example, to
+create an NCN node personalization configuration named
+ncn-personalization-test
, edit the file as follows.
configurations:
+- name: ncn-personalization-test
+ layers:
+ ...
+
For management configurations, use sat status
to identify the current
+desired configuration for each of the management nodes.
ncn-m001# sat status --fields xname,role,subrole,desiredconfig --filter role=management
++----------------+------------+---------+---------------------+
+| xname | Role | SubRole | Desired Config |
++----------------+------------+---------+---------------------+
+| x3000c0s1b0n0 | Management | Master | ncn-personalization |
+| x3000c0s3b0n0 | Management | Master | ncn-personalization |
+| x3000c0s5b0n0 | Management | Master | ncn-personalization |
+| x3000c0s7b0n0 | Management | Worker | ncn-personalization |
+| x3000c0s9b0n0 | Management | Worker | ncn-personalization |
+| x3000c0s11b0n0 | Management | Worker | ncn-personalization |
+| x3000c0s13b0n0 | Management | Worker | ncn-personalization |
+| x3000c0s17b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s19b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s21b0n0 | Management | Storage | ncn-personalization |
+| x3000c0s25b0n0 | Management | Worker | ncn-personalization |
++----------------+------------+---------+---------------------+
+
To overwrite the desired configuration using sat bootprep
, ensure the bootprep
+input file specifies to create a configuration with the same name
+(ncn-personalization
in the example above). To create a different configuration,
+ensure the bootprep input file specifies to create a configuration with a
+different name than the desired configuration (different than ncn-personalization
+in the example above).
When working with a given HPC CSM Software Recipe, it might be necessary to +upgrade a single HPE Cray EX product past the default version given in the +recipe. However, you might still want to use the other default product versions +contained in that recipe. To do this, first upgrade the single product. For +more information, refer to the upgrade instructions in that product’s +documentation.
+After the product is upgraded, you must override its default version in subsequent
+runs of sat bootprep
. The following process explains how to do this. In this
+example, all the default product versions from the 22.11
software recipe are
+used except for COS. The COS default product version is overridden to version
+2.4.199
instead, and the CFS configurations in management-bootprep.yaml
are
+created.
Ensure you have a local copy of the default bootprep input files.
+For more information, see Accessing Default Bootprep Input +Files.
+Edit the product_vars.yaml
file to change the default product version.
ncn-m001# vim product_vars.yaml
+
Confirm the new product version in the edited product_vars.yaml
file.
ncn-m001# grep -A1 'cos:' product_vars.yaml
+cos:
+ version: 2.4.199
+
Use the --vars-file
option when running sat bootprep
to override the
+default product version.
You must run this command from the directory containing the product_vars.yaml
+file. The product_vars.yaml
file must also be specified when using the
+--vars-file
option. It is not sufficient to just edit the file.
ncn-m001# sat bootprep run --vars-file product_vars.yaml bootprep/management-bootprep.yaml
+
Note: This example is specific to creating the configurations defined in
+management-bootprep.yaml
. Review what configurations, images, or session templates
+you intend to create by viewing the input file.
The contents of the YAML input files described above must conform to a schema +which defines the structure of the data. The schema definition is written using +the JSON Schema format. (Although the format is named “JSON Schema”, the schema +itself is written in YAML as well.) More information, including introductory +materials and a formal specification of the JSON Schema metaschema, can be found +on the JSON Schema website.
+To view the exact schema specification, run sat bootprep view-schema
.
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+...
+title: Bootprep Input File
+description: >
+ A description of the set of CFS configurations to create, the set of IMS
+ images to create and optionally customize with the defined CFS configurations,
+ and the set of BOS session templates to create that reference the defined
+ images and configurations.
+type: object
+additionalProperties: false
+properties:
+ ...
+
The raw schema definition can be difficult to understand without experience +working with JSON Schema specifications. For this reason, a feature is included +that generates user-friendly HTML documentation for the input file schema. This +HTML documentation can be browsed with your preferred web browser.
+Create a documentation tarball using sat bootprep
.
ncn-m001# sat bootprep generate-docs
+INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
+
An alternate output directory can be specified with the --output-dir
+option. The generated tarball is always named bootprep-schema-docs.tar.gz
.
ncn-m001# sat bootprep generate-docs --output-dir /tmp
+INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
+
From another machine, copy the tarball to a local directory.
+another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
+
Extract the contents of the tarball and open the contained index.html
.
another-machine$ tar xzvf bootprep-schema-docs.tar.gz
+x bootprep-schema-docs/
+x bootprep-schema-docs/index.html
+x bootprep-schema-docs/schema_doc.css
+x bootprep-schema-docs/schema_doc.min.js
+another-machine$ open bootprep-schema-docs/index.html
+
The SAT Grafana Dashboards display messages that are generated by the HSN (High Speed Network) and reported through +Redfish. The messages are displayed based on severity.
+Grafana can be accessed via web browser at the following URL:
+https://sma-grafana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Grafana being https://sma-grafana.cmn.EXAMPLE_DOMAIN.com
For more information on accessing the Grafana Dashboards, refer to Access the Grafana Monitoring UI in the +SMA product documentation.
+For more information on the interpretation of metrics for the SAT Grafana Dashboards, refer to “Fabric Telemetry +Kafka Topics” in the SMA product documentation.
+There are four Fabric Telemetry dashboards used in SAT that report on the HSN. Two contain chart panels and two display +telemetry in a tabular format.
+Dashboard Name | +Display Type | +
---|---|
Fabric Congestion | +Chart Panels | +
Fabric RFC3635 | +Chart Panels | +
Fabric Errors | +Tabular Format | +
Fabric Port State | +Tabular Format | +
The tabular format presents a single point of telemetry for a given location and metric, either because the telemetry +is not numerical or that it changes infrequently. The value shown is the most recently reported value for that location +during the time range selected, if any. The interval setting is not used for tabular dashboards.
+Shows the Interval and Locations Options for the available telemetry.
+ +The value of the Interval option sets the time resolution of the received telemetry. This works a bit like a +histogram, with the available telemetry in an interval of time going into a “bucket” and averaging out to a single +point on the chart or table. The special value auto will choose an interval based on the time range selected.
+For more information, refer to Grafana Templates and Variables.
+The Locations option allows restriction of the telemetry shown by locations, either individual links or all links +in a switch. The selection presented updates dynamically according to time range, except for the errors dashboard, +which always has entries for all links and switches, although the errors shown are restricted to the selected time +range.
+The chart panels for the RFC3635 and Congestion dashboards allow selection of a single location from the chart’s legend +or the trace on the chart.
+SAT Grafana Dashboards provide system administrators a way to view fabric telemetry data across all Rosetta switches in +the system and assess the past and present health of the high-speed network. It also allows the ability to drill down +to view data for specific ports on specific switches.
+This dashboard contains the variable, Port Type not found in the other dashboards. The possible values are edge, +local, and global and correspond to the link’s relationship to the network topology. The locations presented in the +panels are restricted to the values (any combination, defaults to “all”) selected.
+The metric values for links of a given port type are similar in value to each other but very distinct from the values of +other types. If the values for different port types are all plotted together, the values for links with lower values are +indistinguishable from zero when plotted.
+The port type of a link is reported as a port state “subtype” event when defined at port initialization.
+This dashboard reports error counters in a tabular format in three panels.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+Unlike other dashboards, the locations presented are all locations in the system rather than having telemetry within +the time range selected. However, the values are taken from telemetry within the time range.
+There is no Interval option because this parameter is not used to set a coarseness of the data. Only a single value +is presented that displays the most recent value in the time range.
+The Fabric Port State telemetry is distinct because it typically is not numeric. It also updates infrequently, so a +long time range may be necessary to obtain any values. Port State is refreshed daily, so a time range of 24 hours +results in all states for all links in the system being shown.
+The three columns, named group, switch, and port, are not port state events, but extra information included with +all port state events.
+For more information on performance counters, refer to +Definitions of Managed Objects for the Ethernet-like Interface Types, +an Internet standards document.
+Because these metrics are counters that only increase over time, the values plotted are the change in the counter’s +value over the interval setting.
+ + + + + +Kibana is an open source analytics and visualization platform designed to search, view, and interact with data stored +in Elasticsearch indices. Kibana runs as a web service and has a browser-based interface. It offers visual output of +node data in the forms of charts, tables and maps that display real-time Elasticsearch queries. Viewing system data in +this way breaks down the complexity of large data volumes into easily understood information.
+Kibana can be accessed via web browser at the following URL:
+https://sma-kibana.cmn.<site-domain>
The value of site-domain
can be obtained as follows:
ncn-m001:~ # kubectl get secret site-init -n loftsman -o jsonpath='{.data.customizations\.yaml}' | \
+ base64 -d | grep "external:"
+
That command will produce the following output, for example:
+ external: EXAMPLE_DOMAIN.com
+
This would result in the address for Kibana being https://sma-kibana.cmn.EXAMPLE_DOMAIN.com
For more information on accessing the Kibana Dashboards, refer to View Logs Via Kibana in the SMA product +documentation.
+Additional details about the AER, ATOM, Heartbeat, Kernel, MCE, and RAS Daemon Kibana Dashboards are included in this +table.
+Dashboard | +Short Description | +Long Description | +Kibana Visualization and Search Name | +
---|---|---|---|
sat-aer |
+AER corrected | +Corrected Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-corrected Search: sat-aer-corrected |
+
sat-aer |
+AER fatal | +Fatal Advanced Error Reporting messages from PCI Express devices on each node. | +Visualization: aer-fatal Search: sat-aer-fatal |
+
sat-atom |
+ATOM failures | +Application Task Orchestration and Management tests are run on a node when a job finishes. Test failures are logged. | +sat-atom-failed |
+
sat-atom |
+ATOM admindown |
+Application Task Orchestration and Management test failures can result in nodes being marked admindown . An admindown node is not available for job launch. |
+sat-atom-admindown |
+
sat-heartbeat |
+Heartbeat loss events | +Heartbeat loss event messages reported by the hbtd pods that monitor for heartbeats across nodes in the system. |
+sat-heartbeat |
+
sat-kernel |
+Kernel assertions | +The kernel software performs a failed assertion when some condition represents a serious fault. The node goes down. | +sat-kassertions |
+
sat-kernel |
+Kernel panics | +The kernel panics when something is seriously wrong. The node goes down. | +sat-kernel-panic |
+
sat-kernel |
+Lustre bugs (LBUGs) | +The Lustre software in the kernel stack performs a failed assertion when some condition related to file system logic represents a serious fault. The node goes down. | +sat-lbug |
+
sat-kernel |
+CPU stalls | +CPU stalls are serious conditions that can reduce node performance, and sometimes cause a node to go down. Technically these are Read-Copy-Update stalls where software in the kernel stack holds onto memory for too long. Read-Copy-Update is a vital aspect of kernel performance and rather esoteric. | +sat-cpu-stall |
+
sat-kernel |
+Out of memory | +An Out Of Memory (OOM) condition has occurred. The kernel must kill a process to continue. The kernel will select an expendable process when possible. If there is no expendable process the node usually goes down in some manner. Even if there are expendable processes the job is likely to be impacted. OOM conditions are best avoided. | +sat-oom |
+
sat-mce |
+MCE | +Machine Check Exceptions (MCE) are errors detected at the processor level. | +sat-mce |
+
sat-rasdaemon |
+rasdaemon errors |
+Errors from the rasdaemon service on nodes. The rasdaemon service is the Reliability, Availability, and Serviceability Daemon, and it is intended to collect all hardware error events reported by the Linux kernel, including PCI and MCE errors. This may include certain HSN errors in the future. |
+sat-rasdaemon-error |
+
sat-rasdaemon |
+rasdaemon messages |
+All messages from the rasdaemon service on nodes. |
+sat-rasdaemon |
+
By default, search highlighting is enabled. This procedure instructs how to disable search highlighting.
+The Kibana Dashboard should be open on your system.
+Navigate to Management
+Navigate to Advanced Settings in the Kibana section, below the Elastic search section
+Scroll down to the Discover section
+Change Highlight results from on to off
+Click Save to save changes
+The AER Dashboard displays errors that come from the PCI Express Advanced Error Reporting (AER) driver. These errors +are split up into separate visualizations depending on whether they are fatal or corrected errors.
+Go to the dashboard section.
+Select sat-aer
dashboard.
Choose the time range of interest.
+View the Corrected and Fatal Advanced Error Reporting messages from PCI Express devices on each node. View the +matching log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on +the left. If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass +next to each NID.
+The ATOM (Application Task Orchestration and Management) Dashboard displays node failures that occur during health
+checks and application test failures. Some test failures are of possible interest even though a node is not marked
+admindown
or otherwise fails. They are of clear interest if a node is marked admindown
, and might provide
+clues if a node otherwise fails. They might also show application problems.
HPE Cray EX is installed on the system along with the System Admin Toolkit, which contains the ATOM Kibana Dashboard.
+Go to the dashboard section.
+Select sat-atom
dashboard.
Choose the time range of interest.
+View any nodes marked admindown
and any ATOM test failures. These failures occur during health checks and
+application test failures. Test failures marked admindown
are important to note. View the matching log messages
+in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. If desired,
+results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to each NID.
The Heartbeat Dashboard displays heartbeat loss messages that are logged by the hbtd
pods in the system. The hbtd
+pods are responsible for monitoring nodes in the system for heartbeat loss.
Go to the dashboard section.
+Select sat-heartbeat
dashboard.
Choose the time range of interest.
+View the heartbeat loss messages that are logged by the hbtd
pods in the system. The hbtd
pods are responsible
+for monitoring nodes in the system for heartbeat loss. View the matching log messages in the panel.
The Kernel Dashboard displays compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. +The messages reveal if Lustre has experienced a fatal error on any compute nodes in the system. A CPU stall is a serious +problem that might result in a node failure. Out-of-memory conditions can be due to applications or system problems and +may require expert analysis. They provide useful clues for some node failures and may reveal if an application is using +too much memory.
+Go to the dashboard section.
+Select sat-kernel
dashboard.
Choose the time range of interest.
+View the compute node failures such as kernel assertions, kernel panics, and Lustre LBUG messages. View the matching +log messages in the panel(s) on the right, and view the counts of each message per NID in the panel(s) on the left. +If desired, results can be filtered by NID by clicking the icon showing a + inside a magnifying glass next to +each NID.
+The MCE Dashboard displays CPU detected processor-level hardware errors.
+Go to the dashboard section.
+Select sat-mce
dashboard.
Choose the time range of interest.
+View the Machine Check Exceptions (MCEs) listed including the counts per NID (node). For an MCE, the CPU number and +DIMM number can be found in the message, if applicable. View the matching log messages in the panel(s) on the right, +and view the counts of each message per NID in the panel(s) on the left. If desired, results can be filtered by NID +by clicking the icon showing a + inside a magnifying glass next to each NID.
+The RAS Daemon Dashboard displays errors that come from the Reliability, Availability, and Serviceability (RAS) daemon
+service on nodes in the system. This service collects all hardware error events reported by the Linux kernel, including
+PCI and MCE errors. As a result there may be some duplication between the messages presented here and the messages
+presented in the MCE and AER dashboards. This dashboard splits up the messages into two separate visualizations, one
+for only messages of severity emerg
or err
and another for all messages from rasdaemon
.
Go to the dashboard section.
+Select sat-rasdaemon
dashboard.
Choose the time range of interest.
+View the errors that come from the Reliability, Availability, and Serviceability (RAS) daemon service on nodes in +the system. View the matching log messages in the panel(s) on the right, and view the counts of each message per NID +in the panel(s) on the left. If desired, results can be filtered by NID by clicking the icon showing a + inside +a magnifying glass next to each NID.
+SAT can optionally be installed and configured on an external system to interact +with CSM over the CAN.
+Most SAT subcommands work by accessing APIs which are reachable via the CAN. +However, certain SAT commands depend on host-based functionality on the +management NCNs and will not work from an external system. This includes the +following:
+platform-services
and ncn-power
stages of sat bootsys
--local
option of sat showrev
Installing SAT on an external system is not an officially supported configuration. +These instructions are provided “as-is” with the hope that they can be useful for +users who desire additional flexibility.
+Certain additional steps may need to be taken to install and configure SAT +depending on the configuration of the external system in use. These additional +steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this +documentation. This section covers only the steps needed to configure SAT to +use externally-accessible API endpoints exposed by CSM.
+kubectl
, openssh
, git
, and curl
are installed on the external system.Create a Python virtual environment.
+$ SAT_VENV_PATH="$(pwd)/venv"
+$ python3 -m venv ${SAT_VENV_PATH}
+$ . ${SAT_VENV_PATH}/bin/activate
+
Clone the SAT source code.
+To use SAT version 3.21, this example clones the release/3.21
branch of
+Cray-HPE/sat
.
(venv) $ git clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git
+
Set up the SAT CSM Python dependencies to be installed from their source code.
+SAT CSM Python dependency packages are not currently distributed publicly as
+source packages or binary distributions. They must be installed from
+their source code hosted on GitHub. Also, to install the cray-product-catalog
+Python package, you must first clone it locally. Use the following steps to
+modify the SAT CSM Python dependencies so they can be installed from their source
+code.
Clone the source code for cray-product-catalog
.
(venv) $ git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
+
In the cray-product-catalog
directory, create a file named .version
+that contains the version of cray-product-catalog
.
(venv) $ echo 1.6.0 > cray-product-catalog/.version
+
Open the “locked” requirements file in a text editor.
+(venv) $ vim sat/requirements.lock.txt
+
Update the line containing cray-product-catalog
so that it reflects the
+local path to cray-product-catalog
.
It should read as follows.
+./cray-product-catalog
+
For versions of SAT newer than 3.19, change the line containing csm-api-client
+to read as follows.
csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
(Optional) Confirm that requirements.lock.txt
is modified as expected.
Note: For versions newer than 3.19, you will see both cray-product-catalog
+and csm-api-client
. For version 3.19 and older, you will only see
+cray-product-catalog
.
(venv) $ grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
+./cray-product-catalog
+csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
Install the modified SAT dependencies.
+(venv) $ pip install -r sat/requirements.lock.txt
+...
+
Install the SAT Python package.
+(venv) $ pip install ./sat
+...
+
(Optional) Add the sat
virtual environment to the user’s PATH
environment
+variable.
If a shell other than bash
is in use, replace ~/.bash_profile
with the
+appropriate profile path.
If the virtual environment is not added to the user’s PATH
environment
+variable, then source ${SAT_VENV_PATH}/bin/activate
will need to be run before
+running any SAT commands.
(venv) $ deactivate
+$ echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
+$ source ~/.bash_profile
+
Copy the file /etc/kubernetes/admin.conf
from ncn-m001
to ~/.kube/config
+on the external system.
Note that this file contains credentials to authenticate against the Kubernetes +API as the administrative user, so it should be treated as sensitive.
+$ mkdir -p ~/.kube
+$ scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
+admin.conf 100% 5566 3.0MB/s 00:00
+
Add a new entry for the hostname kubernetes
to the external system’s
+/etc/hosts
file.
The kubernetes
hostname should correspond to the CAN IP address on ncn-m001
.
+On CSM 1.2, this can be determined by querying the IP address of the bond0.cmn0
+interface.
$ ssh ncn-m001 ip addr show bond0.cmn0
+13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+$ IP_ADDRESS=10.102.1.11
+
On CSM versions prior to 1.2, the CAN IP can be determined by querying the
+IP address of the vlan007
interface.
$ ssh ncn-m001 ip addr show vlan007
+13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+$ IP_ADDRESS=10.102.1.10
+
Once the IP address is determined, add an entry to /etc/hosts
mapping the
+IP address to the hostname kubernetes
.
$ echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
+10.102.1.11 kubernetes
+
Modify ~/.kube/config
to set the cluster server address.
The value of the server
key for the kubernetes
cluster under the clusters
+section should be set to https://kubernetes:6443
.
---
+clusters:
+- cluster:
+ certificate-authority-data: REDACTED
+ server: https://kubernetes:6443
+ name: kubernetes
+...
+
Confirm that kubectl
can access the CSM Kubernetes cluster.
$ kubectl get nodes
+NAME STATUS ROLES AGE VERSION
+ncn-m001 Ready master 135d v1.19.9
+ncn-m002 Ready master 136d v1.19.9
+ncn-m003 Ready master 136d v1.19.9
+ncn-w001 Ready <none> 136d v1.19.9
+ncn-w002 Ready <none> 136d v1.19.9
+ncn-w003 Ready <none> 136d v1.19.9
+
Use sat init
to create a configuration file for SAT.
$ sat init
+INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
+
Copy the platform CA certificates from the management NCN and configure the +certificates for use with SAT.
+If a shell other than bash
is in use, replace ~/.bash_profile
with the
+appropriate profile path.
$ scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
+$ echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
+$ source ~/.bash_profile
+
Edit the SAT configuration file to set the API and S3 hostnames.
+Externally available API endpoints are given domain names in PowerDNS, so the
+endpoints in the configuration file should each be set to
+subdomain.system-name.site-domain
, where system-name
and site-domain
are
+replaced with the values specified during csi config init
, and subdomain
+is the DNS name for the externally available service. For more information,
+refer to Externally Exposed Services in the Cray System Management
+Documentation.
The API gateway has the subdomain api
, and S3 has the subdomain s3
. The
+S3 endpoint runs on port 8080. The following options should be set in the
+SAT configuration file.
[api_gateway]
+host = "api.system-name.site-domain"
+
+[s3]
+endpoint = "http://s3.system-name.site-domain:8080"
+
Edit the SAT configuration file to specify the Keycloak user which will be +accessing the REST API.
+[api_gateway]
+username = "user"
+
Run sat auth
. Enter your password when prompted.
The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For more
+information on editing Role Mappings, see Create Internal User Accounts
+in the Keycloak Shasta Realm in the
+Cray System Management Documentation.
+For more information on authentication types and authentication credentials,
+see SAT Command Authentication.
$ sat auth
+Password for user:
+Succeeded!
+
Ensure the files are readable only by the current user.
+$ touch ~/.config/sat/s3_access_key \
+ ~/.config/sat/s3_secret_key
+
$ chmod 600 ~/.config/sat/s3_access_key \
+ ~/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
Generate S3 credentials and write them to a local file so the SAT user can
+access S3 storage. In order to use the SAT S3 bucket, the user must generate
+the S3 access key and secret keys and write them to a local file. SAT uses
+S3 storage for several purposes, most importantly to store the site-specific
+information set with sat setrev
.
$ kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ ~/.config/sat/s3_access_key
+
$ kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ ~/.config/sat/s3_secret_key
+
The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.
+This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.
+IUF will perform the following tasks for a release of SAT.
+deliver-product
stage:
+update-vcs-config
stage:
+update-cfs-config
stage:
+prepare-images
stage:
+management-nodes-rollout
stage:
+IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.
+This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.
+Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.
+Information: This stage only applies to the management configuration and +not to the managed configuration.
+Information: This stage only applies to management images and not to +managed images.
+After installing SAT with IUF, you must complete the following SAT configuration +procedures before using SAT:
+ +...
) in shell output indicate omitted lines. Replace x.y.z
with the version of the SAT product stream
+being installed. To run SAT commands on the manager NCNs, you must first set up authentication
+to the API gateway. The admin account used to authenticate with sat auth
+must be enabled in Keycloak and must have its assigned role set to admin.
+For more information on editing Role Mappings, see Create Internal User Accounts
+in the Keycloak Shasta Realm in the Cray System Management
+Documentation. For more information on
+authentication types and authentication credentials, see SAT Command
+Authentication.
sat
CLI has been installed following the IUF
+section of the
+Cray System Management Documentation. The following is the procedure to globally configure the username used by SAT and +authenticate to the API gateway.
+Generate a default SAT configuration file, if one does not exist.
+ncn-m001# sat init
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the config file already exists, it will print out the following +error.
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
+section of the config file.
username = "crayadmin"
+
Run sat auth
. Enter your password when prompted.
ncn-m001# sat auth
+Password for crayadmin:
+Succeeded!
+
Other sat
commands are now authenticated to make requests to the API gateway.
ncn-m001# sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access +S3 storage. In order to use the SAT S3 bucket, the System Administrator must +generate the S3 access key and secret keys and write them to a local file. This +must be done on every Kubernetes master node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the
+site-specific information set with sat setrev
(see Set System Revision
+Information).
Ensure the files are readable only by root
.
ncn-m001# touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
ncn-m001# chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
Write the credentials to local files using kubectl
.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+Get the SAT configuration file’s endpoint value.
+Note: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
ncn-m001# grep endpoint ~/.config/sat/sat.toml
+# endpoint = "https://rgw-vip.nmn"
+
Get the sat-s3-credentials
secret’s endpoint value.
ncn-m001# kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, change the SAT configuration file’s endpoint value to +match the secret’s.
+Copy SAT configurations to each manager node on the system.
+ncn-m001# for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
Note: Depending on how many manager nodes are on the system, the list of
+manager nodes may be different. This example assumes three manager nodes, where
+the configuration files must be copied from ncn-m001
to ncn-m002
and
+ncn-m003
. Therefore, the list of hosts above is ncn-m002
and ncn-m003
.
HPE service representatives use system revision information data to identify +systems in support cases.
+Set System Revision Information.
+Run sat setrev
and follow the prompts to set the following site-specific values:
Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.
+ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Verify System Revision Information.
+Run sat showrev
and verify the output shown in the “System Revision Information table.”
The following example shows sample table output.
+ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and +querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware +components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt
commands
+used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview.
Six Kibana Dashboards are included with SAT. They provide organized output for system health information.
+Four Grafana Dashboards are included with SAT. They display messages that are generated by the HSN (High Speed Network) and +are reported through Redfish.
+In CSM 1.3 and newer, the sat
command is automatically available on all the
+Kubernetes NCNs. For more information, see SAT in CSM. Older
+versions of CSM do not have the sat
command automatically available, and SAT
+must be installed as a separate product.
Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes manager nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the
+HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are
+similarities between SAT commands and xt
commands used on the Cray XC platform.
The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.
+The sat
command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on
+Kubernetes manager nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container.There are two ways to run sat.
+sat bash
, followed by a sat
command.sat
command directly on a Kubernetes manager node.In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using interactive and non-interactive modes.
ncn-m001# sat bash
+(CONTAINER-ID)sat-container# sat status
+
ncn-m001# sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, then use sat bash to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes manager node, use sat-man
on the manager node as shown in the following
+example.
ncn-m001# sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes manager nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
ncn-m001# man sat
+
ncn-m001# man sat-podman
+
Some SAT subcommands make requests to the Shasta services through the API
+gateway and thus require authentication to the API gateway in order to function.
+Other SAT subcommands use the Kubernetes API. Some sat
commands require S3 to
+be configured. In order to use the SAT S3 bucket, the System Administrator must
+generate the S3 access key and secret keys and write them to a local file. This
+must be done on every Kubernetes manager node where SAT commands are run.
For more information on authentication requests, see System Security and +Authentication in the Cray System Management +Documentation. The following is a table +describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bmccreds |
+Requires authentication to the API gateway. | +sat-bmccreds |
+Set BMC passwords. | +
sat bootprep |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. |
+sat-bootprep |
+Prepare to boot nodes with images and configurations. | +
sat bootsys |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwhist |
+Requires authentication to the API gateway. | +sat-hwhist |
+Report hardware component history. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat jobstat |
+Requires authentication to the API gateway. | +sat-jobstat |
+Check the status of jobs and applications. | +
sat k8s |
+Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. |
+sat-k8s |
+Report on Kubernetes replica sets that have co-located (on the same node) replicas. | +
sat linkhealth |
+This command has been deprecated. | ++ | + |
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node XNames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat slscheck |
+Requires authentication to the API gateway. | +sat-slscheck |
+Perform a cross-check between SLS and HSM. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC XNames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, you must run the sat auth
+command. This command will prompt for a password on the command line. The
+username value is obtained from the following locations, in order of higher
+precedence to lower precedence:
--username
global command-line option.username
option in the api_gateway
section of the config file at
+~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file
+will be obtained and saved to ~/.config/sat/tokens
. Subsequent sat commands
+will determine the username the same way as sat auth
described above and will
+use the token for that username if it has been obtained and saved by sat auth
.
The host name in a command prompt indicates where the command must be run. The account that must run the command is +also indicated in the prompt.
+root
or super-user account always has the #
character at the end of the prompt and has the host name of the
+host in the prompt.root
account is indicated with account@hostname>. A user account that is neither root
nor crayadm
is
+referred to as user
.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run on one of the Kubernetes Manager servers. (Non-interactive) | +
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
Here are examples of the sat status
command used by an administrator.
ncn-m001# sat status
+
ncn-m001# sat bash
+(CONTAINER_ID) sat-container# sat status
+
In CSM 1.3 and newer, the sat
command is automatically available on all the Kubernetes NCNs, but it is still possible
+to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the
+sat
command available in CSM. Installing the SAT product stream allows additional supporting components to be added:
An entry for SAT in the cray-product-catalog
Kubernetes ConfigMap is only created by installing the SAT product
+stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev
.
The sat-install-utility
container image is only available with the full SAT product stream. This container image
+provides uninstall and downgrade functionality when used with the prodmgr
command. (In SAT 2.3 and older, SAT was
+only available to install as a separate product stream. Because these versions were packaged with
+sat-install-utility
, it is still possible to uninstall these versions of SAT.)
The docs-sat
RPM package is only available with the full SAT product stream.
The sat-config-management
git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is
+only available with the full SAT product stream.
If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS
+configurations that apply to management NCNs (for example, management-23.5.0
) should not include a SAT layer.
The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the
+Keycloak username that authenticates to the API gateway cannot be read by users other than root
. Specifically, it
+does the following:
Modifies the sat.toml
configuration file which contains the username so that it is only readable by root
.
Modifies the /root/.config/sat/tokens
directory so that the directory is only readable by root
. This is needed
+because the names of the files within the tokens
directory contain the username.
Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other +users. These permission changes only apply to files created by previous installations of SAT. In the current version of +SAT all files and directories are created with the appropriate permissions.
+Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX (Shasta) software stack. The following list shows these dependencies
+for each subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bmccreds
sat bootprep
sat bootsys
sat diag
sat firmware
sat hwhist
sat hwinv
sat hwmatch
sat init
None
+sat jobstat
sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat slscheck
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
SAT 2.2.16 was released on February 25th, 2022.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container image and Helm chartIt also added the following new components:
+sat-install-utility
container imagecfs-config-util
container imageThe following sections detail the changes in this release.
+sat
Command Unavailable in sat bash
ShellAfter launching a shell within the SAT container with sat bash
, the sat
+command will not be found. For example:
(CONTAINER-ID) sat-container:~ # sat status
+bash: sat: command not found
+
This can be resolved temporarily in one of two ways. /sat/venv/bin/
may be
+prepended to the $PATH
environment variable:
(CONTAINER-ID) sat-container:~ # export PATH=/sat/venv/bin:$PATH
+(CONTAINER-ID) sat-container:~ # sat status
+
Or, the file /sat/venv/bin/activate
may be sourced:
(CONTAINER-ID) sat-container:~ # source /sat/venv/bin/activate
+(CONTAINER-ID) sat-container:~ # sat status
+
sat bash
ShellAfter launching a shell within the SAT container with sat bash
, tab completion
+for sat
commands does not work.
This can be resolved temporarily by sourcing the file
+/etc/bash_completion.d/sat-completion.bash
:
source /etc/bash_completion.d/sat-completion.bash
+
sat
in Root Directorysat
commands will not work if the current directory is /
. For example:
ncn-m001:/ # sat --help
+Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+
To resolve, run sat
in another directory.
sat
in Config Directorysat
commands will not work if the current directory is ~/.config/sat
.
+For example:
ncn-m001:~/.config/sat # sat --help
+Error: /root/.config/sat: duplicate mount destination
+
To resolve, run sat
in another directory.
sat
Commandssat bootprep
automates the creation of CFS configurations, the build and
+customization of IMS images, and the creation of BOS session templates. For
+more information, see SAT Bootprep.sat slscheck
performs a check for consistency between the System Layout
+Service (SLS) and the Hardware State Manager (HSM).sat bmccreds
provides a simple interface for interacting with the System
+Configuration Service (SCSD) to set BMC Redfish credentials.sat hwhist
displays hardware component history by XName (location) or by
+its Field-Replaceable Unit ID (FRUID). This command queries the Hardware
+State Manager (HSM) API to obtain this information. Since the sat hwhist
+command supports querying for the history of a component by its FRUID, the
+FRUID of components has been added to the output of sat hwinv
.The following automation has been added to the install script, install.sh
:
sat-config-import
Kubernetes job, which is
+started when the sat-cfs-install
Helm chart is deployed.ncn-personalization
).The SAT product uploads additional information to the cray-product-catalog
+Kubernetes ConfigMap detailing the components it provides, including container
+(Docker) images, Helm charts, RPMs, and package repositories.
This information is used to support uninstall and downgrade of SAT product +versions moving forward.
+Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and downgrade of the SAT product stream.
+For more information, see +Uninstall: Remove a Version of SAT and +Downgrade: Switch Between SAT Versions.
+sat status
A Subrole
column has been added to the output of sat status
. This allows you
+to easily differentiate between master, worker, and storage nodes in the
+management role, for example.
Hostname information from SLS has been added to sat status
output.
Support for JSON-formatted output has been added to commands which currently
+support the --format
option, such as hwinv
, status
, and showrev
.
Many usability improvements have been made to multiple sat
commands,
+mostly related to filtering command output. The following are some highlights:
--fields
option to display only specific fields for subcommands which
+display tabular reports.--filter
queries
+so that the first match is used, similar to --sort-by
.--filter
, --fields
, and --reverse
for summaries
+displayed by sat hwinv
.sat hwinv
.The default log level for stderr
has been changed from “WARNING” to “INFO”. For
+more information, see Update SAT Logging.
With the command-line options --loglevel-stderr
and --loglevel-file
, the log
+level can now be configured separately for stderr
and the log file.
The existing --loglevel
option is now an alias for the --loglevel-stderr
+option.
The Podman wrapper script is the script installed at /usr/bin/sat
on the
+master management NCNs by the cray-sat-podman
RPM that runs the cray-sat
+container in podman
. The following subsections detail improvements that were
+made to the wrapper script in this release.
cray-sat
ContainerThe Podman wrapper script that launches the cray-sat
container with podman
+has been modified to mount the user’s current directory and home directory into
+the cray-sat
container to provide access to local files in the container.
The man page for the Podman wrapper script, which is accessed by typing man sat
on a master management NCN, has been improved to document the following:
Fixed issues with redirecting stdout
and stderr
, and piping output to
+commands, such as awk
, less
, and more
.
A new sat
option has been added to configure the HTTP timeout length for
+requests to the API gateway. For more information, refer to sat-man sat
.
sat bootsys
ImprovementsMany improvements and fixes have been made to sat bootsys
. The following are
+some highlights:
--excluded-ncns
option, which can be used to omit NCNs
+from the platform-services
and ncn-power
stages in case they are
+inaccessible.sat bootsys shutdown
now prompt the user to
+continue before proceeding. A new option, --disruptive
, will bypass this.platform-services
stage of sat bootsys boot
.sat xname2nid
Improvementssat xname2nid
can now recursively expand slot, chassis, and cabinet XNames to
+a list of NIDs in those locations.
A new --format
option has been added to sat xname2nid
. It sets the output
+format to either “range” (the default) or “NID”. The “range” format displays NIDs
+in a compressed range format suitable for use with a workload manager like Slurm.
v2
HSM APIThe commands which interact with HSM (for example, sat status
and sat hwinv
)
+now use the v2
HSM API.
sat diag
Limited to HSN Switchessat diag
will now only operate against HSN switches by default. These are the
+only controllers that support running diagnostics with HMJTD.
sat showrev
EnhancementsA column has been added to the output of sat showrev
that indicates whether a
+product version is “active”. The definition of “active” varies across products,
+and not all products may set an “active” version.
For SAT, the active version is the one with its hosted-type package repository
+in Nexus set as the member of the group-type package repository in Nexus,
+meaning that it will be used when installing the cray-sat-podman
RPM.
cray-sat
Container Image Size ReductionThe size of the cray-sat
container image has been approximately cut in half by
+leveraging multi-stage builds. This also improved the repeatability of the unit
+tests by running them in the container.
Minor bug fixes were made in cray-sat
and in cray-sat-podman
. For full
+change lists, refer to each repository’s CHANGELOG.md
file.
The 2.3.4 version of the SAT product includes:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container imagesat-cfs-install
Helm chartsat-install-utility
container imagecfs-config-util
container imagesat
CommandsNone.
+When running sat
commands, the current working directory is now mounted in the
+container as /sat/share
, and the current working directory within the container
+is also /sat/share
.
Files in the current working directory must be specified using relative paths to
+that directory, because the current working directory is always mounted on
+/sat/share
. Absolute paths should be avoided, and paths that are outside of
+$HOME
or $PWD
are never accessible to the container environment.
The home directory is still mounted on the same path inside the container as it +is on the host.
+sat bootsys
The following options were added to sat bootsys
.
--bos-limit
--recursive
The --bos-limit
option passes a given limit string to a BOS session. The
+--recursive
option specifies a slot or other higher-level component in the
+limit string.
sat bootprep
The --delete-ims-jobs
option was added to sat bootprep run
. It deletes IMS
+jobs after sat bootprep
is run. Jobs are no longer deleted by default.
sat status
sat status
now includes information about nodes’ CFS configuration statuses,
+such as desired configuration, configuration status, and error count.
The output of sat status
now splits different component types into different
+report tables.
The following options were added to sat status
.
--hsm-fields
, --sls-fields
, --cfs-fields
--bos-template
The --hsm-fields
, --sls-fields
, --cfs-fields
options limit the output
+columns according to specified CSM services.
The --bos-template
option filters the status report according to the specified
+session template’s boot sets.
The following components were modified to be compatible with CSM 1.2.
+sat-cfs-install
container image and Helm chartsat-install-utility
container imageThe sat-ncn
Ansible role provided by sat-cfs-install
was modified to enable
+GPG checks on packages while leaving GPG checks disabled on repository metadata.
Updated urllib3
dependency to version 1.26.5 to mitigate CVE-2021-33503 and
+refreshed Python dependency versions.
Minor bug fixes were made in each of the repositories. For full change lists,
+refer to each repository’s CHANGELOG.md
file.
The known issues listed under the SAT 2.2 release +were fixed.
+ + + + + +The 2.4.13 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.cfs-config-util
container image.Because of installation refactoring efforts, the following two components +are no longer delivered with SAT:
+sat-cfs-install
container imagesat-cfs-install
Helm chartA version of the cray-sat
container image is now included in CSM. For more
+information, see SAT in CSM.
The SAT install.sh
script no longer uses a sat-cfs-install
Helm chart and
+container image to upload its Ansible content to the sat-config-management
+repository in VCS. Instead, it uses Podman to run the cf-gitea-import
container
+directly. Some of the benefits of this change include the following:
cray-sat
container image and cray-sat-podman
packagecray-sat
Container Image and cray-sat-podman
PackageIn older SAT releases, the sat
wrapper script that was provided by the
+cray-sat-podman
package installed on Kubernetes master NCNs included a
+hard-coded version of the cray-sat
container image. As a result, every new
+version of the cray-sat
image required a corresponding new version of the
+cray-sat-podman
package.
In this release, this tight coupling of the cray-sat-podman
package and the
+cray-sat
container image was removed. The sat
wrapper script provided
+by the cray-sat-podman
package now looks for the version of the cray-sat
+container image in the /opt/cray/etc/sat/version
file. This file is populated
+with the correct version of the cray-sat
container image by the SAT layer of
+the CFS configuration that is applied to management NCNs. If the version
file
+does not exist, the wrapper script defaults to the version of the cray-sat
+container image delivered with the latest version of CSM installed on the system.
The steps for performing NCN personalization as part of the SAT installation
+were moved out of the install.sh
script and into a new
+update-mgmt-ncn-cfs-config.sh
script that is provided in the SAT release
+distribution. The new script provides additional flexibility in how it modifies
+the NCN personalization CFS configuration for SAT. It can modify an existing CFS
+configuration by name, a CFS configuration being built in a JSON file, or an
+existing CFS configuration that applies to certain components.
sat bootprep
FeaturesThe following new features were added to the sat bootprep
command:
Variable substitutions using Jinja2 templates in certain fields of the
+sat bootprep
input file
For more information, see +HPC CSM Software Recipe Variable Substitutions +and Dynamic Variable Substitutions.
+Schema version validation in the sat bootprep
input files
For more information, see +Provide a Schema Version.
+Ability to look up images and recipes provided by products
+For more information, see +Define IMS Images.
+The schema of the sat bootprep
input files was also changed to support these
+new features:
base
key instead of under an ims
key. The old ims
+key is deprecated.base.image_ref
.
+You should no longer use the IMS name of the image on which it depends.image.ims.name
, image.ims.id
, or image.image_ref
. Specifying a string
+value directly under the image
key is deprecated.For more information on defining IMS images and BOS session templates in the
+sat bootprep
input file, see Define IMS Images
+and Define BOS Session Templates.
sat swap
The sat swap
command was updated to support swapping compute and UAN blades
+with sat swap blade
. This functionality is described in the following processes
+of the Cray System Management Documentation:
v2
A new v2
version of the Boot Orchestration Service (BOS) is available in CSM
+1.3.0. SAT has added support for BOS v2
. This impacts the following commands
+that interact with BOS:
sat bootprep
sat bootsys
sat status
By default, SAT uses BOS v1
. However, you can choose the BOS version you want
+to use. For more information, see Change the BOS Version.
sat status
When using BOS v2
, sat status
outputs additional fields. These fields show
+the most recent BOS session, session template, booted image, and boot status for
+each node. An additional --bos-fields
option was added to limit the output of
+sat status
to these fields. The fields are not displayed when using BOS v1
.
This is the first release of SAT built from open source code repositories. +As a result, build infrastructure was changed to use an external Jenkins instance, +and artifacts are now published to an external Artifactory instance. These +changes should not impact the functionality of the SAT product in any way.
+paramiko
Python package version was updated from 2.9.2 to 2.10.1 to
+mitigate CVE-2022-24302.oauthlib
Python package version was updated from 3.2.0 to 3.2.1 to
+mitigate CVE-2022-36087.SAT stores information used to authenticate to the API gateway with Keycloak.
+Token files are stored in the ~/.config/sat/tokens/
directory. Those files
+have always had permissions appropriately set to restrict them to be readable
+only by the user.
Keycloak usernames used to authenticate to the API gateway are stored in the
+SAT config file at ~/.config/sat/sat.toml
. Keycloak usernames are also used in
+the file names of tokens stored in ~/.config/sat/tokens
. As an additional
+security measure, SAT now restricts the permissions of the SAT config file
+to be readable and writable only by the user. It also restricts the tokens
+directory and the entire SAT config directory ~/.config/sat
to be accessible
+only by the user. This prevents other users on the system from viewing
+Keycloak usernames used to authenticate to the API gateway.
sat init
did not print a message confirming a new
+configuration file was created.sat showrev
exited with a traceback if the file
+/opt/cray/etc/site_info.yaml
existed but was empty. This could occur if the
+user exited sat setrev
with Ctrl-C
.sat bootsys
man page, and added a
+description of the command stages.The 2.5.22 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.cfs-config-util
container image.sat
Commandssat jobstat
allows you to access application and job data through the command
+line. It provides a table summarizing information for all jobs on the system.
sat bootprep
A list-vars
subcommand was added to sat bootprep
.
It lists the variables available for use in bootprep input files at runtime.
+A --limit
option was added to sat bootprep run
.
It divides the creation of CFS configurations, IMS images, and BOS session +templates into separate stages. For more information, see +Limit SAT Bootprep Run into Stages.
+sat bootprep
now prompts individually for each CFS configuration that
+already exists.
sat bootprep
can now filter images provided by a product by using a prefix.
This is useful when specifying the base of an image in a bootprep input +file. For more information, see +Define IMS Images.
+To support product names with hyphens, sat bootprep
now converts hyphens to
+underscores within variables.
For more information, see +Hyphens in HPC CSM Software Recipe Variables.
+In sat bootprep
input files, you can now render the value of the playbook
+property of CFS configuration layers with Jinja2 templates.
For more information, see +Values Supporting Jinja2 Template Rendering.
+Output was added to sat bootprep run
that summarizes the CFS configurations,
+IMS images, and BOS session templates created.
For more information, see +Summary of SAT Bootprep Results.
+Improvements were made to the sat bootprep
output when CFS configuration
+and BOS session templates are created.
sat bootsys
reboot
subcommand was added to sat bootsys
. It uses BOS to reboot
+nodes in the bos-operations
stage.--staged-session
option was added to sat bootsys
. It can be used to
+create staged BOS sessions. For more information, refer to Staging Changes
+with BOS in the Cray System Management Documentation.sat
Commandsprodmgr
, a version is no longer set as
+“active” in the product catalog. The “active” field was also removed from the
+output of sat showrev
.sat status
when using BOS
+version two.The new Install and Upgrade Framework (IUF) provides commands which install,
+upgrade, and deploy products with the help of sat bootprep
on HPE Cray EX
+systems managed by Cray System Management (CSM). IUF capabilities are described
+in detail in the IUF section
+of the Cray System Management Documentation.
+The initial install and upgrade workflows described in the
+HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM
+(S-8052) detail when and how to use
+IUF with a new release of SAT or any other HPE Cray EX product.
Because IUF now handles NCN personalization, information about this process was +removed from the SAT documentation. Other sections in the documentation were +also revised to support the new Install and Upgrade Framework. For example, the +SAT Installation and SAT Upgrade sections of this +guide now provide details on software and configuration content specific to SAT. +The Cray System Management Documentation +will indicate when these sections should be referred to for detailed information.
+For more information on the relationship between sat bootprep
and IUF, see
+SAT and IUF.
By default, SAT now uses version two of the Boot Orchestration Service (BOS).
+This change to BOS v2
impacts the following commands that interact with BOS:
sat bootprep
sat bootsys
sat status
If needed, you can choose the BOS version you want to use. For more information, +see Change the BOS Version.
+sat
python package and CLI from
+2021.10.8 to 2022.12.7 to resolve CVE-2022-23491.sat-install-utility
container image
+from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491.sat init
from creating a configuration file in
+the current directory when not prefixed with ./
.sat status
failed with a traceback when using BOS
+version two and reported components whose most recent image did not exist.sat
container could contain a different
+version of kubectl
than the version found in CSM.sat bootprep
and
+sat swap blade
.Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
For more information on each of these commands, see the +SAT Command Overview and the table +of commands in the Authenticate SAT Commands +section of this document.
+ + + + + +We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat
container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes management cluster (workers and
+masters). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Config File Location ChangeThe default location of the SAT config file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own config files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment
+variable.
Additionally, if a config file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
+option.--list-node-accels
option.
+The count of node accelerators is also included for each node.--list-node-accel-risers
option. The count of node accelerator risers is also
+included for each node.--list-node-hsn-nics
option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding config-file
+option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding config file options were
+deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootsys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptIt also added the following new component:
+sat-cfs-install
docker image and helm chartThe following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named ncn-personalization
.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-Diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+programs on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and config file options +have been removed. For more information, see +Remove Obsolete Configuration File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then
+displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes masters and workers.
sat firmware
.cray-sat
container image.sat firmware
command.This procedure can be used to uninstall a version of SAT.
+prodmgr
.prodmgr
command is available.Use sat showrev
to list versions of SAT.
ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+-------------------+-----------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+-------------------+-----------------------+
+| sat | 2.3.3 | - | - |
+| sat | 2.2.10 | - | - |
++--------------+-----------------+-------------------+-----------------------+
+
Use prodmgr
to uninstall a version of SAT.
This command will do three things:
+cray-product-catalog
Kubernetes ConfigMap, so that it will no longer show up
+in the output of sat showrev
.ncn-m001# prodmgr uninstall sat 2.2.10
+Repository sat-2.2.10-sle-15sp2 has been removed.
+Removed Docker image cray/cray-sat:3.9.0
+Removed Docker image cray/sat-cfs-install:1.0.2
+Removed Docker image cray/sat-install-utility:1.4.0
+Deleted sat-2.2.10 from product catalog.
+
This procedure can be used to downgrade the active version of SAT.
+prodmgr
command is available.Use sat showrev
to list versions of SAT.
ncn-m001# sat showrev --products --filter product_name=sat
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------------------+-----------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+--------------------+-----------------------+
+| sat | 2.3.3 | - | - |
+| sat | 2.2.10 | - | - |
++--------------+-----------------+--------------------+-----------------------+
+
Use prodmgr
to switch to a different version of SAT.
This command will do two things:
+2.2.10
+sets the repository sat-2.2.10-sle-15sp2
as the only member of the sat-sle-15sp2
group.management-23.5.0
). Specifically, it will ensure that the layer refers to the version of SAT CFS
+configuration content associated with the version of SAT to which you are switching.ncn-m001# prodmgr activate sat 2.5.15
+Repository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4.
+Updated CFS configurations: [management-23.5.0]
+
Apply the modified CFS configuration to the management NCNs.
+At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.
+To ensure that management NCNs have been updated to use the active SAT +version, follow the Procedure to Apply CFS Configuration.
+Set an environment variable that refers to the name of the CFS configuration +to be applied to the management NCNs.
+ncn-m001# export CFS_CONFIG_NAME="management-23.5.0"
+
Note: Refer to the output from the prodmgr activate
command to find
+the name of the modified CFS configuration. If more than one CFS configuration
+was modified, use the first one.
INFO: Successfully saved CFS configuration "management-23.5.0"
+
Obtain the name of the CFS configuration layer for SAT and save it in an +environment variable:
+ncn-m001# export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
+ | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
+
Create a CFS session that executes only the SAT layer of the given CFS +configuration.
+The --configuration-limit
option limits the configuration session to run
+only the SAT layer of the configuration.
ncn-m001# cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
+ "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
+
Monitor the progress of the CFS session.
+Set an environment variable to the name of the Ansible container within the pod
+ncn-m001# export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} \
+ -o json | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
+
Next, get the logs for the Ansible container.
+ncn-m001# kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
+
Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output. The following example shows a successful session.
+...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Note: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+Verify that SAT was successfully configured.
+If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
Note: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
ncn-m001# sat --version
+sat 3.7.0
+
Note: Upon first running sat
, you may see additional output while the sat
+container image is downloaded. This will occur the first time sat
is run on
+each manager NCN. For example, if you run sat
for the first time on ncn-m001
+and then for the first time on ncn-m002
, you will see this additional output
+both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
Stop the typescript.
+ncn-m001# exit
+
SAT version x.y.z
is now installed and configured:
The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.
+The main scenario in which the CFS batcher will not automatically re-apply the +SAT layer is when the commit hash of the sat-config-management git repository +has not changed between SAT versions. The previous procedure ensures the +configuration is re-applied in all cases, and it is harmless if the batcher has +already applied an updated configuration.
+ + + + + +The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.
+This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.
+IUF will perform the following tasks for a release of SAT.
+deliver-product
stage:
+update-vcs-config
stage:
+update-cfs-config
stage:
+prepare-images
stage:
+management-nodes-rollout
stage:
+IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.
+This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.
+Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.
+Information: This stage only applies to the management configuration and +not to the managed configuration.
+Information: This stage only applies to management images and not to +managed images.
+After upgrading SAT with IUF, it is recommended that you complete the following +procedures before using SAT:
+ +...
) in shell output indicate omitted lines.x.y.z
with the version of the SAT product stream
+being upgraded.After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
+In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
In the following example, the stderr
log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
ncn-m001:~ # grep -A 3 logging ~/.config/sat/sat.toml
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
sat sensors
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
sat xname2nid
sat swap
HPE service representatives use system revision information data to identify +systems in support cases.
+This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) +or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or +earlier.
+Set System Revision Information.
+Run sat setrev
and follow the prompts to set the following site-specific values:
Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.
+ncn-m001# sat setrev
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Verify System Revision Information.
Run sat showrev and verify the output shown in the “System Revision Information” table.
The following example shows sample table output.
+ncn-m001# sat showrev
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
By default, SAT uses Boot Orchestration Service (BOS) version two (v2). You can select the BOS version to use for individual commands with the --bos-version option. For more information on this option, refer to the man page for a specific command.
You can also configure the BOS version to use in the SAT config file. Do this under the api_version setting in the bos section of the config file. If the system is using an existing SAT config file from an older version of SAT, the bos section might not exist. In that case, add the bos section with the BOS version desired in the api_version setting.
Find the SAT config file at ~/.config/sat/sat.toml, and look for a section like this:
[bos]
+api_version = "v2"
+
In this example, SAT is using BOS version "v2".
Change the line specifying the api_version to the BOS version desired (for example, "v1").
[bos]
+api_version = "v1"
+
If applicable, uncomment the api_version line.
If the system is using an existing SAT config file from a recent version of SAT, the api_version line might be commented out like this:
[bos]
+# api_version = "v2"
+
If the line is commented out, SAT will still use the default BOS version. To ensure a different BOS version is used, uncomment the api_version line by removing # at the beginning of the line.
The Install and Upgrade Framework (IUF) provides commands which install, upgrade, and deploy products on systems managed by CSM with the help of sat bootprep. Outside of IUF, it is uncommon to use sat bootprep.
For more information on IUF, see the IUF section of the Cray System Management Documentation.
For more information on sat bootprep, see SAT Bootprep.
Both IUF and sat bootprep allow variable substitutions into the default HPC CSM Software Recipe bootprep input files. The default variables of the HPC CSM Software Recipe are available in a product_vars.yaml file. To override the default variables, specify any site variables in a site_vars.yaml file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order.
IUF also has special session variables internal to the iuf command that override any matching entries. Session variables are the set of product and version combinations being installed by the current IUF activity, and they are found inside IUF’s internal session_vars.yaml file. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.
When using sat bootprep outside of IUF, you might encounter problems substituting variables into the default bootprep input files. Complex variables like "{{ working_branch }}" cannot be completely resolved outside of IUF and its internal session variables. Thus, the default product_vars.yaml file is unusable with only the sat bootprep command when variables like "{{ working_branch }}" are used. To work around this limitation if you are substituting complex variables, use the internal IUF session_vars.yaml file with sat bootprep and the default bootprep input files.
Find the session_vars.yaml file from the most recent IUF activity on the system.
This process is documented in the upgrade prerequisites procedure of the Cray System Management Documentation. For more information, see steps 1-6 of Stage 0.3 - Option 2.
Use the session_vars.yaml file to substitute variables into the default bootprep input files.
ncn-m001# sat bootprep run --vars-file session_vars.yaml
+
The sat bootprep run command uses information from the bootprep input files to create CFS configurations, IMS images, and BOS session templates. To restrict this creation into separate stages, use the --limit option and list whether you want to create configurations, images, session_templates, or some combination of these. IUF uses the --limit option in this way to install, upgrade, and deploy products on a system in stages. For example, to create only CFS configurations, run the following command used by the IUF update-cfs-config stage:
ncn-m001# sat bootprep run --limit configurations example-bootprep-input-file.yaml
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Creating 3 CFS configurations
+...
+INFO: Skipping creation of IMS images based on value of --limit option.
+INFO: Skipping creation of BOS session templates based on value of --limit option.
+
To create only IMS images and BOS session templates, run the following command used by the IUF prepare-images stage:
ncn-m001# sat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Skipping creation of CFS configurations based on value of --limit option.
+
+
+
+
+
SAT provides an automated solution for creating CFS configurations, building and configuring images in IMS, and creating BOS session templates. The solution is based on a given input file that defines how those configurations, images, and session templates should be created. This automated process centers around the sat bootprep command. Man page documentation for sat bootprep can be viewed similar to other SAT commands.
ncn-m001# sat-man sat-bootprep
+
The sat bootprep command helps the Install and Upgrade Framework (IUF) install, upgrade, and deploy products on systems managed by CSM. Outside of IUF, it is uncommon to use sat bootprep. For more information on this relationship, see SAT and IUF. For more information on IUF, see the IUF section of the Cray System Management Documentation.
sat bootprep is used to create CFS configurations, build and rename IMS images, and create BOS session templates which tie the configurations and images together during a BOS session.
sat bootsys automates several portions of the boot and shutdown processes, including (but not limited to) performing BOS operations (such as creating BOS sessions), powering on and off cabinets, and checking the state of the system prior to shutdown.
The input file provided to sat bootprep is a YAML-formatted file containing information which CFS, IMS, and BOS use to create configurations, images, and BOS session templates respectively. Writing and modifying these input files is the main task associated with using sat bootprep. An input file is composed of three main sections, one each for configurations, images, and session templates. These sections may be specified in any order, and any of the sections may be omitted if desired.
The sat bootprep input file is validated against a versioned schema definition. The input file should specify the version of the schema with which it is compatible under a schema_version key. For example:
---
+schema_version: 1.0.2
+
The current sat bootprep input file schema version can be viewed with the following command:
ncn-m001# sat bootprep view-schema | grep '^version:'
+version: '1.0.2'
+
The sat bootprep run command validates the schema version specified in the input file. The command also makes sure that the schema version of the input file is compatible with the schema version understood by the current version of sat bootprep. For more information on schema version validation, refer to the schema_version property description in the bootprep input file schema. For more information on viewing the bootprep input file schema in either raw form or user-friendly HTML form, see View SAT Bootprep Schema.
The default HPC CSM Software Recipe bootprep input files provided by the hpc-csm-software-recipe release distribution already contain the correct schema version.
The CFS configurations are defined under a configurations key. Under this key, you can list one or more configurations to create. For each configuration, give a name in addition to the list of layers that comprise the configuration.
Each layer can be defined by a product name and optionally a version number, commit hash, or branch in the product’s configuration repository. If this method is used, the layer is created in CFS by looking up relevant configuration information (including the configuration repository and commit information) from the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be supplied. However, if it is absent, the version is assumed to be the latest version found in the cray-product-catalog.
Alternatively, a configuration layer can be defined by explicitly referencing the desired configuration repository. You must then specify the intended version of the Ansible playbooks by providing a branch name or commit hash with branch or commit.
The following example shows a CFS configuration with two layers. The first layer is defined in terms of a product name and version, and the second layer is defined in terms of a Git clone URL and branch:
+---
+configurations:
+- name: example-configuration
+ layers:
+ - name: example-product
+ playbook: example.yml
+ product:
+ name: example
+ version: 1.2.3
+ - name: another-example-product
+ playbook: another-example.yml
+ git:
+ url: "https://vcs.local/vcs/another-example-config-management.git"
+ branch: main
+
When sat bootprep is run against an input file, a CFS configuration is created corresponding to each configuration in the configurations section. For example, the configuration created from an input file with the layers listed above might look something like the following:
{
+ "lastUpdated": "2022-02-07T21:47:49Z",
+ "layers": [
+ {
+ "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "example product",
+ "playbook": "example.yml"
+ },
+ {
+ "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "another example product",
+ "playbook": "another-example.yml"
+ }
+ ],
+ "name": "example-configuration"
+}
+
The IMS images are defined under an images key. Under the images key, the user may define one or more images to be created in a list. Each element of the list defines a separate IMS image to be built and/or configured. Images must contain a name key and a base key.
The name key defines the name of the resulting IMS image. The base key defines the base image to be configured or the base recipe to be built and optionally configured. One of the following keys must be present under the base key:
- An ims key to specify an existing image or recipe in IMS.
- A product key to specify an image or recipe provided by a particular version of a product. If a product provides more than one image or recipe, a filter string prefix must be specified to select one.
- An image_ref key to specify another image from the input file using its ref_name.

Images may also contain the following keys:

- A configuration key to specify a CFS configuration with which to customize the built image. If a configuration is specified, then configuration groups must also be specified using the configuration_group_names key.
- A ref_name key to specify a unique name that can refer to this image within the input file in other images or in session templates. The ref_name key allows references to images from the input file that have dynamically generated names as described in Dynamic Variable Substitutions.
- A description key to describe the image in the bootprep input file. Note that this key is not currently used.

Here is an example of an image using an existing IMS recipe as its base. This example builds an IMS image from that recipe. It then configures it with a CFS configuration named example-compute-config. The example-compute-config CFS configuration can be defined under the configurations key in the same input file, or it can be an existing CFS configuration. Running sat bootprep against this input file results in an image named example-compute-image.
images:
+- name: example-compute-image
+ description: >
+ An example compute node image built from an existing IMS recipe.
+ base:
+ ims:
+ name: example-compute-image-recipe
+ type: recipe
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
Here is an example showing the definition of two images. The first image is built from a recipe provided by the cos product. The second image uses the first image as a base and configures it with a configuration named example-compute-config. The value of the first image’s ref_name key is used in the second image’s base.image_ref key to specify it as a dependency. Running sat bootprep against this input file results in two images, the first named example-cos-image and the second named example-compute-image.
images:
+- name: example-cos-image
+ ref_name: example-cos-image
+ description: >
+ An example image built from a recipe provided by the COS product.
+ base:
+ product:
+ name: cos
+ version: 2.3.101
+ type: recipe
+- name: example-compute-image
+ description: >
+ An example image built from a recipe provided by the COS product.
+ base:
+ image_ref: example-cos-image
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
Here is an example of three IMS images built from the Kubernetes image and the Ceph storage image provided by the csm product. This example uses a filter string prefix to select from the multiple images provided by the CSM product. The first two IMS images in the example find any image from the specified csm product version whose name starts with secure-kubernetes. The third image in the example finds any csm image whose name starts with secure-storage-ceph. All three images are then configured with a configuration named example-management-config. Running sat bootprep against this input file results in three IMS images named worker-example-csm-image, master-example-csm-image, and storage-example-csm-image.
images:
+- name: worker-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-kubernetes
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Worker
+
+- name: master-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-kubernetes
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Master
+
+- name: storage-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-storage-ceph
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Storage
+
The BOS session templates are defined under the session_templates key. Each session template must provide values for the name, image, configuration, and bos_parameters keys. The name key defines the name of the resulting BOS session template. The image key defines the image to use in the BOS session template. One of the following keys must be present under the image key:
- An ims key to specify an existing image or recipe in IMS.
- An image_ref key to specify another image from the input file using its ref_name.

The configuration key defines the CFS configuration specified in the BOS session template.
The bos_parameters key defines parameters that are passed through directly to the BOS session template. The bos_parameters key should contain a boot_sets key, and each boot set in the session template should be specified under boot_sets. Each boot set can contain the following keys, all of which are optional:
- A kernel_parameters key to specify the parameters passed to the kernel on the command line.
- A network key to specify the network over which the nodes boot.
- A node_list key to specify the nodes to add to the boot set.
- A node_roles_groups key to specify the HSM roles to add to the boot set.
- A node_groups key to specify the HSM groups to add to the boot set.
- A rootfs_provider key to specify the root file system provider.
- A rootfs_provider_passthrough key to specify the parameters to add to the rootfs= kernel parameter.

As mentioned above, the parameters under bos_parameters are passed through directly to BOS. For more information on the properties of a BOS boot set, refer to BOS Session Templates in the Cray System Management Documentation.
Here is an example of a BOS session template that refers to an existing IMS image by name:
+session_templates:
+- name: example-session-template
+ image:
+ ims:
+ name: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
Here is an example of a BOS session template that refers to an image from the input file by its ref_name. This requires that an image defined in the input file specifies example-image as the value of its ref_name key.
session_templates:
+- name: example-session-template
+ image:
+ image_ref: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
The sat bootprep command takes any variables you provide and substitutes them into the input file. Variables are sourced from the command line, any variable files directly provided, and the HPC CSM Software Recipe files used, in that order. When you provide values through a variable file, sat bootprep substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe provides default variables in a product_vars.yaml variable file. This file defines information about each HPC software product included in the recipe.
You will primarily substitute variables into the default HPC CSM Software Recipe bootprep input files through IUF. However, variable files can also be given to sat bootprep directly from IUF’s use of the recipe. If you do use variables directly with sat bootprep, you might encounter some limitations. For more information on SAT variable limitations, see SAT and IUF. For more information on IUF and variable substitutions, see the IUF section of the Cray System Management Documentation.
You can view a listing of the default HPC CSM Software Recipe variables and their values by running sat bootprep list-vars. For more information on options that can be used with the list-vars subcommand, refer to the man page for the sat bootprep subcommand.
By default, the sat bootprep command uses the variables from the latest installed version of the HPC CSM Software Recipe. However, you can override this with the --recipe-version command line argument to sat bootprep run.
For example, to explicitly select the 22.11.0 version of the HPC CSM Software Recipe default variables, specify --recipe-version 22.11.0:
ncn-m001# sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+
The entire sat bootprep input file is not rendered by the Jinja2 template engine. Jinja2 template rendering of the input file is performed individually for each supported value. The values of the following keys in the bootprep input file support rendering as a Jinja2 template and thus support variables:

- The name key of each configuration under the configurations key.
- The following keys of each layer under the layers key in a configuration:
  - name
  - playbook
  - git.branch
  - product.version
  - product.branch
- The following keys of each image under the images key:
  - name
  - base.product.version
  - configuration
- The following keys of each session template under the session_templates key:
  - name
  - configuration
You can use Jinja2 built-in filters in values of any of the keys listed above. In addition, Python string methods can be called on the string variables.
Variable names with hyphens are not allowed in Jinja2 expressions because they are parsed as an arithmetic expression instead of a single variable. To support product names with hyphens, sat bootprep converts hyphens to underscores in all top-level keys of the default HPC CSM Software Recipe variables. It also converts any variables sourced from the command line or any variable files you provide directly. When referring to a variable with hyphens in the bootprep input file, keep this in mind. For example, to refer to the product version variable for slingshot-host-software in the bootprep input file, write "{{slingshot_host_software.version}}".
The following example bootprep input file shows how a variable of a COS version can be used in an input file that creates a CFS configuration for computes. Only one layer is shown for brevity.
+---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: cos-compute-{{cos.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: "{{cos.working_branch}}"
+
Note: When the value of a key in the bootprep input file is a Jinja2 expression, it must be quoted to pass YAML syntax checking.
Jinja2 expressions can also use filters and Python’s built-in string methods to manipulate the variable values. For example, suppose only the major and minor components of a COS version are to be used in the branch name for the COS layer of the CFS configuration. You can use the split string method to achieve this as follows:
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: cos-compute-{{cos.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: integration-{{cos.version.split('.')[0]}}-{{cos.version.split('.')[1]}}
+
Additional variables are available besides the default variables provided by the HPC CSM Software Recipe. (For more information, see HPC CSM Software Recipe Variable Substitutions.) These additional variables are dynamic because their values are determined at run-time based on the context in which they appear. Available dynamic variables include the following:

- The variable base.name can be used in the name of an image under the images key. The value of this variable is the name of the IMS image or recipe used as the base of this image.
- The variable image.name can be used in the name of a session template under the session_templates key. The value of this variable is the name of the IMS image used in this session template.

Note: The name of a session template is restricted to 45 characters. Keep this in mind when using image.name in the name of a session template.
These variables reduce the need to duplicate values throughout the sat bootprep input file and make the following use cases possible:
This section provides an example bootprep input file. It also gives instructions for obtaining the default bootprep input files delivered with a release of the HPC CSM Software Recipe.
The following bootprep input file provides an example of using most of the features described in previous sections. It is not intended to be a complete bootprep file for the entire CSM product.
+---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: cos-compute-{{cos.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: cos
+ version: "{{cos.version}}"
+ branch: "{{cos.working_branch}}"
+ - name: cpe-pe_deploy-{{cpe.working_branch}}
+ playbook: pe_deploy.yml
+ product:
+ name: cpe
+ version: "{{cpe.version}}"
+ branch: "{{cpe.working_branch}}"
+
+images:
+- name: "{{default.note}}{{base.name}}{{default.suffix}}"
+ ref_name: base_cos_image
+ base:
+ product:
+ name: cos
+ type: recipe
+ version: "{{cos.version}}"
+
+- name: "compute-{{base.name}}"
+ ref_name: compute_image
+ base:
+ image_ref: base_cos_image
+ configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ configuration_group_names:
+ - Compute
+
+session_templates:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ image:
+ image_ref: compute_image
+ configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ bos_parameters:
+ boot_sets:
+ compute:
+ kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+ node_roles_groups:
+ - Compute
+ rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+
Default bootprep input files are delivered by the HPC CSM Software Recipe product. You can access these files by cloning the hpc-csm-software-recipe repository, as described in the Accessing sat bootprep files process of the Cray System Management Documentation. Find the default input files in the bootprep directory of the cloned repository:
ncn-m001# ls bootprep/
+
The sat bootprep generate-example command was not updated for recent bootprep schema changes. It is recommended that you instead use the default bootprep input files described in Access Default Bootprep Input Files. The sat bootprep generate-example command will be updated in a future release of SAT.
The sat bootprep run command uses information from the bootprep input file to create CFS configurations, IMS images, and BOS session templates. For easy reference, the command also includes output summarizing the final creation results. The following example shows a sample table output.
ncn-m001# sat bootprep run
+...
+################################################################################
+CFS configurations
+################################################################################
++------------------+
+| name |
++------------------+
+| example-config-1 |
+| example-config-2 |
++------------------+
+################################################################################
+IMS images
+################################################################################
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| name | preconfigured_image_id | final_image_id | configuration | configuration_group_names |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+################################################################################
+BOS session templates
+################################################################################
++------------------+----------------+
+| name | configuration |
++------------------+----------------+
+| example-template | example-config |
++------------------+----------------+
+
The contents of the YAML input files used by sat bootprep must conform to a schema which defines the structure of the data. The schema definition is written using the JSON Schema format. (Although the format is named “JSON Schema”, the schema itself is written in YAML as well.) More information, including introductory materials and a formal specification of the JSON Schema metaschema, can be found on the JSON Schema website.
To view the exact schema specification, run sat bootprep view-schema.
ncn-m001# sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+...
+title: Bootprep Input File
+description: >
+ A description of the set of CFS configurations to create, the set of IMS
+ images to create and optionally customize with the defined CFS configurations,
+ and the set of BOS session templates to create that reference the defined
+ images and configurations.
+type: object
+additionalProperties: false
+properties:
+ ...
+
The raw schema definition can be difficult to understand without experience working with JSON Schema specifications. For this reason, a feature is included with sat bootprep that generates user-friendly HTML documentation for the input file schema. This HTML documentation can be browsed with your preferred web browser.
Create a documentation tarball using sat bootprep.
ncn-m001# sat bootprep generate-docs
+INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
+
An alternate output directory can be specified with the --output-dir option. The generated tarball is always named bootprep-schema-docs.tar.gz.
ncn-m001# sat bootprep generate-docs --output-dir /tmp
+INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
+
From another machine, copy the tarball to a local directory.
+another-machine$ scp root@ncn-m001:bootprep-schema-docs.tar.gz .
+
Extract the contents of the tarball and open the contained index.html.
another-machine$ tar xzvf bootprep-schema-docs.tar.gz
+x bootprep-schema-docs/
+x bootprep-schema-docs/index.html
+x bootprep-schema-docs/schema_doc.css
+x bootprep-schema-docs/schema_doc.min.js
+another-machine$ open bootprep-schema-docs/index.html
+
Some SAT subcommands make requests to the HPE Cray EX services through the API gateway and thus require authentication to the API gateway in order to function. Other SAT subcommands use the Kubernetes API. Some sat commands require S3 to be configured. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes control plane node where SAT commands are run.
For more information on authentication requests, see System Security and Authentication in the Cray System Management Documentation. The following is a table describing SAT commands and the types of authentication they require.
+SAT Subcommand | +Authentication/Credentials Required | +Man Page | +Description | +
---|---|---|---|
sat auth |
+Responsible for authenticating to the API gateway and storing a token. | +sat-auth |
+Authenticate to the API gateway and save the token. | +
sat bmccreds |
+Requires authentication to the API gateway. | +sat-bmccreds |
+Set BMC passwords. | +
sat bootprep |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is done on ncn-m001 during the install. |
+sat-bootprep |
+Prepare to boot nodes with images and configurations. | +
sat bootsys |
+Requires authentication to the API gateway. Requires Kubernetes configuration and authentication, which is configured on ncn-m001 during the install. Some stages require passwordless SSH to be configured to all other NCNs. Requires S3 to be configured for some stages. |
+sat-bootsys |
+Boot or shutdown the system, including compute nodes, application nodes, and non-compute nodes (NCNs) running the management software. | +
sat diag |
+Requires authentication to the API gateway. | +sat-diag |
+Launch diagnostics on the HSN switches and generate a report. | +
sat firmware |
+Requires authentication to the API gateway. | +sat-firmware |
+Report firmware version. | +
sat hwhist |
+Requires authentication to the API gateway. | +sat-hwhist |
+Report hardware component history. | +
sat hwinv |
+Requires authentication to the API gateway. | +sat-hwinv |
+Give a listing of the hardware of the HPE Cray EX system. | +
sat hwmatch |
+Requires authentication to the API gateway. | +sat-hwmatch |
+Report hardware mismatches. | +
sat init |
+None | +sat-init |
+Create a default SAT configuration file. | +
sat jobstat |
+Requires authentication to the API gateway. | +sat-jobstat |
+Check the status of jobs and applications. | +
sat k8s |
+Requires Kubernetes configuration and authentication, which is automatically configured on ncn-m001 during the install. |
+sat-k8s |
+Report on Kubernetes replica sets that have co-located (on the same node) replicas. | +
sat linkhealth |
+This command has been deprecated. | ++ | + |
sat nid2xname |
+Requires authentication to the API gateway. | +sat-nid2xname |
+Translate node IDs to node XNames. | +
sat sensors |
+Requires authentication to the API gateway. | +sat-sensors |
+Report current sensor data. | +
sat setrev |
+Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-setrev |
+Set HPE Cray EX system revision information. | +
sat showrev |
+Requires API gateway authentication in order to query the Interconnect from HSM. Requires S3 to be configured for site information such as system name, serial number, install date, and site name. | +sat-showrev |
+Print revision information for the HPE Cray EX system. | +
sat slscheck |
+Requires authentication to the API gateway. | +sat-slscheck |
+Perform a cross-check between SLS and HSM. | +
sat status |
+Requires authentication to the API gateway. | +sat-status |
+Report node status across the HPE Cray EX system. | +
sat swap |
+Requires authentication to the API gateway. | +sat-swap |
+Prepare HSN switch or cable for replacement and bring HSN switch or cable into service. | +
sat xname2nid |
+Requires authentication to the API gateway. | +sat-xname2nid |
+Translate node and node BMC XNames to node IDs. | +
sat switch |
+This command has been deprecated. It has been replaced by sat swap . |
++ | + |
In order to authenticate to the API gateway, run the sat auth
+command. This command will prompt for a password on the command line. The
+username value is obtained from the following locations, in order of higher
+precedence to lower precedence:
--username
global command-line option.username
option in the api_gateway
section of the configuration file
+at ~/.config/sat/sat.toml
.sat
command.If credentials are entered correctly when prompted by sat auth
, a token file
+will be obtained and saved to ~/.config/sat/tokens
. Subsequent sat commands
+will determine the username the same way as sat auth
described above and will
+use the token for that username if it has been obtained and saved by sat auth
.
Most sat
subcommands depend on services or components from other products in the
+HPE Cray EX software stack. The following list shows these dependencies for each
+subcommand. Each service or component is listed under the product it belongs to.
sat auth
sat bmccreds
sat bootprep
sat bootsys
sat diag
sat firmware
sat hwhist
sat hwinv
sat hwmatch
sat init
None
+sat jobstat
sat k8s
sat nid2xname
sat sensors
sat setrev
sat showrev
sat slscheck
sat status
sat swap
sat switch
Deprecated: See sat swap
sat xname2nid
The System Admin Toolkit (SAT) is designed to assist administrators with common tasks, such as troubleshooting and querying information about the HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components.
+SAT offers a command line utility which uses subcommands. There are similarities between SAT commands and xt
commands
+used on the Cray XC platform. For more information on SAT commands, see SAT Command Overview.
In CSM 1.3 and newer, the sat
command is automatically available on all the
Kubernetes control plane nodes. For more information, see SAT in CSM. Older
+versions of CSM do not have the sat
command automatically available, and SAT
+must be installed as a separate product.
Describes the SAT Command Line Utility, lists the key commands found in the System Admin Toolkit man pages, and provides +instruction on the SAT Container Environment.
+The primary component of the System Admin Toolkit (SAT) is a command-line utility run from Kubernetes control plane nodes
+(ncn-m
nodes).
It is designed to assist administrators with common tasks, such as troubleshooting and querying information about the
+HPE Cray EX System and its components, system boot and shutdown, and replacing hardware components. There are
+similarities between SAT commands and xt
commands used on the Cray XC platform.
The top-level SAT man page describes the toolkit, documents the global options affecting all subcommands, documents +configuration file options, and references the man page for each subcommand. SAT consists of many subcommands that each +have their own set of options.
+The sat
command-line utility runs in a container using Podman, a daemonless container runtime. SAT runs on
+Kubernetes control plane nodes. A few important points about the SAT container environment include the following:
sat
or sat bash
always launches a container.There are two ways to run sat
.
sat bash
, followed by a sat
command.sat
command directly on a Kubernetes control plane node.In both of these cases, a container is launched in the background to execute the command. The first option, running
+sat bash
first, gives an interactive shell, at which point sat
commands can be run. In the second option, the
+container is launched, executes the command, and upon the command’s completion the container exits. The following two
+examples show the same action, checking the system status, using both modes.
(ncn-m001#
) Here is an example using interactive mode:
sat bash
+
((CONTAINER_ID) sat-container#
) Example sat
command after a container is launched:
sat status
+
(ncn-m001#
) Here is an example using non-interactive mode:
sat status
+
Running sat
using the interactive command prompt gives the ability to read and write local files on ephemeral
+container storage. If multiple sat
commands are being run in succession, use sat bash
to launch the
+container beforehand. This will save time because the container does not need to be launched for each sat
command.
The non-interactive mode is useful if calling sat
with a script, or when running a single sat
command as a part of
+several steps that need to be executed from a management NCN.
To view a sat
man page from a Kubernetes control plane node, use sat-man
on the manager node.
(ncn-m001#
) Here is an example:
sat-man status
+
A man page describing the SAT container environment is available on the Kubernetes control plane nodes, which can be viewed
+either with man sat
or man sat-podman
from the manager node.
(ncn-m001#
) Here are examples:
man sat
+
man sat-podman
+
The host name in a command prompt indicates where the command must be run. The user account that must run the command is also indicated in the prompt.
+root
or super-user account always has host name in the prompt and the
+#
character at the end of the prompt.account@hostname>
. A non-privileged
+account is referred to as user.#
character at the end of the prompt.Command Prompt | +Meaning | +
---|---|
ncn-m001# |
+Run the command as root on the specific Kubernetes control plane server which has this hostname (ncn-m001 in this example). (Non-interactive) |
+
user@hostname> |
+Run the command as any non-root user on the specified hostname. (Non-interactive) |
+
(venv) user@hostname> |
+Run the command as any non-root user within a Python virtual environment on the specified hostname. (Non-interactive) |
+
(CONTAINER_ID) sat-container# |
+Run the command inside the SAT container environment by first running sat bash . (Interactive) |
+
These command prompts should be inserted into text before the fenced code block instead of inside of it. This is a change from the documentation of SAT 2.5 and earlier. Here is an example of the new use of the command prompt:
+(ncn-m001#
) Example first step.
yes >/dev/null
+
In CSM 1.3 and newer, the sat
command is automatically available on the Kubernetes control plane, but it is still possible
+to install SAT as a separate product stream. Any version of SAT installed as a separate product stream overrides the
+sat
command available in CSM. Installing the SAT product stream allows additional supporting components to be added:
An entry for SAT in the cray-product-catalog
Kubernetes ConfigMap is only created by installing the SAT product
+stream. Otherwise, there will be no entry for this version of SAT in the output of sat showrev
.
The sat-install-utility
container image is only available with the full SAT product stream. This container image
+provides uninstall and downgrade functionality when used with the prodmgr
command. (In SAT 2.3 and older, SAT was
+only available to install as a separate product stream. Because these versions were packaged with
+sat-install-utility
, it is still possible to uninstall these versions of SAT.)
The docs-sat
RPM package is only available with the full SAT product stream.
The sat-config-management
git repository in Gitea (VCS) and thus the SAT layer of NCN CFS configuration is
+only available with the full SAT product stream.
If the SAT product stream is not installed, there will be no configuration content for SAT in VCS. Therefore, CFS
+configurations that apply to management NCNs (for example, management-23.5.0
) should not include a SAT layer.
The SAT configuration layer modifies the permissions of files left over from prior installations of SAT, so that the
+Keycloak username that authenticates to the API gateway cannot be read by users other than root
. Specifically,
+it does the following:
Modifies the sat.toml
configuration file which contains the username so that it is only readable by root
.
Modifies the /root/.config/sat/tokens
directory so that the directory is only readable by root
. This is needed
+because the names of the files within the tokens
directory contain the username.
Regardless of the SAT configuration being applied, passwords and the contents of the tokens are never readable by other users. These permission changes only apply to files created by previous installations of SAT. In the current version of SAT all files and directories are created with the appropriate permissions.
View the System Admin Toolkit (SAT) documentation both online and offline by using the information in this section.
The SAT documentation can be found online in HTML form at the following link: SAT Documentation. The navigation pane on the left of the HTML page orders topics alphabetically. Navigate an individual topic’s headings by using the Headings icon at the top of the page, as shown in the following images.
+ + +The documentation can also be viewed online in GitHub by navigating to the
+docs/
subdirectory of the
+docs-sat
repository.
+Navigate an individual topic’s headings with a similar
+Headings icon at the top of the page, as shown in the following images.
The SAT documentation is available offline as markdown, which can be
+viewed with a markdown viewer or with a text editor. The offline
+documentation is available in the docs/
directory of the SAT release
+distribution as well as in RPM package format. The RPM package is
+installed as a part of the Ansible plays launched by the Configuration
+Framework Service (CFS). Its files are installed to /usr/share/doc/sat
.
SAT can optionally be installed and configured on an external system to interact +with CSM over the CAN.
+Most SAT subcommands work by accessing APIs which are reachable via the CAN. +However, certain SAT commands depend on host-based functionality on the +management NCNs and will not work from an external system. This includes the +following:
+platform-services
and ncn-power
stages of sat bootsys
--local
option of sat showrev
Installing SAT on an external system is not an officially supported configuration. +These instructions are provided “as-is” with the hope that they can be useful for +users who desire additional flexibility.
+Certain additional steps may need to be taken to install and configure SAT +depending on the configuration of the external system in use. These additional +steps may include provisioning virtual machines, installing packages, or +configuring TLS certificates, and these steps are outside the scope of this +documentation. This section covers only the steps needed to configure SAT to +use externally-accessible API endpoints exposed by CSM.
+kubectl
, openssh
, git
, and curl
are installed on the external system.(user@hostname>
) Create a Python virtual environment.
SAT_VENV_PATH="$(pwd)/venv"
+python3 -m venv ${SAT_VENV_PATH}
+. ${SAT_VENV_PATH}/bin/activate
+
((venv) user@hostname>
) Clone the SAT source code.
To use SAT version 3.21, this example clones the release/3.21
branch of
+Cray-HPE/sat
.
git clone --branch=release/3.21 https://github.com/Cray-HPE/sat.git
+
Set up the SAT CSM Python dependencies to be installed from their source code.
+SAT CSM Python dependency packages are not currently distributed publicly as
+source packages or binary distributions. They must be installed from
+their source code hosted on GitHub. Also, to install the cray-product-catalog
+Python package, first clone it locally. Use the following steps to
+modify the SAT CSM Python dependencies so they can be installed from their source
+code.
((venv) user@hostname>
) Clone the source code for cray-product-catalog
.
git clone --branch v1.6.0 https://github.com/Cray-HPE/cray-product-catalog
+
((venv) user@hostname>
) In the cray-product-catalog
directory, create a file named .version
+that contains the version of cray-product-catalog
.
echo 1.6.0 > cray-product-catalog/.version
+
((venv) user@hostname>
) Open the “locked” requirements file in a text editor.
vim sat/requirements.lock.txt
+
Update the line containing cray-product-catalog
so that it reflects the
+local path to cray-product-catalog
.
It should read as follows:
+./cray-product-catalog
+
For versions of SAT newer than 3.19, change the line containing csm-api-client
+to read as follows.
csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
(Optional) ((venv) user@hostname>
) Confirm that requirements.lock.txt
is modified as expected.
grep -E 'cray-product-catalog|csm-api-client' sat/requirements.lock.txt
+
Example output:
+./cray-product-catalog
+csm-api-client@git+https://github.com/Cray-HPE/python-csm-api-client@release/1.1
+
Note: For versions newer than 3.19, the output will show both
+cray-product-catalog
and csm-api-client
. For version 3.19 and older,
+the output will only show cray-product-catalog
.
((venv) user@hostname>
) Install the modified SAT dependencies.
pip install -r sat/requirements.lock.txt
+
((venv) user@hostname>
) Install the SAT Python package.
pip install ./sat
+
(Optional) ((venv) user@hostname>
) Add the sat
virtual environment to the user’s PATH
environment
+variable.
If a shell other than bash
is in use, replace ~/.bash_profile
with the
+appropriate profile path.
If the virtual environment is not added to the user’s PATH
environment
+variable, then source ${SAT_VENV_PATH}/bin/activate
will need to be run before
+running any SAT commands.
deactivate
+echo export PATH=\"${SAT_VENV_PATH}/bin:${PATH}\" >> ~/.bash_profile
+source ~/.bash_profile
+
(user@hostname>
) Copy the file /etc/kubernetes/admin.conf
from ncn-m001
to ~/.kube/config
+on the external system.
Note that this file contains credentials to authenticate against the Kubernetes +API as the administrative user, so it should be treated as sensitive.
+mkdir -p ~/.kube
+scp ncn-m001:/etc/kubernetes/admin.conf ~/.kube/config
+
Example output:
+admin.conf 100% 5566 3.0MB/s 00:00
+
(user@hostname>
) Find the CAN IP address on ncn-m001
to determine the
+corresponding kubernetes
hostname.
On CSM 1.2 and newer, query the IP address of the bond0.cmn0
+interface.
ssh ncn-m001 ip addr show bond0.cmn0
+
Example output:
+13: bond0.cmn0@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.11/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+
On CSM versions prior to 1.2, query the IP address of the vlan007
interface.
ssh ncn-m001 ip addr show vlan007
+
Example output:
+13: vlan007@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+link/ether b8:59:9f:1d:d9:0e brd ff:ff:ff:ff:ff:ff
+inet 10.102.1.10/24 brd 10.102.1.255 scope global vlan007
+ valid_lft forever preferred_lft forever
+inet6 fe80::ba59:9fff:fe1d:d90e/64 scope link
+ valid_lft forever preferred_lft forever
+
(user@hostname>
) Set the IP_ADDRESS
variable to the value found in the
+previous step.
IP_ADDRESS=10.102.1.11
+
(user@hostname>
) Add an entry to /etc/hosts
mapping the IP address to
+the hostname kubernetes
.
echo "${IP_ADDRESS} kubernetes" | sudo tee -a /etc/hosts
+10.102.1.11 kubernetes
+
(user@hostname>
) Modify ~/.kube/config
to set the cluster server address.
The value of the server
key for the kubernetes
cluster under the clusters
+section should be set to https://kubernetes:6443
.
---
+clusters:
+- cluster:
+ certificate-authority-data: REDACTED
+ server: https://kubernetes:6443
+ name: kubernetes
+...
+
(user@hostname>
) Confirm that kubectl
can access the CSM Kubernetes cluster.
kubectl get nodes
+
Example output:
+NAME STATUS ROLES AGE VERSION
+ncn-m001 Ready master 135d v1.19.9
+ncn-m002 Ready master 136d v1.19.9
+ncn-m003 Ready master 136d v1.19.9
+ncn-w001 Ready <none> 136d v1.19.9
+ncn-w002 Ready <none> 136d v1.19.9
+ncn-w003 Ready <none> 136d v1.19.9
+
(user@hostname>
) Use sat init
to create a configuration file for SAT.
sat init
+
Example output:
+INFO: Configuration file "/home/user/.config/sat/sat.toml" generated.
+
(user@hostname>
) Copy the platform CA certificates from the management NCN
+and configure the certificates for use with SAT.
If a shell other than bash
is in use, replace ~/.bash_profile
with the
+appropriate profile path.
scp ncn-m001:/etc/pki/trust/anchors/platform-ca-certs.crt .
+echo export REQUESTS_CA_BUNDLE=\"$(realpath platform-ca-certs.crt)\" >> ~/.bash_profile
+source ~/.bash_profile
+
Edit the SAT configuration file to set the API and S3 hostnames.
+Externally available API endpoints are given domain names in PowerDNS, so the
+endpoints in the configuration file should each be set to the format
+subdomain.system-name.site-domain
. Here system-name
and site-domain
are
+replaced with the values specified during csi config init
, and subdomain
+is the DNS name for the externally available service. For more information,
+refer to Externally Exposed Services in the Cray System Management
+Documentation.
The API gateway has the subdomain api
, and S3 has the subdomain s3
. The
+S3 endpoint runs on port 8080. The following options should be set in the
+SAT configuration file.
[api_gateway]
+host = "api.system-name.site-domain"
+
+[s3]
+endpoint = "http://s3.system-name.site-domain:8080"
+
Edit the SAT configuration file to specify the Keycloak user who will be +accessing the REST API.
+[api_gateway]
+username = "user"
+
(user@hostname>
) Run sat auth
, and enter the password when prompted.
The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin.
sat auth
+
Example output:
+Password for user:
+Succeeded!
+
For more information on authentication types and authentication credentials, +see SAT Command Authentication. +For more information on Keycloak accounts and changing Role Mappings, +refer to both Configure Keycloak Account and Create Internal User +Accounts in the Keycloak Shasta Realm in the Cray System Management +Documentation.
+(user@hostname>
) Ensure the files are readable only by the current user.
touch ~/.config/sat/s3_access_key \
+ ~/.config/sat/s3_secret_key
+
chmod 600 ~/.config/sat/s3_access_key \
+ ~/.config/sat/s3_secret_key
+
(user@hostname>
) Write the credentials to local files using kubectl
.
Generate S3 credentials and write them to a local file so the SAT user can
+access S3 storage. In order to use the SAT S3 bucket, the user must generate
+the S3 access key and secret keys and write them to a local file. SAT uses
+S3 storage for several purposes, most importantly to store the site-specific
+information set with sat setrev
.
kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ ~/.config/sat/s3_access_key
+
kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ ~/.config/sat/s3_secret_key
+
IMPORTANT: Starting in CSM 1.6.0, SAT is fully included in CSM. There is no longer a separate SAT product stream to install. SAT 2.6 releases, which accompanied CSM 1.5, are the last releases of SAT as a separate product.
Similarly, the SAT documentation moved to be fully included within the CSM documentation. Starting in CSM 1.6.0, find information on SAT in the System Admin Toolkit (SAT) section of the Cray System Management Documentation.
+The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.
+This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.
+IUF will perform the following tasks for a release of SAT.
+deliver-product
stage:
+update-vcs-config
stage:
+update-cfs-config
stage:
+prepare-images
stage:
+management-nodes-rollout
stage:
+IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.
+This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.
+Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.
+Information: This stage only applies to the management configuration and +not to the managed configuration.
+Information: This stage only applies to management images and not to +managed images.
+After installing SAT with IUF, complete the following SAT configuration +procedures before using SAT:
+...
) in shell output indicate omitted lines.x.y.z
with the version of the SAT product stream
+being installed.To run SAT commands on the manager NCNs, first set up authentication +to the API gateway. For more information on authentication types and +authentication credentials, see SAT Command +Authentication.
+The admin account used to authenticate with sat auth
must be enabled in
+Keycloak and must have its assigned role set to admin. For more information
+on Keycloak accounts and changing Role Mappings, refer to both Configure Keycloak
+Account and Create Internal User Accounts in the Keycloak Shasta Realm in
+the Cray System Management Documentation.
sat
CLI has been installed following the IUF
+section of the
+Cray System Management Documentation.The following is the procedure to globally configure the username used by SAT and +authenticate to the API gateway.
+(ncn-m001#
) Generate a default SAT configuration file if one does not exist.
sat init
+
Example output:
+Configuration file "/root/.config/sat/sat.toml" generated.
+
Note: If the configuration file already exists, it will print out the +following error.
+ERROR: Configuration file "/root/.config/sat/sat.toml" already exists.
+Not generating configuration file.
+
Edit ~/.config/sat/sat.toml
and set the username option in the api_gateway
+section of the configuration file.
username = "crayadmin"
+
(ncn-m001#
) Run sat auth
. Enter the password when prompted.
sat auth
+
Example output:
+Password for crayadmin:
+Succeeded!
+
(ncn-m001#
) Other sat
commands are now authenticated to make requests to the API gateway.
sat status
+
Generate S3 credentials and write them to a local file so the SAT user can access S3 storage. In order to use the SAT S3 bucket, the System Administrator must generate the S3 access key and secret keys and write them to a local file. This must be done on every Kubernetes control plane node where SAT commands are run.
+SAT uses S3 storage for several purposes, most importantly to store the
+site-specific information set with sat setrev
(see Set System Revision
+Information).
(ncn-m001#
) Ensure the files are readable only by root
.
touch /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
chmod 600 /root/.config/sat/s3_access_key \
+ /root/.config/sat/s3_secret_key
+
(ncn-m001#
) Write the credentials to local files using kubectl
.
kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.access_key}' | base64 -d > \
+ /root/.config/sat/s3_access_key
+
kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.secret_key}' | base64 -d > \
+ /root/.config/sat/s3_secret_key
+
Verify the S3 endpoint specified in the SAT configuration file is correct.
+(ncn-m001#
) Get the SAT configuration file’s endpoint value.
Note: If the command’s output is commented out, indicated by an initial #
+character, the SAT configuration will take the default value – "https://rgw-vip.nmn"
.
grep endpoint ~/.config/sat/sat.toml
+
Example output:
+# endpoint = "https://rgw-vip.nmn"
+
(ncn-m001#
) Get the sat-s3-credentials
secret’s endpoint value.
kubectl get secret sat-s3-credentials -o json -o \
+ jsonpath='{.data.s3_endpoint}' | base64 -d | xargs
+
Example output:
+https://rgw-vip.nmn
+
Compare the two endpoint values.
+If the values differ, change the SAT configuration file’s endpoint value to +match the secret’s.
+(ncn-m001#
) Copy SAT configurations to each manager node on the system.
for i in ncn-m002 ncn-m003; do echo $i; ssh ${i} \
+ mkdir -p /root/.config/sat; \
+ scp -pr /root/.config/sat ${i}:/root/.config; done
+
Note: Depending on how many manager nodes are on the system, the list of
+manager nodes may be different. This example assumes three manager nodes, where
+the configuration files must be copied from ncn-m001
to ncn-m002
and
+ncn-m003
. Therefore, the list of hosts above is ncn-m002
and ncn-m003
.
If installing SAT on a multi-tenant system, the tenant name can be configured +at this point. For more information, see Configure multi-tenancy.
+HPE service representatives use system revision information data to identify +systems in support cases.
+(ncn-m001#
) Set System Revision Information.
Run sat setrev
and follow the prompts to set the following site-specific values:
Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.
+sat setrev
+
Example output:
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Verify System Revision Information.
+(ncn-m001#
) Run sat showrev
and verify the output shown in the “System Revision Information table.”
sat showrev
+
Example table output:
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
SAT 2.2.16 was released on February 25th, 2022.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container image and Helm chartIt also added the following new components:
+sat-install-utility
container imagecfs-config-util
container imageThe following sections detail the changes in this release.
+sat
Command Unavailable in sat bash
ShellAfter launching a shell within the SAT container with sat bash
, the sat
+command will not be found.
((CONTAINER_ID) sat-container#
) Here is an example output after running sat status
:
bash: sat: command not found
+
((CONTAINER_ID) sat-container#
) This can be resolved temporarily in one of two ways. /sat/venv/bin/
may be
+prepended to the $PATH
environment variable:
export PATH=/sat/venv/bin:$PATH
+sat status
+
((CONTAINER_ID) sat-container#
) Another option is to source the file /sat/venv/bin/activate
:
source /sat/venv/bin/activate
+sat status
+
sat bash
ShellAfter launching a shell within the SAT container with sat bash
, tab completion
+for sat
commands does not work.
((CONTAINER_ID) sat-container#
) This can be resolved temporarily by sourcing the file
+/etc/bash_completion.d/sat-completion.bash
:
source /etc/bash_completion.d/sat-completion.bash
+
sat
in Root Directorysat
commands will not work if the current directory is /
.
(ncn-m001#
) Here is an example output after running sat --help
:
Error: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: open /dev/console: operation not permitted: OCI runtime permission denied error
+
To resolve, run sat
in another directory.
sat
in Configuration Directorysat
commands will not work if the current directory is ~/.config/sat
.
(ncn-m001#
) Here is an example output after running sat --help
:
Error: /root/.config/sat: duplicate mount destination
+
To resolve, run sat
in another directory.
sat
Commandssat bootprep
automates the creation of CFS configurations, the build and
+customization of IMS images, and the creation of BOS session templates. For
+more information, see SAT Bootprep.sat slscheck
performs a check for consistency between the System Layout
+Service (SLS) and the Hardware State Manager (HSM).sat bmccreds
provides a simple interface for interacting with the System
+Configuration Service (SCSD) to set BMC Redfish credentials.sat hwhist
displays hardware component history by XName (location) or by
+its Field-Replaceable Unit ID (FRUID). This command queries the Hardware
+State Manager (HSM) API to obtain this information. Since the sat hwhist
+command supports querying for the history of a component by its FRUID, the
+FRUID of components has been added to the output of sat hwinv
.The following automation has been added to the install script, install.sh
:
sat-config-import
Kubernetes job, which is
+started when the sat-cfs-install
Helm chart is deployed.ncn-personalization
).The SAT product uploads additional information to the cray-product-catalog
+Kubernetes ConfigMap detailing the components it provides, including container
+(Docker) images, Helm charts, RPMs, and package repositories.
This information is used to support uninstall and downgrade of SAT product +versions moving forward.
+Beginning with the 2.2 release, SAT now provides partial support for the +uninstall and downgrade of the SAT product stream.
+For more information, see +Uninstall: Remove a Version of SAT and +Downgrade: Switch Between SAT Versions.
+sat status
A Subrole
column has been added to the output of sat status
. This allows
+easy differentiation between master, worker, and storage nodes in the
+management role, for example.
Hostname information from SLS has been added to sat status
output.
Support for JSON-formatted output has been added to commands which currently
+support the --format
option, such as hwinv
, status
, and showrev
.
Many usability improvements have been made to multiple sat
commands,
+mostly related to filtering command output. The following are some highlights:
--fields
option to display only specific fields for subcommands which
+display tabular reports.--filter
queries
+so that the first match is used, similar to --sort-by
.--filter
, --fields
, and --reverse
for summaries
+displayed by sat hwinv
.sat hwinv
.The default log level for stderr
has been changed from “WARNING” to “INFO”. For
+more information, see Update SAT Logging.
With the command-line options --loglevel-stderr
and --loglevel-file
, the log
+level can now be configured separately for stderr
and the log file.
The existing --loglevel
option is now an alias for the --loglevel-stderr
+option.
The Podman wrapper script is the script installed at /usr/bin/sat
on the
+master management NCNs by the cray-sat-podman
RPM that runs the cray-sat
+container in podman
. The following subsections detail improvements that were
+made to the wrapper script in this release.
cray-sat
ContainerThe Podman wrapper script that launches the cray-sat
container with podman
+has been modified to mount the user’s current directory and home directory into
+the cray-sat
container to provide access to local files in the container.
The man page for the Podman wrapper script, which is accessed by typing man sat
on a master management NCN, has been improved to document the following:
Fixed issues with redirecting stdout
and stderr
, and piping output to
+commands, such as awk
, less
, and more
.
A new sat
option has been added to configure the HTTP timeout length for
+requests to the API gateway. For more information, refer to sat-man sat
.
sat bootsys
ImprovementsMany improvements and fixes have been made to sat bootsys
. The following are
+some highlights:
--excluded-ncns
option, which can be used to omit NCNs
+from the platform-services
and ncn-power
stages in case they are
+inaccessible.sat bootsys shutdown
now prompt the user to
+continue before proceeding. A new option, --disruptive
, will bypass this.platform-services
stage of sat bootsys boot
.sat xname2nid
Improvementssat xname2nid
can now recursively expand slot, chassis, and cabinet XNames to
+a list of NIDs in those locations.
A new --format
option has been added to sat xname2nid
. It sets the output
+format to either “range” (the default) or “NID”. The “range” format displays NIDs
+in a compressed range format suitable for use with a workload manager like Slurm.
v2
HSM APIThe commands which interact with HSM (for example, sat status
and sat hwinv
)
+now use the v2
HSM API.
sat diag
Limited to HSN Switchessat diag
will now only operate against HSN switches by default. These are the
+only controllers that support running diagnostics with HMJTD.
sat showrev
EnhancementsA column has been added to the output of sat showrev
that indicates whether a
+product version is “active”. The definition of “active” varies across products,
+and not all products may set an “active” version.
For SAT, the active version is the one with its hosted-type package repository
+in Nexus set as the member of the group-type package repository in Nexus,
+meaning that it will be used when installing the cray-sat-podman
RPM.
cray-sat
Container Image Size ReductionThe size of the cray-sat
container image has been approximately cut in half by
+leveraging multi-stage builds. This also improved the repeatability of the unit
+tests by running them in the container.
Minor bug fixes were made in cray-sat
and in cray-sat-podman
. For full
+change lists, refer to each repository’s CHANGELOG.md
file.
The 2.3.4 version of the SAT product includes:
+sat
python package and CLIsat-podman
wrapper scriptsat-cfs-install
container imagesat-cfs-install
Helm chartsat-install-utility
container imagecfs-config-util
container imagesat
CommandsNone.
+When running sat
commands, the current working directory is now mounted in the
+container as /sat/share
, and the current working directory within the container
+is also /sat/share
.
Files in the current working directory must be specified using relative paths to
+that directory, because the current working directory is always mounted on
+/sat/share
. Absolute paths should be avoided, and paths that are outside of
+$HOME
or $PWD
are never accessible to the container environment.
The home directory is still mounted on the same path inside the container as it +is on the host.
+sat bootsys
The following options were added to sat bootsys
.
--bos-limit
--recursive
The --bos-limit
option passes a given limit string to a BOS session. The
+--recursive
option specifies a slot or other higher-level component in the
+limit string.
sat bootprep
The --delete-ims-jobs
option was added to sat bootprep run
. It deletes IMS
+jobs after sat bootprep
is run. Jobs are no longer deleted by default.
sat status
sat status
now includes information about nodes’ CFS configuration statuses,
+such as desired configuration, configuration status, and error count.
The output of sat status
now splits different component types into different
+report tables.
The following options were added to sat status
.
--hsm-fields
, --sls-fields
, --cfs-fields
--bos-template
The --hsm-fields
, --sls-fields
, --cfs-fields
options limit the output
+columns according to specified CSM services.
The --bos-template
option filters the status report according to the specified
+session template’s boot sets.
The following components were modified to be compatible with CSM 1.2.
+sat-cfs-install
container image and Helm chartsat-install-utility
container imageThe sat-ncn
Ansible role provided by sat-cfs-install
was modified to enable
+GPG checks on packages while leaving GPG checks disabled on repository metadata.
Updated urllib3
dependency to version 1.26.5 to mitigate CVE-2021-33503 and
+refreshed Python dependency versions.
Minor bug fixes were made in each of the repositories. For full change lists,
+refer to each repository’s CHANGELOG.md
file.
The known issues listed under the SAT 2.2 release +were fixed.
+ + + + + +The 2.4.13 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.cfs-config-util
container image.Because of installation refactoring efforts, the following two components +are no longer delivered with SAT:
+sat-cfs-install
container imagesat-cfs-install
Helm chartA version of the cray-sat
container image is now included in CSM. For more
+information, see SAT in CSM.
The SAT install.sh
script no longer uses a sat-cfs-install
Helm chart and
+container image to upload its Ansible content to the sat-config-management
+repository in VCS. Instead, it uses Podman to run the cf-gitea-import
container
+directly. Some of the benefits of this change include the following:
cray-sat
container image and cray-sat-podman
packagecray-sat
Container Image and cray-sat-podman
PackageIn older SAT releases, the sat
wrapper script that was provided by the
+cray-sat-podman
package installed on Kubernetes control plane nodes included a
+hard-coded version of the cray-sat
container image. As a result, every new
+version of the cray-sat
image required a corresponding new version of the
+cray-sat-podman
package.
In this release, this tight coupling of the cray-sat-podman
package and the
+cray-sat
container image was removed. The sat
wrapper script provided
+by the cray-sat-podman
package now looks for the version of the cray-sat
+container image in the /opt/cray/etc/sat/version
file. This file is populated
+with the correct version of the cray-sat
container image by the SAT layer of
+the CFS configuration that is applied to management NCNs. If the version
file
+does not exist, the wrapper script defaults to the version of the cray-sat
+container image delivered with the latest version of CSM installed on the system.
The steps for performing NCN personalization as part of the SAT installation
+were moved out of the install.sh
script and into a new
+update-mgmt-ncn-cfs-config.sh
script that is provided in the SAT release
+distribution. The new script provides additional flexibility in how it modifies
+the NCN personalization CFS configuration for SAT. It can modify an existing CFS
+configuration by name, a CFS configuration being built in a JSON file, or an
+existing CFS configuration that applies to certain components.
sat bootprep
FeaturesThe following new features were added to the sat bootprep
command:
Variable substitutions using Jinja2 templates in certain fields of the
+sat bootprep
input file
For more information, see +HPC CSM Software Recipe Variable Substitutions +and Dynamic Variable Substitutions.
+Schema version validation in the sat bootprep
input files
For more information, see +Provide a Schema Version.
+Ability to look up images and recipes provided by products
+For more information, see +Define IMS Images.
+The schema of the sat bootprep
input files was also changed to support these
+new features:
base
key instead of under an ims
key. The old ims
+key is deprecated.base.image_ref
.
+Going forward, do not use the IMS name of the image on which it depends.image.ims.name
, image.ims.id
, or image.image_ref
. Specifying a string
+value directly under the image
key is deprecated.For more information on defining IMS images and BOS session templates in the
+sat bootprep
input file, see Define IMS Images
+and Define BOS Session Templates.
sat swap
The sat swap
command was updated to support swapping compute and UAN blades
+with sat swap blade
. This functionality is described in the following processes
+of the Cray System Management Documentation:
v2
A new v2
version of the Boot Orchestration Service (BOS) is available in CSM
+1.3.0. SAT has added support for BOS v2
. This impacts the following commands
+that interact with BOS:
sat bootprep
sat bootsys
sat status
By default, SAT uses BOS v1
. To change the default to a different BOS version,
+see Change the BOS Version.
sat status
When using BOS v2
, sat status
outputs additional fields. These fields show
+the most recent BOS session, session template, booted image, and boot status for
+each node. An additional --bos-fields
option was added to limit the output of
+sat status
to these fields. The fields are not displayed when using BOS v1
.
This is the first release of SAT built from open source code repositories. +As a result, build infrastructure was changed to use an external Jenkins instance, +and artifacts are now published to an external Artifactory instance. These +changes should not impact the functionality of the SAT product in any way.
+paramiko
Python package version was updated from 2.9.2 to 2.10.1 to
+mitigate CVE-2022-24302.oauthlib
Python package version was updated from 3.2.0 to 3.2.1 to
+mitigate CVE-2022-36087.SAT stores information used to authenticate to the API gateway with Keycloak.
+Token files are stored in the ~/.config/sat/tokens/
directory. Those files
+have always had permissions appropriately set to restrict them to be readable
+only by the user.
Keycloak usernames used to authenticate to the API gateway are stored in the
+SAT configuration file at ~/.config/sat/sat.toml
. Keycloak usernames are also
+used in the file names of tokens stored in ~/.config/sat/tokens
. As an
+additional security measure, SAT now restricts the permissions of the SAT
+configuration file to be readable and writable only by the user. It also
+restricts the tokens directory and the entire SAT configuration directory
+~/.config/sat
to be accessible only by the user. This prevents other users on
+the system from viewing Keycloak usernames used to authenticate to the API
+gateway.
sat init
did not print a message confirming a new
+configuration file was created.sat showrev
exited with a traceback if the file
+/opt/cray/etc/site_info.yaml
existed but was empty. This could occur if the
+user exited sat setrev
with Ctrl-C
.sat bootsys
man page, and added a
+description of the command stages.The 2.5.17 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.cfs-config-util
container image.sat
Commandssat jobstat
allows access to application and job data through the command
+line. It provides a table summarizing information for all jobs on the system.
sat bootprep
A list-vars
subcommand was added to sat bootprep
.
It lists the variables available for use in bootprep input files at runtime.
+A --limit
option was added to sat bootprep run
.
It restricts the creation of CFS configurations, IMS images, and BOS session +templates into separate stages. For more information, see +Limit SAT Bootprep Run into Stages.
+sat bootprep
now prompts individually for each CFS configuration that
+already exists.
sat bootprep
can now filter images provided by a product by using a prefix.
This is useful when specifying the base of an image in a bootprep input +file. For more information, see +Define IMS Images.
+To support product names with hyphens, sat bootprep
now converts hyphens to
+underscores within variables.
For more information, see +Hyphens in HPC CSM Software Recipe Variables.
+In sat bootprep
input files, the value of the playbook
property of CFS
+configuration layers can now be rendered with Jinja2 templates.
For more information, see +Values Supporting Jinja2 Template Rendering.
+Output was added to sat bootprep run
that summarizes the CFS configurations,
+IMS images, and BOS session templates created.
For more information, see +Summary of SAT Bootprep Results.
+Improvements were made to the sat bootprep
output when CFS configuration
+and BOS session templates are created.
sat bootsys
reboot
subcommand was added to sat bootsys
. It uses BOS to reboot
+nodes in the bos-operations
stage.--staged-session
option was added to sat bootsys
. It can be used to
+create staged BOS sessions. For more information, refer to Staging Changes
+with BOS in the Cray System Management Documentation.sat
Commandsprodmgr
, a version is no longer set as
+“active” in the product catalog. The “active” field was also removed from the
+output of sat showrev
.sat status
when using BOS
+version two.The new Install and Upgrade Framework (IUF) provides commands which install,
+upgrade, and deploy products with the help of sat bootprep
on HPE Cray EX
+systems managed by Cray System Management (CSM). IUF capabilities are described
+in detail in the IUF section
+of the Cray System Management Documentation.
+The initial install and upgrade workflows described in the
+HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM
+(S-8052) detail when and how to use
+IUF with a new release of SAT or any other HPE Cray EX product.
Because IUF now handles NCN personalization, information about this process was +removed from the SAT documentation. Other sections in the documentation were +also revised to support the new Install and Upgrade Framework. For example, the +SAT Installation and SAT Upgrade sections of this +guide now provide details on software and configuration content specific to SAT. +The Cray System Management Documentation +will indicate when these sections should be referred to for detailed information.
+For more information on the relationship between sat bootprep
and IUF, see
+SAT and IUF.
By default, SAT now uses version two of the Boot Orchestration Service (BOS).
+This change to BOS v2
impacts the following commands that interact with BOS:
sat bootprep
sat bootsys
sat status
To change the default to a different BOS version, see +Change the BOS Version.
+sat
python package and CLI from
+2021.10.8 to 2022.12.7 to resolve CVE-2022-23491.sat-install-utility
container image
+from 2021.5.30 to 2022.12.7 to resolve CVE-2022-23491.sat init
from creating a configuration file in
+the current directory when not prefixed with ./
.sat status
failed with a traceback when using BOS
+version two and reported components whose most recent image did not exist.sat
container could contain a different
+version of kubectl
than the version found in CSM.sat bootprep
and
+sat swap blade
.The 2.6.14 version of the SAT product includes:
+sat
python package and CLI.sat-podman
wrapper script.sat-install-utility
container image.sat
CommandsNo new sat
commands were added in SAT 2.6.
sat bootsys
Functionality was added to the platform-services
and cabinet-power
+stages of sat bootsys boot
. This allows SAT to automatically recreate
+Kubernetes CronJobs that may have become stuck during shutdown, boot, or
+reboot.
sat bootsys boot
more reliably determines if the hms-discovery
CronJob
+was scheduled during the cabinet-power
stage.
SAT now uses the BatchV1 Kubernetes API to manipulate CronJobs instead of the +BatchV1Beta1 API.
+sat bootsys
now logs the ID of all BOS sessions when performing BOS
+operations. A warning is logged for any BOS sessions with failed
+components.
Support for the Compute Rolling Upgrade Service (CRUS) has been removed,
+and the sat bootsys
command will no longer interact with CRUS.
The bos-operations
stage of sat bootsys
no longer checks whether BOS
+session templates need any operations to be performed before creating a BOS
+session. BOS instead determines whether the session will need to boot or
+shut down any nodes to reach the desired state.
sat bootprep
Wildcard matching was added for images in sat bootprep
input files. Use
+wildcards similar to how prefix filters were used in older versions of SAT.
+For more information, see Define IMS Images.
Support for multiple architectures was added to sat bootprep
. It is now
+possible to filter base IMS images and recipes from products based on their
+target architecture. This support also allows specifying target architectures
+in boot sets of BOS session templates. For more information, see
+Filter Base Images or Recipes from a Product
+and
+Define BOS Session Templates.
When specifying a base image or recipe from a product, sat bootprep
+can combine multiple image or recipe filters. When specifying multiple
+filters, the unique base image or recipe that satisfies all of the given
+filters is selected. An error occurs if either no images or recipes match the
+given filters or if more than one image or recipe matches the given filters.
In CFS configuration layers, support was added for the new imsRequireDkms
+field under the specialParameters
section. CFS configurations in bootprep
+input files can specify an ims_require_dkms
field in a new, optional
+special_parameters
section for each layer.
The SAT Kibana and Grafana dashboards were moved to the System Monitoring +Application (SMA) beside other dashboards. For more information on how to +view these dashboards going forward, see the HPE Cray EX System Monitoring +Application Administration Guide (S-8029).
+Add the new s3.cert_verify
option to the SAT configuration file to
+control whether certificate verification is performed when accessing S3.
Log messages spanning multiple lines now print the log level on each line +instead of only at the beginning of the message.
+When certificate verification is disabled for CSM API requests, only a single +warning now prints at the beginning of SAT’s invocation instead of for +each request.
+sat swap blade
more reliably determines if the hms-discovery
CronJob was
+scheduled when enabling a blade following a hardware swap.
sat swap blade
will use the BatchV1 Kubernetes API to manipulate CronJobs,
+instead of the BatchV1Beta1 API as previously.
Command prompts in this guide are now inserted into text before the +fenced code block instead of inside of it. This is a change from the +documentation of SAT 2.5 and earlier. In addition, two new command prompts +were added for better clarity. For more information, see +Command Prompt Conventions in SAT.
+SAT 2.6 supports supplying tenant information to CSM services in order to allow +tenant admins to use SAT within their tenant. For more information, see +Configure multi-tenancy.
+Updated the version of cryptography from 36.0.1 to 41.0.0 to resolve +CVE-2023-2650.
+Updated the version of requests from 2.27.1 to 2.31.0 to resolve +CVE-2023-32681.
+Updated the version of curl/libcurl from 7.80.0-r6 to 8.1.2-r0 to address +CVE-2023-27536.
+Improved extreme slowness in the platform-services
stage of
+sat bootsys shutdown
in cases where a large known_hosts
file is used on
+the host where SAT is running.
Fixed a bug that caused the wrong container name to be logged when CFS +configuration sessions failed on newer CSM systems.
+Shasta v1.3.2 included version 2.4.0 of the sat
python package and CLI.
The following sections detail the changes in this release.
+sat swap
Command for Switch and Cable ReplacementThe sat switch
command which supported operations for replacing a switch has
+been deprecated and replaced with the sat swap
command, which now supports
+replacing a switch OR cable.
The sat swap switch
command is equivalent to sat switch
. The sat switch
+command will be removed in a future release.
sat bootsys
CommandThe sat bootsys
command now has multiple stages for both the boot
and
+shutdown
actions. Please refer to the “System Power On Procedures” and “System
+Power Off Procedures” sections of the Cray Shasta Administration Guide (S-8001)
+for more details on using this command in the context of a full system power off
+and power on.
Shasta v1.3 included version 2.2.3 of the sat
python package and CLI.
This version of the sat
CLI contained the following commands:
auth
bootsys
cablecheck
diag
firmware
hwinv
hwmatch
k8s
linkhealth
sensors
setrev
showrev
status
swap
switch
For more information on each of these commands, see the +SAT Command Overview and the table +of commands in the SAT Command Authentication +section of this document.
+ + + + + +We released version 2.0.4 of the SAT product in Shasta v1.4.1.
+This version of the SAT product included:
+sat
python package and CLI.sat-podman
wrapper script.The following sections detail the changes in this release.
+Two new commands were added to translate between NIDs and XNames:
+sat nid2xname
sat xname2nid
These commands perform this translation by making requests to the Hardware +State Manager (HSM) API.
+sat swap
where creating the offline port policy failed.sat bootsys shutdown --stage bos-operations
to no longer forcefully
+power off all compute nodes and application nodes using CAPMC when BOS
+sessions complete or time out.sat bootsys boot --stage cabinet-power
.In Shasta v1.4, SAT became an independent product, which meant we began to +designate a version number for the entire SAT product. We released version +2.0.3 of the SAT product in Shasta v1.4.
+This version of the SAT product included the following components:
+sat
python package and CLIIt also added the following new component:
+sat-podman
wrapper scriptThe following sections detail the changes in this release.
+SAT is now packaged and released as an independent product. The product
+deliverable is called a “release distribution”. The release distribution is a
+gzipped tar file containing an install script. This install script loads the
+cray/cray-sat
container image into the Docker registry in Nexus and loads the
+cray-sat-podman
RPM into a package repository in Nexus.
In this release, the cray-sat-podman
package is still installed in the master
+and worker NCN images provided by CSM. This is changed in SAT 2.1.16 released in
+Shasta v1.5.
The sat
command now runs in a container under Podman. The sat
executable is
+now installed on all nodes in the Kubernetes cluster (workers and
+control plane nodes). This executable is a wrapper script that starts a SAT container in
+Podman and invokes the sat
Python CLI within that container. The admin can run
+individual sat
commands directly on the master or worker NCNs as before, or
+they can run sat
commands inside the SAT container after using sat bash
to
+enter an interactive shell inside the SAT container.
To view man pages for sat
commands, the user can run sat-man SAT_COMMAND
,
+replacing SAT_COMMAND
with the name of the sat
command. Alternatively,
+the user can enter the sat
container with sat bash
and use the man
command.
sat init
Command and Configuration File Location ChangeThe default location of the SAT configuration file has been changed from /etc/sat.toml
+to ~/.config/sat/sat.toml
. A new command, sat init
, has been added that
+initializes a configuration file in the new default directory. This better supports
+individual users on the system who want their own configuration files.
~/.config/sat
is mounted into the container that runs under Podman, so changes
+are persistent across invocations of the sat
container. If desired, an alternate
+configuration directory can be specified with the SAT_CONFIG_DIR
environment
+variable.
Additionally, if a configuration file does not yet exist when a user runs a sat
+command, one is generated automatically.
sat hwinv
Additional functionality has been added to sat hwinv
including:
--list-node-enclosure-power-supplies
+option.--list-node-accels
option.
+The count of node accelerators is also included for each node.--list-node-accel-risers
option. The count of node accelerator risers is also
+included for each node.--list-node-hsn-nics
option. The count of HSN NICs is also included for each node.Documentation for these new options has been added to the man page for sat hwinv
.
sat setrev
in S3The sat setrev
and sat showrev
commands now use S3 to store and obtain site
+information, including system name, site name, serial number, install date, and
+system type. Since the information is stored in S3, it will now be consistent
+regardless of the node on which sat
is executed.
As a result of this change, S3 credentials must be configured for SAT. For more +information, see Generate SAT S3 Credentials.
+sat showrev
sat showrev
now shows product information from the cray-product-catalog
+ConfigMap in Kubernetes.
sat showrev
The output from sat showrev
has also been changed in the following ways:
--docker
and --packages
options were considered misleading and have
+been removed.--local
option.sat cablecheck
The sat cablecheck
command has been removed. To verify that the system’s Slingshot
+network is cabled correctly, admins should now use the show cables
command in the
+Slingshot Topology Tool (STT).
sat swap
Command Compatibility with Next-gen Fabric ControllerThe sat swap
command was added in Shasta v1.3.2. This command used the Fabric
+Controller API. Shasta v1.4 introduced a new Fabric Manager API and removed the
+Fabric Controller API, so this command has been rewritten to use the new
+backwards-incompatible API. Usage of the command did not change.
sat bootsys
FunctionalityMuch of the functionality added to sat bootsys
in Shasta v1.3.2 was broken
+by changes introduced in Shasta v1.4, which removed the Ansible inventory
+and playbooks.
The functionality in the platform-services
stage of sat bootsys
has been
+re-implemented to use Python directly instead of Ansible. This resulted in
+a more robust procedure with better logging to the sat
log file. Failures
+to stop containers on Kubernetes nodes are handled more gracefully, and
+more information about the containers that failed to stop, including how to
+debug the problem, is included.
Improvements were made to console logging setup for non-compute nodes +(NCNs) when they are shut down and booted.
+The following improvements were made to the bos-operations
stage
+of sat bootsys
:
--bos-templates
, and a corresponding configuration
+file option, bos_templates
, were added, and the --cle-bos-template
and
+--uan-bos-template
options and their corresponding configuration file
+options were deprecated.The following functionality has been removed from sat bootsys
:
hsn-bringup
stage of sat bootsys boot
has been removed due to removal
+of the underlying Ansible playbook.bgp-check
stage of sat bootsys {boot,shutdown}
has been removed. It is
+now a manual procedure.The location of the sat log file has changed from /var/log/cray/sat.log
to
+/var/log/cray/sat/sat.log
. This change simplifies mounting this file into the
+sat container running under Podman.
We released version 2.1.16 of the SAT product in Shasta v1.5.
+This version of the SAT product included:
+sat
python package and CLIsat-podman
wrapper scriptIt also added the following new component:
+sat-cfs-install
docker image and helm chartThe following sections detail the changes in this release.
+This release further decouples the installation of the SAT product from the CSM
+product. The cray-sat-podman
RPM is no longer installed in the management
+non-compute node (NCN) image. Instead, the cray-sat-podman
RPM is installed on
+all master management NCNs via an Ansible playbook which is referenced by a
+layer of the CFS configuration that applies to management NCNs. This CFS
+configuration is typically named ncn-personalization
.
The SAT product now includes a Docker image and a Helm chart named
+sat-cfs-install
. The SAT install script, install.sh
, deploys the Helm chart
+with Loftsman. This helm chart deploys a Kubernetes job that imports the
+SAT Ansible content to a git repository in VCS (Gitea) named sat-config-management
.
+This repository is referenced by the layer added to the NCN personalization
+CFS configuration.
All commands which used to access Redfish directly have either been removed or +modified to use higher-level service APIs. This includes the following commands:
+sat sensors
sat diag
sat linkhealth
The sat sensors
command has been rewritten to use the SMA telemetry API to
+obtain the latest sensor values. The command’s usage has changed slightly, but
+legacy options work as before, so it is backwards compatible. Additionally, new
+commands have been added.
The sat diag
command has been rewritten to use a new service called Fox, which
+is delivered with the CSM-Diags product. The sat diag
command now launches
+diagnostics using the Fox service, which launches the corresponding diagnostic
+programs on controllers using the Hardware Management Job and Task Daemon
+(HMJTD) over Redfish. Essentially, Fox serves as a proxy for us to start
+diagnostics over Redfish.
The sat linkhealth
command has been removed. Its functionality has been
+replaced by functionality from the Slingshot Topology Tool (STT) in the
+fabric manager pod.
The Redfish username and password command line options and configuration file +options have been removed. For more information, see +Remove Obsolete Configuration File Sections.
+sat setrev
and sat showrev
sat setrev
now collects the following information from the admin, which is then
+displayed by sat showrev
:
Additional guidance and validation has been added to each field collected by
+sat setrev
. This sets the stage for sdu setup
to stop collecting this
+information and instead collect it from sat showrev
or its S3 bucket.
sat bootsys
The platform-services
stage of the sat bootsys boot
command has been
+improved to start inactive Ceph services, unfreeze Ceph, and wait for Ceph
+health in the correct order. The ceph-check
stage has been removed as it is no
+longer needed.
The platform-services
stage of sat bootsys
boot now prompts for confirmation
+of the storage NCN hostnames in addition to the Kubernetes control plane and worker nodes.
sat firmware
.cray-sat
container image.sat firmware
command. This procedure can be used to uninstall a version of SAT.
+prodmgr
.prodmgr
command is available.(ncn-m001#
) Use sat showrev
to list versions of SAT.
sat showrev --products --filter product_name=sat
+
Example output:
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+-------------------+-----------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+-------------------+-----------------------+
+| sat | 2.3.3 | - | - |
+| sat | 2.2.10 | - | - |
++--------------+-----------------+-------------------+-----------------------+
+
(ncn-m001#
) Use prodmgr
to uninstall a version of SAT.
This command will do three things:
+cray-product-catalog
Kubernetes ConfigMap, so that it will no longer show up
+in the output of sat showrev
.prodmgr uninstall sat 2.2.10
+
Example output:
+Repository sat-2.2.10-sle-15sp2 has been removed.
+Removed Docker image cray/cray-sat:3.9.0
+Removed Docker image cray/sat-cfs-install:1.0.2
+Removed Docker image cray/sat-install-utility:1.4.0
+Deleted sat-2.2.10 from product catalog.
+
This procedure can be used to downgrade the active version of SAT.
+Note: The prodmgr activate
command is deprecated in SAT 2.6, and the
+ability to switch between SAT versions will be removed in a future release.
prodmgr
command is
+available.(ncn-m001#
) Use sat showrev
to list versions of SAT.
sat showrev --products --filter product_name=sat
+
Example output:
+###############################################################################
+Product Revision Information
+###############################################################################
++--------------+-----------------+--------------------+-----------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+--------------------+-----------------------+
+| sat | 2.3.3 | - | - |
+| sat | 2.2.10 | - | - |
++--------------+-----------------+--------------------+-----------------------+
+
(ncn-m001#
) Use prodmgr
to switch to a different version of SAT.
This command will do two things:
+2.2.10
+sets the repository sat-2.2.10-sle-15sp2
as the only member of the sat-sle-15sp2
group.management-23.5.0
). Specifically, it will ensure that the layer refers to the version of SAT CFS
+configuration content associated with the version of SAT to which the system is switching.prodmgr activate sat 2.5.15
+
Example output:
+Repository sat-2.5.15-sle-15sp4 is now the default in sat-sle-15sp4.
+Updated CFS configurations: [management-23.5.0]
+
Apply the modified CFS configuration to the management NCNs.
+At this point, Nexus package repositories have been modified to set a +particular package repository as active, but the SAT package may not have +been updated on management NCNs.
+To ensure that management NCNs have been updated to use the active SAT +version, follow the Procedure to Apply CFS Configuration.
+(ncn-m001#
) Set an environment variable that refers to the name of the CFS configuration
+to be applied to the management NCNs.
export CFS_CONFIG_NAME="management-23.5.0"
+
Note: Refer to the output from the prodmgr activate
command to find
+the name of the modified CFS configuration. If more than one CFS configuration
+was modified, use the first one.
INFO: Successfully saved CFS configuration "management-23.5.0"
+
(ncn-m001#
) Obtain the name of the CFS configuration layer for SAT and save it in an
+environment variable:
export SAT_LAYER_NAME=$(cray cfs configurations describe $CFS_CONFIG_NAME --format json \
+ | jq -r '.layers | map(select(.cloneUrl | contains("sat-config-management.git")))[0].name')
+
(ncn-m001#
) Create a CFS session that executes only the SAT layer of the given CFS
+configuration.
The --configuration-limit
option limits the configuration session to run
+only the SAT layer of the configuration.
cray cfs sessions create --name "sat-session-${CFS_CONFIG_NAME}" --configuration-name \
+ "${CFS_CONFIG_NAME}" --configuration-limit "${SAT_LAYER_NAME}"
+
Monitor the progress of the CFS session.
+(ncn-m001#
) Set an environment variable to the name of the Ansible container within the pod
+for the CFS session:
export ANSIBLE_CONTAINER=$(kubectl get pod -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME} -o json \
+ | jq -r '.items[0].spec.containers | map(select(.name | contains("ansible"))) | .[0].name')
+
(ncn-m001#
) Next, get the logs for the Ansible container.
kubectl logs -c $ANSIBLE_CONTAINER --tail 100 -f -n services \
+ --selector=cfsession=sat-session-${CFS_CONFIG_NAME}
+
Ansible plays, which are run by the CFS session, will install SAT on all the +master management NCNs on the system. A summary of results can be found at +the end of the log output.
+(ncn-m001#
) The following example shows a successful session:
...
+PLAY RECAP *********************************************************************
+x3000c0s1b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s3b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+x3000c0s5b0n0 : ok=3 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
+
Note: Ensure that the PLAY RECAPs for each session show successes for all +manager NCNs before proceeding.
+(ncn-m001#
) Verify that SAT was successfully configured.
If sat
is configured, the --version
command will indicate which version
+is installed. If sat
is not properly configured, the command will fail.
Note: This version number will differ from the version number of the SAT
+release distribution. This is the semantic version of the sat
Python package,
+which is different from the version number of the overall SAT release distribution.
sat --version
+
Example output:
+sat 3.7.0
+
Note: Upon first running sat
, there might be additional output while
+the sat
container image is downloaded. This occurs the first time sat
+is run on each manager NCN. For example, when running sat
for the first time
+on ncn-m001
and then for the first time on ncn-m002
, this additional
+output is seen both times.
Trying to pull registry.local/cray/cray-sat:3.7.0-20210514024359_9fed037...
+Getting image source signatures
+Copying blob da64e8df3afc done
+Copying blob 0f36fd81d583 done
+Copying blob 12527cf455ba done
+...
+sat 3.7.0
+
(ncn-m001#
) Stop the typescript.
exit
+
SAT version x.y.z
is now installed and configured:
The previous procedure is not always necessary because the CFS Batcher service +automatically detects configuration changes and will automatically create new +sessions to apply configuration changes according to certain rules. For more +information on these rules, refer to Configuration Management with +the CFS Batcher in the Cray System Management Documentation.
+The main scenario in which the CFS batcher will not automatically re-apply the
+SAT layer is when the commit hash of the sat-config-management
git repository
+has not changed between SAT versions. The previous procedure ensures the
+configuration is re-applied in all cases, and it is harmless if the batcher has
+already applied an updated configuration.
The Install and Upgrade Framework (IUF) provides commands which install, +upgrade, and deploy products on systems managed by CSM. IUF capabilities are +described in detail in the IUF +section of the +Cray System Management Documentation. +The initial install and upgrade workflows described in the +HPE Cray EX System Software Stack Installation and Upgrade Guide for CSM +(S-8052) detail when and how to use +IUF with a new release of SAT or any other HPE Cray EX product.
+This document does not replicate install, upgrade, or deployment procedures +detailed in the Cray System Management +Documentation. This document provides +details regarding software and configuration content specific to SAT which is +needed when installing, upgrading, or deploying a SAT release. The Cray +System Management Documentation will +indicate when sections of this document should be referred to for detailed +information.
+IUF will perform the following tasks for a release of SAT.
+deliver-product
stage:
+update-vcs-config
stage:
+update-cfs-config
stage:
+prepare-images
stage:
+management-nodes-rollout
stage:
+IUF uses a variety of CSM and SAT tools when performing these tasks. The IUF +section of the +Cray System Management Documentation +describes how to use these tools directly if it is desirable to use them +instead of IUF.
+This section describes SAT details that an administrator must be aware of +before running IUF stages. Entries are prefixed with Information if no +administrative action is required or Action if an administrator needs +to perform tasks outside of IUF.
+Information: This stage is only run if a VCS working branch is specified for +SAT. By default, SAT does not create or specify a VCS working branch.
+Information: This stage only applies to the management configuration and +not to the managed configuration.
+Information: This stage only applies to management images and not to +managed images.
+After upgrading SAT with IUF, it is recommended to complete the following +procedures before using SAT:
+ +...
) in shell output indicate omitted lines. Replace x.y.z
with the version of the SAT product stream
+being upgraded. After upgrading SAT, if using the configuration file from a previous version, there may be
+configuration file sections no longer used in the new version. For example, when upgrading
+from Shasta 1.4 to Shasta 1.5, the [redfish]
configuration file section is no longer used.
(ncn-m001#
) In that case, the following warning may appear upon running sat
commands.
WARNING: Ignoring unknown section 'redfish' in config file.
+
Remove the [redfish]
section from /root/.config/sat/sat.toml
to resolve the warning.
[redfish]
+username = "admin"
+password = "adminpass"
+
Repeat this process for any configuration file sections for which there are “unknown section” warnings.
+As of SAT version 2.2, some command output that was previously printed to stdout
+is now logged to stderr
. These messages are logged at the INFO
level. The
+default logging threshold was changed from WARNING
to INFO
to accommodate
+this logging change. Additionally, some messages previously logged at the INFO
+are now logged at the DEBUG
level.
These changes take effect automatically. However, if the default output threshold
+has been manually set in ~/.config/sat/sat.toml
, it should be changed to ensure
+that important output is shown in the terminal.
(ncn-m001#
) In the following example, the stderr
log level, logging.stderr_level
, is set to
+WARNING
, which will exclude INFO
-level logging from terminal output.
grep -A 3 logging ~/.config/sat/sat.toml
+
Example output:
+[logging]
+...
+stderr_level = "WARNING"
+
To enable the new default behavior, comment this line out, delete it, or set +the value to “INFO”.
+If logging.stderr_level
is commented out, its value will not affect logging
+behavior. However, it may be helpful to set its value to INFO
as a reminder of
+the new default behavior.
The following commands trigger messages that have been changed from stdout
+print calls to INFO
-level (or WARNING
- or ERROR
-level) log messages:
sat bootsys --stage shutdown --stage session-checks
sat sensors
The following commands trigger messages that have been changed from INFO
-level
+log messages to DEBUG
-level log messages:
sat nid2xname
sat xname2nid
sat swap
HPE service representatives use system revision information data to identify +systems in support cases.
+This procedure is not required if SAT was upgraded from 2.1 (Shasta v1.5) +or later. It is required if SAT was upgraded from 2.0 (Shasta v1.4) or +earlier.
+Set System Revision Information.
+(ncn-m001#
) Run sat setrev
and follow the prompts to set the following site-specific values:
Tip: For “System type”, a system with any liquid-cooled components should be +considered a liquid-cooled system. In other words, “System type” is EX-1C.
+sat setrev
+
Example output:
+--------------------------------------------------------------------------------
+Setting: Serial number
+Purpose: System identification. This will affect how snapshots are
+ identified in the HPE backend services.
+Description: This is the top-level serial number which uniquely identifies
+ the system. It can be requested from an HPE representative.
+Valid values: Alpha-numeric string, 4 - 20 characters.
+Type: <class 'str'>
+Default: None
+Current value: None
+--------------------------------------------------------------------------------
+Please do one of the following to set the value of the above setting:
+ - Input a new value
+ - Press CTRL-C to exit
+...
+
Verify System Revision Information.
+(ncn-m001#
) Run sat showrev
and verify the output shown in the “System Revision Information table.”
The following example shows sample table output.
+sat showrev
+
Example output:
+################################################################################
+System Revision Information
+################################################################################
++---------------------+---------------+
+| component | data |
++---------------------+---------------+
+| Company name | HPE |
+| Country code | US |
+| Interconnect | Sling |
+| Product number | R4K98A |
+| Serial number | 12345 |
+| Site name | HPE |
+| Slurm version | slurm 20.02.5 |
+| System description | Test System |
+| System install date | 2021-01-29 |
+| System name | eniac |
+| System type | EX-1C |
++---------------------+---------------+
+################################################################################
+Product Revision Information
+################################################################################
++--------------+-----------------+------------------------------+------------------------------+
+| product_name | product_version | images | image_recipes |
++--------------+-----------------+------------------------------+------------------------------+
+| csm | 0.8.14 | cray-shasta-csm-sles15sp1... | cray-shasta-csm-sles15sp1... |
+| sat | 2.0.1 | - | - |
+| sdu | 1.0.8 | - | - |
+| slingshot | 0.8.0 | - | - |
+| sma | 1.4.12 | - | - |
++--------------+-----------------+------------------------------+------------------------------+
+################################################################################
+Local Host Operating System
+################################################################################
++-----------+----------------------+
+| component | version |
++-----------+----------------------+
+| Kernel | 5.3.18-24.15-default |
+| SLES | SLES 15-SP2 |
++-----------+----------------------+
+
By default, SAT uses Boot Orchestration Service (BOS) version two (v2
).
+Select the BOS version to use for individual commands with the --bos-version
+option. For more information on this option, refer to the man page for a specific
+command.
Another way to change the BOS version is by configuring it under the
+api_version
setting in the bos
section of the SAT configuration file.
+If the system is using an existing SAT configuration file from an older
+version of SAT, the bos
section might not exist. In that case, add the bos
+section with the BOS version desired in the api_version
setting.
Find the SAT configuration file at ~/.config/sat/sat.toml
, and look for a
+section like this:
[bos]
+api_version = "v2"
+
In this example, SAT is using BOS version "v2"
.
Change the line specifying the api_version
to the BOS version desired (for
+example, "v1"
).
[bos]
+api_version = "v1"
+
If applicable, uncomment the api_version
line.
If the system is using an existing SAT configuration file from a recent
+version of SAT, the api_version
line might be commented out like this:
[bos]
+# api_version = "v2"
+
If the line is commented out, SAT will still use the default BOS
+version. To ensure a different BOS version is used, uncomment the
+api_version
line by removing #
at the beginning of the line.
SAT supports supplying tenant information to CSM services in order to allow +tenant admins to use SAT within their tenant. By default, the tenant name is +not set, and SAT will not send any tenant information with its requests to +CSM services. Configure the tenant name either in the SAT configuration file +or on the command line.
+Set the tenant name in the SAT configuration file using the
+api_gateway.tenant_name
option.
Here is an example:
+[api_gateway]
+tenant_name = "my_tenant"
+
Set the tenant name for each sat
invocation using the --tenant-name
+option. The --tenant-name
option must be specified before the subcommand
+name.
(ncn-m001#
) Here is an example:
sat --tenant-name=my_tenant status
+
The Install and Upgrade Framework (IUF) provides commands which install,
+upgrade, and deploy products on systems managed by CSM with the help of
+sat bootprep
. Outside of IUF, it is uncommon to use sat bootprep
.
+For more information on IUF, see the
+IUF section of
+the Cray System Management Documentation.
+For more information on sat bootprep
, see SAT Bootprep.
Both IUF and sat bootprep
allow variable substitutions into the default HPC
+CSM Software Recipe bootprep input files. The default variables of the HPC
+CSM Software Recipe are available in a product_vars.yaml
file. To override
+the default variables, specify any site variables in a site_vars.yaml
file.
+Variables are sourced from the command line, any variable files directly
+provided, and the HPC CSM Software Recipe files used, in that order.
IUF also has special session variables internal to the iuf
command that
+override any matching entries. Session variables are the set of product and
+version combinations being installed by the current IUF activity, and they are
+found inside IUF’s internal session_vars.yaml
file. For more information on
+IUF and variable substitutions, see the
+IUF section of
+the Cray System Management Documentation.
When using sat bootprep
outside of IUF, substituting variables into the
+default bootprep input files might cause problems. Complex variables like
+"{{ working_branch }}"
cannot be completely resolved outside of IUF and
+its internal session variables. Thus, the default product_vars.yaml
file is
+unusable with only the sat bootprep
command when variables like
+"{{ working_branch }}"
are used. To work around this limitation when
+substituting complex variables, use the internal IUF session_vars.yaml
file
+with sat bootprep
and the default bootprep input files.
Find the session_vars.yaml
file from the most recent IUF activity on the
+system.
This process is documented in the upgrade prerequisites procedure of the +Cray System Management Documentation. For more information, see steps 1-6 of +Stage 0.3 - Option 2.
+(ncn-m001#
) Use the session_vars.yaml
file to substitute variables into the default
+bootprep input files.
sat bootprep run --vars-file session_vars.yaml
+
The sat bootprep run
command uses information from the bootprep input files
+to create CFS configurations, IMS images, and BOS session templates. To restrict
+this creation into separate stages, use the --limit
option and list whether
+to create configurations
, images
, session_templates
, or some
+combination of these. IUF uses the --limit
option in this way to install,
+upgrade, and deploy products on a system in stages.
(ncn-m001#
) For example, to create only CFS configurations, run the following command used
+by the IUF update-cfs-config
stage:
sat bootprep run --limit configurations example-bootprep-input-file.yaml
+
Example output:
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Creating 3 CFS configurations
+...
+INFO: Skipping creation of IMS images based on value of --limit option.
+INFO: Skipping creation of BOS session templates based on value of --limit option.
+
(ncn-m001#
) To create only IMS images and BOS session templates, run the following command
+used by the IUF prepare-images
stage:
sat bootprep run --limit images --limit session_templates example-bootprep-input-file.yaml
+
Example output:
+INFO: Validating given input file example-bootprep-input-file.yaml
+INFO: Input file successfully validated against schema
+INFO: Skipping creation of CFS configurations based on value of --limit option.
+
SAT provides an automated solution for creating CFS configurations, building
+and configuring images in IMS, and creating BOS session templates. The
+solution is based on a given input file that defines how those configurations,
+images, and session templates should be created. This automated process centers
+around the sat bootprep
command. Man page documentation for sat bootprep
+can be viewed similar to other SAT commands.
(ncn-m001#
) Here is an example:
sat-man sat-bootprep
+
The sat bootprep
command helps the Install and Upgrade Framework (IUF)
+install, upgrade, and deploy products on systems managed by CSM. Outside of IUF,
+it is uncommon to use sat bootprep
. For more information on this relationship,
+see SAT and IUF. For more information on IUF, see the
+IUF section of
+the Cray System Management Documentation.
sat bootprep
is used to create CFS configurations, build and
+rename IMS images, and create BOS session templates which tie the
+configurations and images together during a BOS session.
sat bootsys
automates several portions of the boot and shutdown processes,
+including (but not limited to) performing BOS operations (such as creating BOS
+sessions), powering on and off cabinets, and checking the state of the system
+prior to shutdown.
The input file provided to sat bootprep
is a YAML-formatted file containing
+information which CFS, IMS, and BOS use to create configurations, images, and
+BOS session templates respectively. Writing and modifying these input files is
+the main task associated with using sat bootprep
. An input file is composed of
+three main sections, one each for configurations, images, and session templates.
+These sections may be specified in any order, and any of the sections may be
+omitted if desired.
The sat bootprep
input file is validated against a versioned schema
+definition. The input file should specify the version of the schema with which
+it is compatible under a schema_version
key. For example:
---
+schema_version: 1.0.2
+
(ncn-m001#
) The current sat bootprep
input file schema version can be viewed with the
+following command:
sat bootprep view-schema | grep '^version:'
+
Example output:
+version: '1.0.2'
+
The sat bootprep run
command validates the schema version specified
+in the input file. The command also makes sure that the schema version
+of the input file is compatible with the schema version understood by the
+current version of sat bootprep
. For more information on schema version
+validation, refer to the schema_version
property description in the bootprep
+input file schema. For more information on viewing the bootprep input file
+schema in either raw form or user-friendly HTML form, see View SAT Bootprep
+Schema.
The default HPC CSM Software Recipe bootprep input files provided by the
+hpc-csm-software-recipe
release distribution already contain the correct
+schema version.
The CFS configurations are defined under a configurations
key. Under this
+key, list one or more configurations to create. For each
+configuration, give a name in addition to the list of layers that
+comprise the configuration.
Each layer can be defined by a product name and optionally a version number,
+commit hash, or branch in the product’s configuration repository. If this
+method is used, the layer is created in CFS by looking up relevant configuration
+information (including the configuration repository and commit information) from
+the cray-product-catalog Kubernetes ConfigMap as necessary. A version may be
+supplied. However, if it is absent, the version is assumed to be the latest
+version found in the cray-product-catalog
.
Alternatively, a configuration layer can be defined by explicitly referencing
+the desired configuration repository. Specify the intended version
+of the Ansible playbooks by providing a branch name or commit hash with branch
+or commit
.
The following example shows a CFS configuration with two layers. The first +layer is defined in terms of a product name and version, and the second layer +is defined in terms of a Git clone URL and branch:
+---
+configurations:
+- name: example-configuration
+ layers:
+ - name: example-product
+ playbook: example.yml
+ product:
+ name: example
+ version: 1.2.3
+ - name: another-example-product
+ playbook: another-example.yml
+ git:
+ url: "https://vcs.local/vcs/another-example-config-management.git"
+ branch: main
+
When sat bootprep
is run against an input file, a CFS configuration is created
+corresponding to each configuration in the configurations
section. For
+example, the configuration created from an input file with the layers listed
+above might look something like the following:
{
+ "lastUpdated": "2022-02-07T21:47:49Z",
+ "layers": [
+ {
+ "cloneUrl": "https://vcs.local/vcs/example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "example product",
+ "playbook": "example.yml"
+ },
+ {
+ "cloneUrl": "https://vcs.local/vcs/another-example-config-management.git",
+ "commit": "<commit hash>",
+ "name": "another example product",
+ "playbook": "another-example.yml"
+ }
+ ],
+ "name": "example-configuration"
+}
+
The IMS images are defined under an images
key. Under the images
key, the
+user may define one or more images to be created in a list. Each element of the
+list defines a separate IMS image to be built and/or configured. Images must
+contain a name
key and a base
key.
The name
key defines the name of the resulting IMS image. The base
key
+defines the base image to be configured or the base recipe to be built and
+optionally configured. One of the following keys must be present under the
+base
key:
ims
key to specify an existing image or recipe in IMS.product
key to specify an image or recipe provided by a particular
+version of a product. If a product provides more than one image or recipe,
+specify a filter to select one. For more information, see
+Filter Base Images or Recipes from a Product.image_ref
key to specify another image from the input file
+using its ref_name
.Images may also contain the following keys:
+configuration
key to specify a CFS configuration with which to
+customize the built image. If a configuration is specified, then configuration
+groups must also be specified using the configuration_group_names
key.ref_name
key to specify a unique name that can refer to this image
+within the input file in other images or in session templates. The ref_name
+key allows references to images from the input file that have dynamically
+generated names as described in
+Dynamic Variable Substitutions.description
key to describe the image in the bootprep input file.
+Note that this key is not currently used.Here is an example of an image using an existing IMS recipe as its base. This
+example builds an IMS image from that recipe. It then configures it with
+a CFS configuration named example-compute-config
. The example-compute-config
+CFS configuration can be defined under the configurations
key in the same
+input file, or it can be an existing CFS configuration. Running sat bootprep
+against this input file results in an image named example-compute-image
.
images:
+- name: example-compute-image
+ description: >
+ An example compute node image built from an existing IMS recipe.
+ base:
+ ims:
+ name: example-compute-image-recipe
+ type: recipe
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
Here is an example showing the definition of two images. The first image is
+built from a recipe provided by the uss
product. The second image uses the
+first image as a base and configures it with a configuration named
+example-compute-config
. The value of the first image’s ref_name
key is used
+in the second image’s base.image_ref
key to specify it as a dependency.
+Running sat bootprep
against this input file results in two images, the
+first named example-uss-image
and the second named example-compute-image
.
images:
+- name: example-uss-image
+ ref_name: example-uss-image
+ description: >
+ An example image built from the recipe provided by the USS product.
+ base:
+ product:
+ name: uss
+ version: 1.0.0
+ type: recipe
+- name: example-compute-image
+ description: >
+ An example image that is configured from an image built from the recipe provided
+ by the USS product.
+ base:
+ image_ref: example-uss-image
+ configuration: example-compute-config
+ configuration_group_names:
+ - Compute
+
This example assumes that the given version of the uss
product provides
+only a single IMS recipe. If more than one recipe is provided by the
+given version of the uss
product, use a filter as described in
+Filter Base Images or Recipes from a Product.
A product may provide more than one image or recipe. If this happens,
filter the product's images or recipes whenever a base image or recipe from
that product is used. Beneath the `base.product` value within an image,
specify a `filter` key to create a filter using the following criteria:

- Use a `prefix` key to filter based on a prefix matching the name of the
  image or recipe.
- Use a `wildcard` key to filter based on a shell-style wildcard matching the
  name of the image or recipe.
- Use an `arch` key to filter based on the target architecture of the image or
  recipe in IMS.

When specifying more than one filter key, all filters must match only the
desired image or recipe. An error occurs if either no images or recipes
match the given filters or if more than one image or recipe matches
the given filters.
Here is an example of three IMS images built from the Kubernetes image and the
Ceph storage image provided by the `csm` product. This example uses a prefix
filter to select from the multiple images provided by the CSM product.
The first two IMS images in the example find any image from the specified `csm`
product version whose name starts with `secure-kubernetes`. The third image in
the example finds any `csm` image whose name starts with `secure-storage-ceph`.
All three images are then configured with a configuration named
`example-management-config`. Running `sat bootprep` against this input file
results in three IMS images named `worker-example-csm-image`,
`master-example-csm-image`, and `storage-example-csm-image`.
images:
+- name: worker-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-kubernetes
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Worker
+
+- name: master-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-kubernetes
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Master
+
+- name: storage-example-csm-image
+ base:
+ product:
+ name: csm
+ version: 1.4.1
+ type: image
+ filter:
+ prefix: secure-storage-ceph
+ configuration: example-management-config
+ configuration_group_names:
+ - Management_Storage
+
Here is an example of two IMS images built from recipes provided by the `uss`
product. This example uses an architecture filter to select from the multiple
recipes provided by the USS product. The first image will be built from the
`x86_64` version of the IMS recipe provided by the specified version of the
`uss` product. The second image will be built from the `aarch64` version of
the IMS recipe provided by the specified version of the `uss` product.
images:
+- name: example-uss-image-x86_64
+ ref_name: example-uss-image-x86_64
+ description: >
+ An example image built from the x86_64 recipe provided by the USS product.
+ base:
+ product:
+ name: uss
+ version: 1.0.0
+ type: recipe
+ filter:
+ arch: x86_64
+
+- name: example-uss-image-aarch64
+ ref_name: example-uss-image-aarch64
+ description: >
+ An example image built from the aarch64 recipe provided by the USS product.
+ base:
+ product:
+ name: uss
+ version: 1.0.0
+ type: recipe
+ filter:
+ arch: aarch64
+
The BOS session templates are defined under the `session_templates` key. Each
session template must provide values for the `name`, `image`, `configuration`,
and `bos_parameters` keys. The `name` key defines the name of the resulting BOS
session template. The `image` key defines the image to use in the BOS session
template. One of the following keys must be present under the `image` key:

- Use an `ims` key to specify an existing image or recipe in IMS.
- Use an `image_ref` key to specify another image from the input file
  using its `ref_name`.

The `configuration` key defines the CFS configuration specified
in the BOS session template.
The `bos_parameters` key defines parameters that are passed through directly to
the BOS session template. The `bos_parameters` key should contain a `boot_sets`
key, and each boot set in the session template should be specified under
`boot_sets`. Each boot set can contain the following keys, all of
which are optional:

- Use an `arch` key to specify the architecture of the nodes that should be
  targeted by the boot set. Valid values are the same as those used by
  Hardware State Manager (HSM).
- Use a `kernel_parameters` key to specify the parameters passed to the kernel
  on the command line.
- Use a `network` key to specify the network over which the nodes boot.
- Use a `node_list` key to specify the nodes to add to the boot set.
- Use a `node_roles_groups` key to specify the HSM roles to add to the boot
  set.
- Use a `node_groups` key to specify the HSM groups to add to the boot set.
- Use a `rootfs_provider` key to specify the root file system provider.
- Use a `rootfs_provider_passthrough` key to specify the parameters to add to
  the `rootfs=` kernel parameter.

As mentioned above, the parameters under `bos_parameters` are passed through
directly to BOS. For more information on the properties of a BOS boot set,
refer to BOS Session Templates in the Cray
System Management Documentation.
Here is an example of a BOS session template that refers to an existing IMS
image by name and targets nodes with the role `Compute` and the architecture
`X86` in HSM:
session_templates:
+- name: example-session-template
+ image:
+ ims:
+ name: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ arch: X86
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
Here is an example of a BOS session template that refers to an image from the
input file by its `ref_name` and targets nodes with the role `Compute` and the
architecture `ARM` in HSM. Note that using the `image_ref` key requires that
an image defined in the input file specifies `example-image` as the value of
its `ref_name` key.
session_templates:
+- name: example-session-template
+ image:
+ image_ref: example-image
+ configuration: example-configuration
+ bos_parameters:
+ boot_sets:
+ example_boot_set:
+ arch: ARM
+ kernel_parameters: ip=dhcp quiet
+ node_roles_groups:
+ - Compute
+ rootfs_provider: cpss3
+ rootfs_provider_passthrough: dvs:api-gw-service-nmn.local:300:nmn0
+
The `sat bootprep` command takes any variables provided and substitutes them
into the input file. Variables are sourced from the command line, any variable
files directly provided, and the HPC CSM Software Recipe files used, in that
order. When providing values through a variable file, `sat bootprep`
substitutes the values with Jinja2 template syntax. The HPC CSM Software Recipe
provides default variables in a `product_vars.yaml` variable file. This file
defines information about each HPC software product included in the recipe.
Variables are primarily substituted into the default HPC CSM Software Recipe
bootprep input files through IUF. However, variable files can also be given to
`sat bootprep` directly, apart from IUF's use of the recipe. When using
variables directly with `sat bootprep`, there are some limitations. For more
information on SAT variable limitations, see SAT and IUF.
For more information on IUF and variable substitutions, see the
IUF section of
the Cray System Management Documentation.
View a listing of the default HPC CSM Software Recipe variables and
their values by running `sat bootprep list-vars`. For more information on
options that can be used with the `list-vars` subcommand, refer to the man page
for the `sat bootprep` subcommand.
By default, the `sat bootprep` command uses the variables from the latest
installed version of the HPC CSM Software Recipe. Override this with the
`--recipe-version` command line argument to `sat bootprep run`.

(`ncn-m001#`) For example, to explicitly select the `22.11.0` version of the
HPC CSM Software Recipe default variables, specify `--recipe-version 22.11.0`:
sat bootprep run --recipe-version 22.11.0 compute-and-uan-bootprep.yaml
+
The entire `sat bootprep` input file is not rendered by the Jinja2 template
engine. Jinja2 template rendering of the input file is performed individually
for each supported value. The values of the following keys in the bootprep
input file support rendering as a Jinja2 template and thus support variables:

- The `name` key of each configuration under the `configurations` key.
- The following keys of each layer under the `layers` key in a
  configuration:
  - `name`
  - `playbook`
  - `git.branch`
  - `product.version`
  - `product.branch`
- The following keys of each image under the `images` key:
  - `name`
  - `base.product.version`
  - `base.product.filter.arch`
  - `base.product.filter.prefix`
  - `base.product.filter.wildcard`
  - `configuration`
- The following keys of each session template under the `session_templates`
  key:
  - `name`
  - `configuration`

You can use Jinja2 built-in filters in values of any of the keys listed above.
In addition, Python string methods can be called on the string variables.
Variable names with hyphens are not allowed in Jinja2 expressions because they
are parsed as an arithmetic expression instead of a single variable. To support
product names with hyphens, `sat bootprep` converts hyphens to underscores in
all top-level keys of the default HPC CSM Software Recipe variables. It also
converts any variables sourced from the command line or any variable files
provided directly. When referring to a variable with hyphens in the bootprep
input file, keep this in mind. For example, to refer to the product version
variable for `slingshot-host-software` in the bootprep input file, write
`"{{slingshot_host_software.version}}"`.
The following example bootprep input file shows how a variable of a USS version +can be used in an input file that creates a CFS configuration for computes. +Only one layer is shown for brevity.
+---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: uss-compute-{{uss.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: uss
+ version: "{{uss.version}}"
+ branch: "{{uss.working_branch}}"
+
Note: When the value of a key in the bootprep input file is a Jinja2 +expression, it must be quoted to pass YAML syntax checking.
Jinja2 expressions can also use filters and Python's built-in string methods to
manipulate the variable values. For example, suppose only the major and minor
components of a USS version are to be used in the branch name for the USS
layer of the CFS configuration. Use the `split` string method to
achieve this as follows:
---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: uss-compute-{{uss.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: uss
+ version: "{{uss.version}}"
+ branch: integration-{{uss.version.split('.')[0]}}-{{uss.version.split('.')[1]}}
+
Additional variables are available besides the default variables provided by +the HPC CSM Software Recipe. (For more information, see HPC CSM Software +Recipe Variable Substitutions.) +These additional variables are dynamic because their values are determined +at run-time based on the context in which they appear. Available dynamic +variables include the following:
- The variable `base.name` can be used in the `name` of an image under the
  `images` key. The value of this variable is the name of the IMS image or
  recipe used as the base of this image.
- The variable `image.name` can be used in the `name` of a session template
  under the `session_templates` key. The value of this variable is the name of
  the IMS image used in this session template.

  Note: The name of a session template is restricted to 45 characters. Keep
  this in mind when using `image.name` in the name of a session template.
These variables reduce the need to duplicate values throughout the
`sat bootprep` input file and make the following use cases possible:
This section provides an example bootprep input file. It also gives +instructions for obtaining the default bootprep input files delivered +with a release of the HPC CSM Software Recipe.
+The following bootprep input file provides an example of using most of the +features described in previous sections. It is not intended to be a complete +bootprep file for the entire CSM product.
+---
+configurations:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ layers:
+ - name: uss-compute-{{uss.working_branch}}
+ playbook: cos-compute.yml
+ product:
+ name: uss
+ version: "{{uss.version}}"
+ branch: "{{uss.working_branch}}"
+ - name: cpe-pe_deploy-{{cpe.working_branch}}
+ playbook: pe_deploy.yml
+ product:
+ name: cpe
+ version: "{{cpe.version}}"
+ branch: "{{cpe.working_branch}}"
+
+images:
+- name: "{{default.note}}{{base.name}}{{default.suffix}}"
+ ref_name: base_uss_image
+ base:
+ product:
+ name: uss
+ type: recipe
+ version: "{{uss.version}}"
+
+- name: "compute-{{base.name}}"
+ ref_name: compute_image
+ base:
+ image_ref: base_uss_image
+ configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ configuration_group_names:
+ - Compute
+
+session_templates:
+- name: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ image:
+ image_ref: compute_image
+ configuration: "{{default.note}}compute-{{recipe.version}}{{default.suffix}}"
+ bos_parameters:
+ boot_sets:
+ compute:
+ kernel_parameters: ip=dhcp quiet spire_join_token=${SPIRE_JOIN_TOKEN}
+ node_roles_groups:
+ - Compute
+ rootfs_provider_passthrough: "dvs:api-gw-service-nmn.local:300:hsn0,nmn0:0"
+
Default bootprep input files are delivered by the HPC CSM Software Recipe
product. Access these files by cloning the `hpc-csm-software-recipe`
repository, as described in the Accessing `sat bootprep` files process of
the Cray System Management Documentation.

(`ncn-m001#`) Find the default input files in the `bootprep` directory of the
cloned repository:
ls bootprep/
+
The `sat bootprep generate-example` command was not updated for
recent bootprep schema changes. It is recommended to instead use the
default bootprep input files described in Access Default Bootprep Input
Files. The `sat bootprep generate-example` command will be updated in a
future release of SAT.
The `sat bootprep run` command uses information from the bootprep input file to
create CFS configurations, IMS images, and BOS session templates. For easy
reference, the command also includes output summarizing the final creation
results.

(`ncn-m001#`) Here is a sample table output after running `sat bootprep run`:
################################################################################
+CFS configurations
+################################################################################
++------------------+
+| name |
++------------------+
+| example-config-1 |
+| example-config-2 |
++------------------+
+################################################################################
+IMS images
+################################################################################
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| name | preconfigured_image_id | final_image_id | configuration | configuration_group_names |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+| example-image | c1bcaf00-109d-470f-b665-e7b37dedb62f | a22fb912-22be-449b-a51b-081af2d7aff6 | example-config | Compute |
++---------------+--------------------------------------+--------------------------------------+----------------+----------------------------+
+################################################################################
+BOS session templates
+################################################################################
++------------------+----------------+
+| name | configuration |
++------------------+----------------+
+| example-template | example-config |
++------------------+----------------+
+
The contents of the YAML input files used by `sat bootprep` must conform to a
schema which defines the structure of the data. The schema definition is written
using the JSON Schema format. (Although the format is named "JSON Schema", the
schema itself is written in YAML as well.) More information, including
introductory materials and a formal specification of the JSON Schema
metaschema, can be found on the JSON Schema website.

(`ncn-m001#`) To view the exact schema specification, run
`sat bootprep view-schema`.
sat bootprep view-schema
+---
+$schema: "https://json-schema.org/draft/2020-12/schema"
+
Example output:
+title: Bootprep Input File
+description: >
+ A description of the set of CFS configurations to create, the set of IMS
+ images to create and optionally customize with the defined CFS configurations,
+ and the set of BOS session templates to create that reference the defined
+ images and configurations.
+type: object
+additionalProperties: false
+properties:
+ ...
+
The raw schema definition can be difficult to understand without experience
working with JSON Schema specifications. For this reason, a feature is included
with `sat bootprep` that generates user-friendly HTML documentation for the
input file schema. This HTML documentation can be browsed with a web browser.

(`ncn-m001#`) Create a documentation tarball using `sat bootprep`.
sat bootprep generate-docs
+
Example output:
+INFO: Wrote input schema documentation to /root/bootprep-schema-docs.tar.gz
+
An alternate output directory can be specified with the `--output-dir`
option. The generated tarball is always named `bootprep-schema-docs.tar.gz`.
sat bootprep generate-docs --output-dir /tmp
+
Example output:
+INFO: Wrote input schema documentation to /tmp/bootprep-schema-docs.tar.gz
+
(`user@hostname>`) From another machine, copy the tarball to a local directory.
scp root@ncn-m001:bootprep-schema-docs.tar.gz .
+
(`user@hostname>`) Extract the contents of the tarball and open the contained
`index.html`.
tar xzvf bootprep-schema-docs.tar.gz
+
Example output:
+x bootprep-schema-docs/
+x bootprep-schema-docs/index.html
+x bootprep-schema-docs/schema_doc.css
+x bootprep-schema-docs/schema_doc.min.js
+another-machine$ open bootprep-schema-docs/index.html
+